Compare commits
277 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 46505210cd | |||
| abf051afdb | |||
| d4d1a332fb | |||
| a3ed321e14 | |||
| 2bb738ed3f | |||
| 10c8b62818 | |||
| 87abadbbfc | |||
| defc55a5dc | |||
| 6a68e45e7c | |||
| a2fa4b2832 | |||
| 9cfadbfc04 | |||
| 6fed78196e | |||
| db430dd2c9 | |||
| 3618aed39e | |||
| 4c083c7314 | |||
| 3202640cf7 | |||
| c9bbe71869 | |||
| 00edb273cd | |||
| 608b3282dd | |||
| 2888bd5693 | |||
| 90d95da48d | |||
| 31fb52761e | |||
| f7e651d0bc | |||
| ff41fb37fd | |||
| 776c8ad3f7 | |||
| db97bf8654 | |||
| e2e0ffce44 | |||
| 2f3a3e0be4 | |||
| c5f7221fb2 | |||
| 4a42b947bc | |||
| 46b260641f | |||
| 60d80970a4 | |||
| daabd90359 | |||
| fe54733a39 | |||
| 8d6eff2792 | |||
| e4f3beee01 | |||
| 54b69f0d68 | |||
| c4b3df04a0 | |||
| d4c0da85da | |||
| 3fa376905c | |||
| a4a34e46a8 | |||
| d235dfa023 | |||
| 24898439cb | |||
| 6da98b4e7b | |||
| fba4b9b785 | |||
| b246a03ac4 | |||
| 04c64e97b0 | |||
| af9a55cebd | |||
| 44391d3d1d | |||
| 619c62d89a | |||
| dfc81923ba | |||
| 5a6205476a | |||
| be7f512244 | |||
| 5eec34c17e | |||
| 656214b162 | |||
| 45fc25d02b | |||
| 9aec5582db | |||
| 0f7e87a91c | |||
| 5acac804a1 | |||
| 85db90a2df | |||
| a026f2eddb | |||
| 73783b85d5 | |||
| 4cce4f6392 | |||
| 24e63a967a | |||
| dbb6ba333a | |||
| 97b48ca3dd | |||
| a4106ee20d | |||
| 21817543ad | |||
| 6babbc99e7 | |||
| 1f5e9d815b | |||
| 83caa48cdb | |||
| b51019a7e8 | |||
| 72d65cd7ae | |||
| 76251e935e | |||
| a58a14111b | |||
| 49230c3a44 | |||
| 262d2b2133 | |||
| ad2ff2ccc4 | |||
| dff7a58736 | |||
| 44c9bd645e | |||
| 4741d60e4c | |||
| 1a079a41e7 | |||
| ebbd3bcc61 | |||
| 54fdc8addc | |||
| e0320e761c | |||
| 2b7c308188 | |||
| 40ac52654f | |||
| 034e405824 | |||
| 20404cf3f2 | |||
| 264bb5475c | |||
| 6e3f9f6e79 | |||
| 9d0a993c2a | |||
| cd3e60ba4f | |||
| 360299f5f6 | |||
| d61e33113c | |||
| 5faf7cf45f | |||
| cd922fa750 | |||
| a4d4c386f7 | |||
| c8da826ef7 | |||
| 5166c2c4d7 | |||
| ec70e70a5d | |||
| 4a79b37714 | |||
| 76ae1c3603 | |||
| a60b88b80e | |||
| e31b4433a1 | |||
| 19183ad14a | |||
| e1412320a7 | |||
| b9c94dfab0 | |||
| 6f43c09bd0 | |||
| 9e15e95c2b | |||
| 1306c4cc9c | |||
| f1247817d3 | |||
| fdad5b85c9 | |||
| 39ee0b5973 | |||
| 33675c8ae8 | |||
| 90d5e9887a | |||
| c3af591810 | |||
| bb8a6200aa | |||
| 44573366eb | |||
| edb0af2bda | |||
| 7d5bb54b64 | |||
| a18c63792a | |||
| 0b58707a49 | |||
| 0561b55af5 | |||
| d785ed9054 | |||
| 88fb8417fd | |||
| f70d743c8b | |||
| 251b8a10c0 | |||
| 3f06e2ee77 | |||
| 7f11c793ef | |||
| e28dcbff9a | |||
| 89ec0186a4 | |||
| 6e1efde8c6 | |||
| 6aa80d4210 | |||
| 4e86006b3f | |||
| 679e22a7c2 | |||
| 4d3228a4a8 | |||
| 0aa307f0b6 | |||
| 6a69ecefb1 | |||
| c05beb66e9 | |||
| 34ddb24014 | |||
| 9d69613df7 | |||
| 630f818538 | |||
| b280a720ff | |||
| 48bac9c212 | |||
| e88c49fb50 | |||
| 9e10a5a400 | |||
| 1dbea24fa2 | |||
| 0606228b40 | |||
| f35b9f0988 | |||
| c400c46672 | |||
| fbdeb2161d | |||
| 8c7d03dd29 | |||
| 135ce7b2df | |||
| 0e47ae051b | |||
| 04255473d2 | |||
| ce6bbff389 | |||
| 92c4bf36f6 | |||
| 0bedbf1877 | |||
| a5cb6e1242 | |||
| a33f6a2f15 | |||
| d79e9090e6 | |||
| 97fd660e38 | |||
| 96e168d035 | |||
| 4d2b77ecaf | |||
| e48da80a4b | |||
| 6125312f61 | |||
| 007fd0c2e3 | |||
| c4f90d6a57 | |||
| 5dd62c9466 | |||
| 4d072d7217 | |||
| b4242b1394 | |||
| fa2343dff9 | |||
| 1b1667bc2b | |||
| c2b4bf9c67 | |||
| 0845fefe6c | |||
| d911556a84 | |||
| 38be8d9401 | |||
| 9f3190f62a | |||
| 41aeb7e0f2 | |||
| f8e67519e1 | |||
| 4279dcba1e | |||
| be7e3d6b56 | |||
| 41e128190b | |||
| ba869ccde5 | |||
| 27fe066b23 | |||
| e94b8ff714 | |||
| e3a6894904 | |||
| 92b97bda00 | |||
| d5c6039296 | |||
| 3fa13c8bfd | |||
| 9d306b71fa | |||
| 38a936c120 | |||
| 86d13a7240 | |||
| 0b2d449ffa | |||
| d881373dce | |||
| 9ade4c65f3 | |||
| 5c73b85f65 | |||
| f5764c01fc | |||
| 8c7c2a4407 | |||
| 978de5e9a4 | |||
| 4e9859117c | |||
| a134a0fc08 | |||
| 6df58af0c3 | |||
| 852606ec8b | |||
| caae6922be | |||
| fafeaf3d83 | |||
| 2ab8dad6a5 | |||
| 50216accde | |||
| bf2fdac2d0 | |||
| 626c4bf562 | |||
| a56b3f3d51 | |||
| 2896fa1dc9 | |||
| 04251401aa | |||
| e86b6e83ae | |||
| 6f5e75da15 | |||
| b2742aab80 | |||
| 208365cd3d | |||
| 26f679d86e | |||
| cf39a15db1 | |||
| 1f3c35f162 | |||
| 2bccc3dad9 | |||
| 959cb8b21a | |||
| f8a2410a0a | |||
| 03b984d5a7 | |||
| 57db18c6a3 | |||
| ea79e94842 | |||
| b0612cfa0f | |||
| 4e61d73da5 | |||
| 3b41776110 | |||
| 3e3d38696c | |||
| 7b22e5be0f | |||
| 39fba49cfe | |||
| 706a15f0bc | |||
| b8dc413b73 | |||
| 8d29ce0122 | |||
| a272e7cbab | |||
| ce55b239e2 | |||
| 432ab73741 | |||
| f93d650992 | |||
| f9da19d1a1 | |||
| d2b6a26fe4 | |||
| 482ef89a73 | |||
| 34fd17ba55 | |||
| 8baa07db84 | |||
| ba8a53803a | |||
| 31fade9730 | |||
| fffe483c02 | |||
| 8c79993280 | |||
| 8a0672a6be | |||
| 395f798ee2 | |||
| debff75221 | |||
| 4bf0a6c22e | |||
| fb025821cb | |||
| ff880fd4c9 | |||
| 03495d901d | |||
| 798958f20a | |||
| 699295c5be | |||
| a62a007c87 | |||
| d4fc1de80d | |||
| 0902b5653f | |||
| 0b6a02075c | |||
| 7880a8de30 | |||
| 2abedd6b4b | |||
| 5a251a99e6 | |||
| 25ef33de7f | |||
| ec2c274cd9 | |||
| 47f0b3db9a | |||
| 233de3508f | |||
| 13b2d0048c | |||
| 944dd760ca | |||
| d67aa6ae5c | |||
| f1a5fac1b9 | |||
| d0691d5aa0 | |||
| f1610bbd2e | |||
| 327d843f64 | |||
| b8010270c1 |
@@ -0,0 +1,89 @@
|
||||
name: Build and Publish Astrolabe App Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'astrolabe-v*'
|
||||
|
||||
env:
|
||||
APP_NAME: astrolabe
|
||||
APP_DIR: third_party/astrolabe
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get version from tag
|
||||
id: tag
|
||||
run: |
|
||||
echo "TAG=${GITHUB_REF#refs/tags/astrolabe-v}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Validate version in info.xml matches tag
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: |
|
||||
INFO_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' appinfo/info.xml | tr -d '\t')
|
||||
if [ "$INFO_VERSION" != "${{ steps.tag.outputs.TAG }}" ]; then
|
||||
echo "Version mismatch: info.xml has $INFO_VERSION but tag is ${{ steps.tag.outputs.TAG }}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Version validated: $INFO_VERSION"
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: 8.1
|
||||
coverage: none
|
||||
|
||||
- name: Checkout Nextcloud server (for signing)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: nextcloud/server
|
||||
ref: stable30
|
||||
path: server
|
||||
|
||||
- name: Install dependencies and build
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: |
|
||||
composer install --no-dev --optimize-autoloader
|
||||
npm ci
|
||||
npm run build
|
||||
|
||||
- name: Setup signing certificate
|
||||
run: |
|
||||
mkdir -p $HOME/.nextcloud/certificates
|
||||
echo "${{ secrets.APP_PRIVATE_KEY }}" > $HOME/.nextcloud/certificates/${{ env.APP_NAME }}.key
|
||||
echo "${{ secrets.APP_PUBLIC_CRT }}" > $HOME/.nextcloud/certificates/${{ env.APP_NAME }}.crt
|
||||
|
||||
- name: Build app store package
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: make appstore server_dir=${{ github.workspace }}/server
|
||||
|
||||
- name: Create GitHub release and attach tarball
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
file: ${{ env.APP_DIR }}/build/artifacts/${{ env.APP_NAME }}.tar.gz
|
||||
asset_name: ${{ env.APP_NAME }}-${{ steps.tag.outputs.TAG }}.tar.gz
|
||||
tag: ${{ github.ref }}
|
||||
release_name: Astrolabe ${{ steps.tag.outputs.TAG }}
|
||||
prerelease: ${{ contains(steps.tag.outputs.TAG, '-alpha') || contains(steps.tag.outputs.TAG, '-beta') || contains(steps.tag.outputs.TAG, '-rc') }}
|
||||
|
||||
- name: Upload to Nextcloud App Store
|
||||
uses: R0Wi/nextcloud-appstore-push-action@v1.0.4
|
||||
with:
|
||||
app_name: ${{ env.APP_NAME }}
|
||||
appstore_token: ${{ secrets.APPSTORE_TOKEN }}
|
||||
download_url: ${{ github.server_url }}/${{ github.repository }}/releases/download/${{ github.ref_name }}/${{ env.APP_NAME }}-${{ steps.tag.outputs.TAG }}.tar.gz
|
||||
app_private_key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
nightly: ${{ contains(steps.tag.outputs.TAG, '-alpha') || contains(steps.tag.outputs.TAG, '-beta') || contains(steps.tag.outputs.TAG, '-rc') }}
|
||||
@@ -0,0 +1,275 @@
|
||||
# Consolidated CI workflow for Astroglobe Nextcloud app
|
||||
#
|
||||
# Runs on PRs that modify the astroglobe directory
|
||||
# Based on Nextcloud app skeleton workflows
|
||||
#
|
||||
# SPDX-FileCopyrightText: 2025 Nextcloud MCP Server contributors
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
name: Astroglobe CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'third_party/astroglobe/**'
|
||||
- '.github/workflows/astroglobe-ci.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: astroglobe-ci-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
changes:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
outputs:
|
||||
frontend: ${{ steps.changes.outputs.frontend }}
|
||||
php: ${{ steps.changes.outputs.php }}
|
||||
steps:
|
||||
- uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
id: changes
|
||||
continue-on-error: true
|
||||
with:
|
||||
filters: |
|
||||
frontend:
|
||||
- 'third_party/astroglobe/src/**'
|
||||
- 'third_party/astroglobe/package.json'
|
||||
- 'third_party/astroglobe/package-lock.json'
|
||||
- 'third_party/astroglobe/vite.config.js'
|
||||
- 'third_party/astroglobe/**/*.js'
|
||||
- 'third_party/astroglobe/**/*.ts'
|
||||
- 'third_party/astroglobe/**/*.vue'
|
||||
php:
|
||||
- 'third_party/astroglobe/lib/**'
|
||||
- 'third_party/astroglobe/appinfo/**'
|
||||
- 'third_party/astroglobe/composer.json'
|
||||
- 'third_party/astroglobe/psalm.xml'
|
||||
|
||||
# Node.js build and lint
|
||||
node-build:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: Node.js build
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies & build
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: |
|
||||
npm ci
|
||||
npm run build --if-present
|
||||
|
||||
- name: Check webpack build changes
|
||||
run: |
|
||||
bash -c "[[ ! \"`git status --porcelain `\" ]] || (echo 'Please recompile and commit the assets' && exit 1)"
|
||||
|
||||
# ESLint
|
||||
eslint:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: ESLint
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: npm ci
|
||||
|
||||
- name: Lint
|
||||
run: npm run lint
|
||||
|
||||
# Stylelint
|
||||
stylelint:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: Stylelint
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: npm ci
|
||||
|
||||
- name: Lint
|
||||
run: npm run stylelint
|
||||
|
||||
# PHP Code Style
|
||||
php-cs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.php != 'false'
|
||||
name: PHP CS Fixer
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Get php version
|
||||
id: versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Set up php${{ steps.versions.outputs.php-min }}
|
||||
uses: shivammathur/setup-php@cf4cade2721270509d5b1c766ab3549210a39a2a # v2.33.0
|
||||
with:
|
||||
php-version: ${{ steps.versions.outputs.php-min }}
|
||||
extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, session, simplexml, xmlreader, xmlwriter, zip, zlib, sqlite, pdo_sqlite
|
||||
coverage: none
|
||||
ini-file: development
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
composer remove nextcloud/ocp --dev || true
|
||||
composer i
|
||||
|
||||
- name: Lint
|
||||
run: composer run cs:check || ( echo 'Please run `composer run cs:fix` to format your code' && exit 1 )
|
||||
|
||||
# Psalm Static Analysis
|
||||
psalm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.php != 'false'
|
||||
name: Psalm
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Get php version
|
||||
id: versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Set up php${{ steps.versions.outputs.php-min }}
|
||||
uses: shivammathur/setup-php@cf4cade2721270509d5b1c766ab3549210a39a2a # v2.33.0
|
||||
with:
|
||||
php-version: ${{ steps.versions.outputs.php-min }}
|
||||
extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, session, simplexml, xmlreader, xmlwriter, zip, zlib, sqlite, pdo_sqlite
|
||||
coverage: none
|
||||
ini-file: development
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
composer remove nextcloud/ocp --dev || true
|
||||
composer i
|
||||
|
||||
- name: Get OCP version matrix
|
||||
id: ocp-versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Install OCP for static analysis
|
||||
run: |
|
||||
# Get first OCP version from matrix
|
||||
OCP_VERSION=$(echo '${{ steps.ocp-versions.outputs.ocp-matrix }}' | jq -r '.include[0]."ocp-version"')
|
||||
composer require --dev "nextcloud/ocp:$OCP_VERSION" --ignore-platform-reqs --with-dependencies
|
||||
|
||||
- name: Run Psalm
|
||||
run: composer run psalm -- --threads=1 --monochrome --no-progress --output-format=github
|
||||
|
||||
# Summary job
|
||||
summary:
|
||||
permissions:
|
||||
contents: none
|
||||
runs-on: ubuntu-latest
|
||||
needs: [changes, node-build, eslint, stylelint, php-cs, psalm]
|
||||
if: always()
|
||||
name: astroglobe-ci-summary
|
||||
steps:
|
||||
- name: Summary status
|
||||
run: |
|
||||
if ${{ needs.changes.outputs.frontend != 'false' && (needs.node-build.result != 'success' || needs.eslint.result != 'success' || needs.stylelint.result != 'success') }}; then
|
||||
echo "Frontend checks failed"
|
||||
exit 1
|
||||
fi
|
||||
if ${{ needs.changes.outputs.php != 'false' && (needs.php-cs.result != 'success' || needs.psalm.result != 'success') }}; then
|
||||
echo "PHP checks failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "All checks passed"
|
||||
@@ -7,26 +7,152 @@ on:
|
||||
|
||||
jobs:
|
||||
bump-version:
|
||||
if: "!startsWith(github.event.head_commit.message, 'bump:')"
|
||||
if: "!startsWith(github.event.head_commit.message, 'bump:') && !startsWith(github.event.head_commit.message, 'chore(release):')"
|
||||
runs-on: ubuntu-latest
|
||||
name: "Bump version and create changelog with commitizen"
|
||||
name: "Bump version and create changelog for monorepo components"
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
|
||||
- name: Create bump and changelog
|
||||
uses: commitizen-tools/commitizen-action@bb4f1df6601e2a1a891506581b0c53acdc88e07d # 0.26.0
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
|
||||
changelog_increment_filename: body.md
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
|
||||
with:
|
||||
body_path: "body.md"
|
||||
tag_name: v${{ env.REVISION }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
- name: Detect and bump component versions
|
||||
id: bump
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Track which components were bumped
|
||||
BUMPED_COMPONENTS=""
|
||||
|
||||
# Helper function to check for commits with specific scope since last tag
|
||||
has_commits_since_tag() {
|
||||
local tag_pattern="$1"
|
||||
local scope_pattern="$2"
|
||||
|
||||
# Get the most recent tag matching the pattern
|
||||
local last_tag=$(git tag --sort=-creatordate | grep -E "^${tag_pattern}" | head -n 1 || echo "")
|
||||
|
||||
if [ -z "$last_tag" ]; then
|
||||
# No previous tag, check all commits on master
|
||||
local commit_range="master"
|
||||
else
|
||||
# Check commits since last tag
|
||||
local commit_range="${last_tag}..HEAD"
|
||||
fi
|
||||
|
||||
# Count commits matching the scope pattern
|
||||
local commit_count=$(git log "$commit_range" --oneline --grep="^${scope_pattern}" -E | wc -l)
|
||||
|
||||
if [ "$commit_count" -gt 0 ]; then
|
||||
echo "Found $commit_count commits for scope '$scope_pattern' since $last_tag"
|
||||
return 0
|
||||
else
|
||||
echo "No commits found for scope '$scope_pattern' since $last_tag"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Bump MCP server (default - all commits except helm/astrolabe scopes)
|
||||
echo "Checking MCP server for version bump..."
|
||||
|
||||
# Get the most recent MCP tag
|
||||
last_mcp_tag=$(git tag --sort=-creatordate | grep -E "^v[0-9]" | head -n 1 || echo "")
|
||||
|
||||
if [ -z "$last_mcp_tag" ]; then
|
||||
commit_range="master"
|
||||
else
|
||||
commit_range="${last_mcp_tag}..HEAD"
|
||||
fi
|
||||
|
||||
# Count conventional commits that are NOT scoped to helm or astrolabe
|
||||
mcp_commit_count=$(git log "$commit_range" --oneline --grep="^(feat|fix|docs|refactor|perf|test|build|ci|chore)" -E | \
|
||||
{ grep -v "(helm)" || true; } | { grep -v "(astrolabe)" || true; } | wc -l)
|
||||
|
||||
if [ "$mcp_commit_count" -gt 0 ]; then
|
||||
echo "Found $mcp_commit_count commits for MCP server since $last_mcp_tag"
|
||||
echo "Bumping MCP server version..."
|
||||
./scripts/bump-mcp.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS mcp"
|
||||
else
|
||||
echo "No commits found for MCP server since $last_mcp_tag"
|
||||
fi
|
||||
|
||||
# Bump Helm chart (scope: helm)
|
||||
echo "Checking Helm chart for version bump..."
|
||||
if has_commits_since_tag "nextcloud-mcp-server-" "(feat|fix|docs|refactor|perf|test|build|ci|chore)\(helm\)(!)?:"; then
|
||||
echo "Bumping Helm chart version..."
|
||||
./scripts/bump-helm.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS helm"
|
||||
fi
|
||||
|
||||
# Bump Astrolabe (scope: astrolabe)
|
||||
echo "Checking Astrolabe for version bump..."
|
||||
if has_commits_since_tag "astrolabe-v" "(feat|fix|docs|refactor|perf|test|build|ci|chore)\(astrolabe\)(!)?:"; then
|
||||
echo "Bumping Astrolabe version..."
|
||||
./scripts/bump-astrolabe.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS astrolabe"
|
||||
fi
|
||||
|
||||
# Output summary
|
||||
if [ -z "$BUMPED_COMPONENTS" ]; then
|
||||
echo "No components required version bumps"
|
||||
echo "bumped=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "Bumped components:$BUMPED_COMPONENTS"
|
||||
echo "bumped=true" >> $GITHUB_OUTPUT
|
||||
echo "components=$BUMPED_COMPONENTS" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Push tags
|
||||
if: steps.bump.outputs.bumped == 'true'
|
||||
run: |
|
||||
git push
|
||||
git push --tags
|
||||
echo "Pushed tags for components:${{ steps.bump.outputs.components }}"
|
||||
|
||||
- name: Summary
|
||||
if: steps.bump.outputs.bumped == 'true'
|
||||
run: |
|
||||
echo "## Version Bump Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "The following components were bumped:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
for component in ${{ steps.bump.outputs.components }}; do
|
||||
case $component in
|
||||
mcp)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^v[0-9]' | head -n 1)
|
||||
echo "- **MCP Server**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
helm)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^nextcloud-mcp-server-' | head -n 1)
|
||||
echo "- **Helm Chart**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
astrolabe)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^astrolabe-v' | head -n 1)
|
||||
echo "- **Astrolabe**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Tags have been pushed and release workflows will trigger automatically." >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
name: Claude Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
# Optional: Only run on specific file changes
|
||||
# paths:
|
||||
# - "src/**/*.ts"
|
||||
# - "src/**/*.tsx"
|
||||
# - "src/**/*.js"
|
||||
# - "src/**/*.jsx"
|
||||
|
||||
jobs:
|
||||
claude-review:
|
||||
# Optional: Filter by PR author
|
||||
# if: |
|
||||
# github.event.pull_request.user.login == 'external-contributor' ||
|
||||
# github.event.pull_request.user.login == 'new-developer' ||
|
||||
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code Review
|
||||
id: claude-review
|
||||
uses: anthropics/claude-code-action@d7b6d50442a89f005016e778bf825a72ef582525 # v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
prompt: |
|
||||
REPO: ${{ github.repository }}
|
||||
PR NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
Please review this pull request and provide feedback on:
|
||||
- Code quality and best practices
|
||||
- Potential bugs or issues
|
||||
- Performance considerations
|
||||
- Security concerns
|
||||
- Test coverage
|
||||
|
||||
Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
|
||||
|
||||
Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
|
||||
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
|
||||
claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
name: Claude Code
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_review_comment:
|
||||
types: [created]
|
||||
issues:
|
||||
types: [opened, assigned]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
jobs:
|
||||
claude:
|
||||
if: |
|
||||
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
||||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
actions: read # Required for Claude to read CI results on PRs
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code
|
||||
id: claude
|
||||
uses: anthropics/claude-code-action@d7b6d50442a89f005016e778bf825a72ef582525 # v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
|
||||
# This is an optional setting that allows Claude to read CI results on PRs
|
||||
additional_permissions: |
|
||||
actions: read
|
||||
|
||||
# Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
|
||||
# prompt: 'Update the pull request description to include a summary of changes.'
|
||||
|
||||
# Optional: Add claude_args to customize behavior and configuration
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
|
||||
# claude_args: '--allowed-tools Bash(gh pr:*)'
|
||||
|
||||
@@ -2,7 +2,8 @@ name: Build and Publish Docker Image
|
||||
|
||||
on:
|
||||
push:
|
||||
tags: ["*"]
|
||||
tags:
|
||||
- "v*"
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
@@ -12,11 +13,11 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5
|
||||
with:
|
||||
# list of Docker images to use as base name for tags
|
||||
images: |
|
||||
|
||||
@@ -14,7 +14,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -38,6 +38,8 @@ jobs:
|
||||
|
||||
- name: Run chart-releaser
|
||||
uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
|
||||
with:
|
||||
skip_existing: true
|
||||
env:
|
||||
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
name: RAG Evaluation
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
manual_path:
|
||||
description: 'Path to Nextcloud User Manual PDF in Nextcloud'
|
||||
required: false
|
||||
default: 'Nextcloud Manual.pdf'
|
||||
embedding_model:
|
||||
description: 'OpenAI embedding model'
|
||||
required: false
|
||||
default: 'openai/text-embedding-3-small'
|
||||
generation_model:
|
||||
description: 'OpenAI generation model'
|
||||
required: false
|
||||
default: 'openai/gpt-4o-mini'
|
||||
|
||||
jobs:
|
||||
rag-evaluation:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
models: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
|
||||
- name: Run docker compose with vector sync
|
||||
uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
|
||||
with:
|
||||
compose-file: |
|
||||
./docker-compose.yml
|
||||
./docker-compose.ci.yml
|
||||
up-flags: "--build"
|
||||
env:
|
||||
# Environment variables passed to docker-compose.ci.yml
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: "https://models.github.ai/inference"
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
|
||||
VECTOR_SYNC_SCAN_INTERVAL: "5"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
|
||||
- name: Wait for Nextcloud to be ready
|
||||
run: |
|
||||
echo "Waiting for Nextcloud..."
|
||||
max_attempts=60
|
||||
attempt=0
|
||||
until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "Service did not become ready in time."
|
||||
exit 1
|
||||
fi
|
||||
echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
|
||||
sleep 5
|
||||
done
|
||||
echo "Nextcloud is ready."
|
||||
|
||||
- name: Wait for MCP server to be ready
|
||||
run: |
|
||||
echo "Waiting for MCP server..."
|
||||
max_attempts=30
|
||||
attempt=0
|
||||
until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health/live | grep -q "200"; do
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "MCP server did not become ready in time."
|
||||
exit 1
|
||||
fi
|
||||
echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
|
||||
sleep 2
|
||||
done
|
||||
echo "MCP server is ready."
|
||||
|
||||
- name: Run RAG evaluation tests
|
||||
env:
|
||||
NEXTCLOUD_HOST: "http://localhost:8080"
|
||||
NEXTCLOUD_USERNAME: "admin"
|
||||
NEXTCLOUD_PASSWORD: "admin"
|
||||
RAG_MANUAL_PATH: ${{ inputs.manual_path }}
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: "https://models.github.ai/inference"
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
|
||||
run: |
|
||||
uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai
|
||||
|
||||
- name: Capture MCP container logs
|
||||
if: always()
|
||||
run: |
|
||||
echo "=== MCP Container Logs ==="
|
||||
docker compose logs mcp --tail=500
|
||||
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5
|
||||
with:
|
||||
name: rag-evaluation-results
|
||||
path: |
|
||||
pytest-results.xml
|
||||
retention-days: 30
|
||||
@@ -18,9 +18,9 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
- name: Install Python 3.11
|
||||
run: uv python install 3.11
|
||||
- name: Build
|
||||
|
||||
@@ -9,9 +9,9 @@ jobs:
|
||||
linting:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
- name: Check format
|
||||
run: |
|
||||
uv run --frozen ruff format --diff
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
###### Required to build OIDC App ######
|
||||
|
||||
- name: Set up php 8.4
|
||||
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
|
||||
uses: shivammathur/setup-php@44454db4f0199b8b9685a5d763dc37cbf79108e1 # v2
|
||||
with:
|
||||
php-version: 8.4
|
||||
coverage: none
|
||||
@@ -49,14 +49,14 @@ jobs:
|
||||
|
||||
|
||||
- name: Run docker compose
|
||||
uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
|
||||
uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
|
||||
with:
|
||||
compose-file: "./docker-compose.yml"
|
||||
#compose-flags: "--profile qdrant"
|
||||
up-flags: "--build"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
|
||||
- name: Install Playwright dependencies
|
||||
run: |
|
||||
|
||||
+290
@@ -1,3 +1,293 @@
|
||||
# Changelog - MCP Server
|
||||
|
||||
All notable changes to the Nextcloud MCP Server will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [PEP 440](https://peps.python.org/pep-0440/).
|
||||
|
||||
## v0.56.1 (2025-12-19)
|
||||
|
||||
### Fix
|
||||
|
||||
- **astrolabe**: Update screenshots
|
||||
- **ci**: skip existing Helm chart releases to prevent duplicate release errors
|
||||
|
||||
## v0.56.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: add --increment flag to bump scripts for manual version control
|
||||
|
||||
### Fix
|
||||
|
||||
- **astrolabe**: add contents:write permission to appstore workflow
|
||||
- **astrolabe**: update commitizen pattern to properly update info.xml version
|
||||
- **astrolabe**: prevent workflow failure when only helm/astrolabe commits exist
|
||||
- **astrolabe**: info.xml
|
||||
|
||||
## v0.55.1 (2025-12-19)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: push all tags explicitly in bump workflow
|
||||
|
||||
## v0.55.0 (2025-12-19)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- MCP server now bumps for ANY conventional commit except
|
||||
those explicitly scoped to helm or astrolabe.
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: implement monorepo-aware version bumping workflow
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: make MCP server default bump target for all non-scoped commits
|
||||
- **ci**: restrict docker build to MCP server tags only
|
||||
- **ci**: correct appstore-push-action version to v1.0.4
|
||||
|
||||
## v0.54.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **astrolabe**: add Nextcloud App Store deployment automation
|
||||
- configure commitizen monorepo with independent versioning
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: improve versioning and error handling
|
||||
- **ci**: address critical workflow and validation issues
|
||||
- **astrolabe**: address code review feedback
|
||||
|
||||
## v0.53.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- add Alembic database migration system
|
||||
- make chunk modal title clickable link to documents
|
||||
- add native Plotly hover styling for clickable points
|
||||
- add click interactivity to Plotly 3D scatter chart
|
||||
- improve chunk viewer with fixed navigation and markdown rendering
|
||||
- **astrolabe**: enable multi-select for document types and refactor PDF viewer
|
||||
- **auth**: implement refresh token rotation for Nextcloud OIDC
|
||||
- **astrolabe**: enhance unified search and add webhook management
|
||||
- **astrolabe**: add webhook management UI to admin settings
|
||||
- **astrolabe**: add OAuth token refresh and webhook presets
|
||||
- **search**: add file_path metadata and chunk offsets to search results
|
||||
- **astrolabe**: use proper icons and thumbnails in unified search
|
||||
- **astrolabe**: add admin search settings and enhanced UI
|
||||
- **astrolabe**: add unified search provider with clickable file links
|
||||
- **astrolabe**: add 3D PCA visualization for semantic search
|
||||
- **astrolabe**: add Nextcloud PHP app for MCP server management
|
||||
- **vector-sync**: enable background sync in OAuth mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **security**: address critical security issues from PR #401 code review
|
||||
- **oauth**: enable PKCE for all clients and add token_broker to oauth_context
|
||||
- **astrolabe**: revert invalid files_pdfviewer URL for file links
|
||||
- resolve type checking warnings for CI
|
||||
- move Alembic to package submodule for Docker compatibility
|
||||
- update unified search results to match chunk viz display
|
||||
- **astrolabe**: handle OAuth refresh token rotation
|
||||
- address critical code review issues (4 fixes)
|
||||
- resolve CI linting issues for Astroglobe
|
||||
|
||||
### Refactor
|
||||
|
||||
- **astrolabe**: extract PDF viewer to dedicated component
|
||||
- **astrolabe**: reframe UI as semantic search service
|
||||
|
||||
## v0.52.1 (2025-12-13)
|
||||
|
||||
### Perf
|
||||
|
||||
- **deck**: optimize card lookup by storing board_id/stack_id in metadata
|
||||
|
||||
## v0.52.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector**: add Deck card vector search with visualization support
|
||||
|
||||
## v0.51.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector-viz**: add news_item support for links and chunk expansion
|
||||
|
||||
## v0.50.2 (2025-12-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- **news**: revert get_item() to use get_items() + filter
|
||||
|
||||
## v0.50.1 (2025-12-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- Disable DNS rebinding protection for containerized deployments
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## v0.50.0 (2025-12-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- add MCP tool annotations for enhanced UX
|
||||
|
||||
### Fix
|
||||
|
||||
- address PR review feedback
|
||||
|
||||
## v0.49.2 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update lockfile
|
||||
|
||||
## v0.49.1 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Revert mcp version <1.23
|
||||
|
||||
## v0.49.0 (2025-12-08)
|
||||
|
||||
### Feat
|
||||
|
||||
- **news**: add Nextcloud News app integration
|
||||
|
||||
### Fix
|
||||
|
||||
- resolve all type checking errors (8 errors fixed)
|
||||
|
||||
### Refactor
|
||||
|
||||
- **news**: simplify vector sync to fetch all items
|
||||
|
||||
### Perf
|
||||
|
||||
- **news**: use direct API endpoint for get_item()
|
||||
|
||||
## v0.48.6 (2025-12-03)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## v0.48.5 (2025-11-28)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency pillow to v12
|
||||
|
||||
## v0.48.4 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Add rate limit retry logic to OpenAI provider
|
||||
|
||||
## v0.48.3 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Increase MCP sampling timeout to 5 minutes for slower LLMs
|
||||
|
||||
## v0.48.2 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
|
||||
## v0.48.1 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- Move background tasks to server lifespan and deprecate SSE transport
|
||||
|
||||
## v0.48.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add tag management methods to WebDAV client
|
||||
|
||||
## v0.47.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add OpenAI provider support for embeddings and generation
|
||||
|
||||
## v0.46.2 (2025-11-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Enable JSON response format for scanner compatibility
|
||||
|
||||
## v0.46.1 (2025-11-22)
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize vector viz search performance
|
||||
|
||||
## v0.46.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Smithery CLI deployment support
|
||||
- Implement ADR-016 Smithery stateless deployment mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
|
||||
- **smithery**: Use container runtime pattern for config discovery
|
||||
- Add Smithery lifespan and auth mode detection
|
||||
|
||||
## v0.45.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add context expansion to semantic search with chunk overlap removal
|
||||
- Use Ollama native batch API in embed_batch()
|
||||
- Implement Qdrant placeholder state management
|
||||
- Switch files to use numeric IDs with file_path resolution
|
||||
- Implement per-chunk vector visualization with context expansion
|
||||
|
||||
### Fix
|
||||
|
||||
- Use alpha_composite for proper RGBA highlight blending
|
||||
- Remove pymupdf.layout.activate() to fix page_chunks behavior
|
||||
- Centralize PDF processing and generate separate images per chunk
|
||||
- Set is_placeholder=False in processor to fix search filtering
|
||||
- Increase placeholder staleness threshold to 5x scan interval
|
||||
- Add placeholder staleness check to prevent duplicate processing
|
||||
- Use empty SparseVector instead of None for placeholders
|
||||
- Return empty array instead of null for query_coords when no results
|
||||
- Align PDF text extraction between indexing and context expansion
|
||||
- Update models and viz to use int-only doc_id
|
||||
- Reconstruct full content for notes to match indexed offsets
|
||||
- Add async/await, PDF metadata, and type safety fixes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Simplify PDF text extraction with single to_markdown call
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize PDF processing with parallel extraction and single-render highlights
|
||||
|
||||
## v0.44.1 (2025-11-21)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.22,<1.23
|
||||
|
||||
## v0.44.0 (2025-11-19)
|
||||
|
||||
### Feat
|
||||
|
||||
@@ -56,6 +56,68 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
- Pass-through (default): Simple, stateless (ENABLE_TOKEN_EXCHANGE=false)
|
||||
- Token exchange (opt-in): RFC 8693 delegation (ENABLE_TOKEN_EXCHANGE=true)
|
||||
|
||||
### MCP Tool Annotations (ADR-017)
|
||||
|
||||
**All tools MUST include annotations** following these patterns:
|
||||
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
# Read-only tools (list, search, get)
|
||||
@mcp.tool(
|
||||
title="Human Readable Name",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True, # Nextcloud is external to MCP server
|
||||
),
|
||||
)
|
||||
|
||||
# Create operations
|
||||
@mcp.tool(
|
||||
title="Create Resource",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Creates new resources each time
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Update operations (with etag/version control)
|
||||
@mcp.tool(
|
||||
title="Update Resource",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # ETag changes = different inputs
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Delete operations
|
||||
@mcp.tool(
|
||||
title="Delete Resource",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Permanently deletes data
|
||||
idempotentHint=True, # Same end state if called repeatedly
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# HTTP PUT without version control (special case)
|
||||
@mcp.tool(
|
||||
title="Write File",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=True, # Same content = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
**Key Principles**:
|
||||
- **Idempotency**: Same inputs → same result. ETags change after updates, making them non-idempotent
|
||||
- **Destructive**: Operations that permanently delete/overwrite data
|
||||
- **Open World**: All Nextcloud tools access external service (openWorldHint=True)
|
||||
- **Titles**: Use human-readable names, not snake_case function names
|
||||
|
||||
**See**: `docs/ADR-017-mcp-tool-annotations.md` for detailed rationale and examples
|
||||
|
||||
### Project Structure
|
||||
- `nextcloud_mcp_server/client/` - HTTP clients for Nextcloud APIs
|
||||
- `nextcloud_mcp_server/server/` - MCP tool/resource definitions
|
||||
@@ -444,6 +506,29 @@ docker compose exec app php occ user_oidc:provider keycloak
|
||||
**Nextcloud**: `docker compose exec app php occ ...` for occ commands
|
||||
**MariaDB**: `docker compose exec db mariadb -u [user] -p [password] [database]` for queries
|
||||
|
||||
### Querying Nextcloud Application Logs
|
||||
|
||||
**Use this pattern** to inspect Nextcloud application logs during debugging:
|
||||
|
||||
```bash
|
||||
# View recent log entries
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq | tail
|
||||
|
||||
# Filter by app
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.app == "astrolabe")' | tail
|
||||
|
||||
# Filter by log level (0=DEBUG, 1=INFO, 2=WARN, 3=ERROR, 4=FATAL)
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.level >= 3)' | tail
|
||||
|
||||
# Search for specific messages
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.message | contains("OAuth"))' | tail -20
|
||||
|
||||
# View full exception traces
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.exception != null)' | tail -5
|
||||
```
|
||||
|
||||
**Log Structure**: Each entry is a JSON object with fields: `reqId`, `level`, `time`, `remoteAddr`, `user`, `app`, `method`, `url`, `message`, `userAgent`, `version`, `exception`
|
||||
|
||||
**For detailed setup, see**:
|
||||
- `docs/installation.md` - Installation guide
|
||||
- `docs/configuration.md` - Configuration options
|
||||
|
||||
+116
@@ -0,0 +1,116 @@
|
||||
# Contributing to Nextcloud MCP Server
|
||||
|
||||
## Version Management
|
||||
|
||||
This monorepo uses commitizen for version management with **independent versioning** for three components:
|
||||
|
||||
### Components
|
||||
|
||||
| Component | Scope | Bump Command | Tag Example |
|
||||
|-----------|-------|--------------|-------------|
|
||||
| MCP Server | `mcp` or none | `./scripts/bump-mcp.sh` | `v0.54.0` |
|
||||
| Helm Chart | `helm` | `./scripts/bump-helm.sh` | `nextcloud-mcp-server-0.54.0` |
|
||||
| Astrolabe App | `astrolabe` | `./scripts/bump-astrolabe.sh` | `astrolabe-v0.2.0` |
|
||||
|
||||
### Commit Message Format
|
||||
|
||||
Use conventional commits with **scopes** to target specific components:
|
||||
|
||||
```bash
|
||||
# MCP server changes
|
||||
feat(mcp): add calendar sync API
|
||||
fix(mcp): resolve authentication bug
|
||||
|
||||
# Helm chart changes
|
||||
feat(helm): add resource limits
|
||||
docs(helm): update values documentation
|
||||
|
||||
# Astrolabe app changes
|
||||
feat(astrolabe): add dark mode toggle
|
||||
fix(astrolabe): resolve search UI bug
|
||||
```
|
||||
|
||||
**Unscoped commits** default to the MCP server:
|
||||
```bash
|
||||
feat: add new feature # → MCP server (v0.54.0)
|
||||
```
|
||||
|
||||
### Release Workflow
|
||||
|
||||
#### 1. Make Changes with Scoped Commits
|
||||
|
||||
```bash
|
||||
git commit -m "feat(astrolabe): add dark mode toggle"
|
||||
git commit -m "feat(helm): add ingress annotations"
|
||||
git commit -m "feat(mcp): add calendar sync"
|
||||
```
|
||||
|
||||
#### 2. Bump Component Versions
|
||||
|
||||
```bash
|
||||
# Bump MCP server (reads commits with scope=mcp or unscoped)
|
||||
./scripts/bump-mcp.sh
|
||||
# → Creates tag: v0.54.0
|
||||
# → Updates: pyproject.toml, Chart.yaml:appVersion
|
||||
|
||||
# Bump Helm chart (reads commits with scope=helm)
|
||||
./scripts/bump-helm.sh
|
||||
# → Creates tag: nextcloud-mcp-server-0.54.0
|
||||
# → Updates: Chart.yaml:version
|
||||
|
||||
# Bump Astrolabe (reads commits with scope=astrolabe)
|
||||
./scripts/bump-astrolabe.sh
|
||||
# → Creates tag: astrolabe-v0.2.0
|
||||
# → Updates: info.xml, package.json
|
||||
```
|
||||
|
||||
#### 3. Push Tags
|
||||
|
||||
```bash
|
||||
git push --follow-tags
|
||||
```
|
||||
|
||||
### Changelog Filtering
|
||||
|
||||
Each component maintains its own `CHANGELOG.md`:
|
||||
|
||||
- **MCP Server**: `CHANGELOG.md` (root) - includes `feat(mcp):` and unscoped commits
|
||||
- **Helm Chart**: `charts/nextcloud-mcp-server/CHANGELOG.md` - includes `feat(helm):` only
|
||||
- **Astrolabe**: `third_party/astrolabe/CHANGELOG.md` - includes `feat(astrolabe):` only
|
||||
|
||||
### Manual Version Bumps
|
||||
|
||||
For specific increments:
|
||||
|
||||
```bash
|
||||
# Patch bump (0.53.0 → 0.53.1)
|
||||
uv run cz bump --increment PATCH
|
||||
|
||||
# Minor bump (0.53.0 → 0.54.0)
|
||||
uv run cz bump --increment MINOR
|
||||
|
||||
# Major bump (0.53.0 → 1.0.0)
|
||||
uv run cz bump --increment MAJOR
|
||||
|
||||
# For non-MCP components, use --config
|
||||
cd charts/nextcloud-mcp-server
|
||||
uv run cz --config .cz.toml bump --increment MINOR
|
||||
```
|
||||
|
||||
### Versioning Philosophy
|
||||
|
||||
- **MCP Server**: Follows PEP 440, `major_version_zero = true` (0.x.x for pre-1.0)
|
||||
- **Helm Chart**: Follows PEP 440, starts at 0.53.0 (continues from current)
|
||||
- **Astrolabe**: Follows PEP 440, `major_version_zero = true` (0.x.x for alpha/beta)
|
||||
|
||||
### Chart.yaml Version vs appVersion
|
||||
|
||||
The Helm chart has TWO version fields:
|
||||
|
||||
- **`version`**: Chart packaging version (bumped by `feat(helm):`)
|
||||
- Example: `0.53.0` → `0.54.0` when adding resource limits
|
||||
|
||||
- **`appVersion`**: MCP server version being deployed (bumped by `feat(mcp):`)
|
||||
- Example: `"0.53.0"` → `"0.54.0"` when MCP server releases
|
||||
|
||||
This allows the chart to evolve independently from the application.
|
||||
+10
-3
@@ -1,21 +1,28 @@
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:2e683fc3e18a248aa23b8022f2a3474b072b04fb851efe9b49f6b516a8944939
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:fa48eefe2146644c2308b909d6bb7651a768178f84fc9550dcd495e4d6d84d01
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.10@sha256:29bd45092ea8902c0bbb7f0a338f0494a382b1f4b18355df5be270ade679ff1d /uv /uvx /bin/
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.18@sha256:5713fa8217f92b80223bc83aac7db36ec80a84437dbc0d04bbc659cae030d8c9 /uv /uvx /bin/
|
||||
|
||||
# Install dependencies
|
||||
# 1. git (required for caldav dependency from git)
|
||||
# 2. sqlite for development with token db
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
git \
|
||||
tesseract-ocr \
|
||||
sqlite3 && apt clean
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY pyproject.toml uv.lock README.md .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-install-project --no-cache
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-editable --no-cache
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV VIRTUAL_ENV=/app/.venv
|
||||
ENV PATH=/app/.venv/bin:$PATH
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
|
||||
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "run", "--host", "0.0.0.0"]
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# Dockerfile for Smithery stateless deployment
|
||||
# ADR-016: Stateless mode for multi-user public Nextcloud instances
|
||||
#
|
||||
# This image excludes:
|
||||
# - Vector database dependencies (qdrant-client)
|
||||
# - Background sync workers
|
||||
# - Admin UI routes (/app)
|
||||
# - Semantic search tools
|
||||
#
|
||||
# Features included:
|
||||
# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
|
||||
# - Per-session app password authentication
|
||||
# - Multi-user support via Smithery session config
|
||||
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:fa48eefe2146644c2308b909d6bb7651a768178f84fc9550dcd495e4d6d84d01
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install uv for fast dependency management
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.18@sha256:5713fa8217f92b80223bc83aac7db36ec80a84437dbc0d04bbc659cae030d8c9 /uv /uvx /bin/
|
||||
|
||||
# Install dependencies
|
||||
# 1. git (required for caldav dependency from git)
|
||||
# 2. sqlite for development with token db
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
git
|
||||
|
||||
# Copy project files
|
||||
COPY . .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-editable --no-cache
|
||||
|
||||
# Set Smithery mode environment variables
|
||||
ENV SMITHERY_DEPLOYMENT=true
|
||||
ENV VECTOR_SYNC_ENABLED=false
|
||||
|
||||
# Smithery sets PORT=8081 by default
|
||||
EXPOSE 8081
|
||||
|
||||
# Health check endpoint
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD uv run python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
|
||||
|
||||
CMD ["/app/.venv/bin/smithery-main"]
|
||||
@@ -5,6 +5,7 @@
|
||||
# Nextcloud MCP Server
|
||||
|
||||
[](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
|
||||
[](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
|
||||
**A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
|
||||
|
||||
@@ -17,7 +18,20 @@ This is a **dedicated standalone MCP server** designed for external MCP clients
|
||||
|
||||
## Quick Start
|
||||
|
||||
Get up and running in 60 seconds using Docker:
|
||||
The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
|
||||
|
||||
1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
2. Click "Deploy" and configure:
|
||||
- **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
|
||||
- **Username**: Your Nextcloud username
|
||||
- **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
|
||||
|
||||
> [!NOTE]
|
||||
> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
|
||||
|
||||
## Docker (Self-Hosted)
|
||||
|
||||
For full features including semantic search, run with Docker:
|
||||
|
||||
```bash
|
||||
# 1. Create a minimal configuration
|
||||
@@ -37,12 +51,11 @@ curl http://127.0.0.1:8000/health/ready
|
||||
# 4. Connect to the endpoint
|
||||
http://127.0.0.1:8000/sse
|
||||
|
||||
# 4. Or with --transport streamable-http
|
||||
# Or with --transport streamable-http
|
||||
http://127.0.0.1:8000/mcp
|
||||
```
|
||||
|
||||
**Next Steps:**
|
||||
- Create an app password in Nextcloud: Settings → Security → Devices & sessions
|
||||
- Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
|
||||
- See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)
|
||||
|
||||
@@ -50,7 +63,7 @@ http://127.0.0.1:8000/mcp
|
||||
|
||||
- **90+ MCP Tools** - Comprehensive API coverage across 8 Nextcloud apps
|
||||
- **MCP Resources** - Structured data URIs for browsing Nextcloud data
|
||||
- **Semantic Search (Experimental)** - Optional vector-powered search for Notes (requires Qdrant + Ollama)
|
||||
- **Semantic Search (Experimental)** - Optional vector-powered search for Notes, Files, News items, and Deck cards (requires Qdrant + Ollama)
|
||||
- **Document Processing** - OCR and text extraction from PDFs, DOCX, images with progress notifications
|
||||
- **Flexible Deployment** - Docker, Kubernetes (Helm), VM, or local installation
|
||||
- **Production-Ready Auth** - Basic Auth with app passwords (recommended) or OAuth2/OIDC (experimental)
|
||||
@@ -68,7 +81,7 @@ http://127.0.0.1:8000/mcp
|
||||
| **Cookbook** | 13 | Recipe management, URL import (schema.org) |
|
||||
| **Tables** | 5 | Row operations on Nextcloud Tables |
|
||||
| **Sharing** | 10+ | Create and manage shares |
|
||||
| **Semantic Search** | 2+ | Vector search for Notes (experimental, opt-in, requires infrastructure) |
|
||||
| **Semantic Search** | 2+ | Vector search for Notes, Files, News items, and Deck cards (experimental, opt-in, requires infrastructure) |
|
||||
|
||||
Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request!
|
||||
|
||||
@@ -132,7 +145,7 @@ This enables natural language queries and helps discover related content across
|
||||
### Features
|
||||
- **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
|
||||
- **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
|
||||
- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
|
||||
- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes, Files, News items, Deck cards; opt-in)
|
||||
- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing
|
||||
|
||||
### Advanced Topics
|
||||
|
||||
+90
@@ -0,0 +1,90 @@
|
||||
# Alembic configuration file for nextcloud-mcp-server
|
||||
|
||||
[alembic]
|
||||
# Path to migration scripts
|
||||
script_location = nextcloud_mcp_server/alembic
|
||||
|
||||
# Template used to generate migration file names
|
||||
# Default: %%(rev)s_%%(slug)s
|
||||
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
|
||||
|
||||
# Timezone for migration timestamps
|
||||
# Default: utc
|
||||
timezone = utc
|
||||
|
||||
# Max length of characters to apply to the "slug" field
|
||||
# Default: 40
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# Set to 'true' to run the environment during the 'revision' command
|
||||
# Default: false
|
||||
# revision_environment = false
|
||||
|
||||
# Set to 'true' to allow .pyc and .pyo files without a source .py file
|
||||
# Default: false
|
||||
# sourceless = false
|
||||
|
||||
# Version location specification
|
||||
# Supports single or multiple directories
|
||||
version_locations = nextcloud_mcp_server/alembic/versions
|
||||
|
||||
# Path separator for version locations (required to suppress deprecation warning)
|
||||
# Use os (for cross-platform compatibility)
|
||||
path_separator = os
|
||||
|
||||
# Set to 'true' to search source files recursively in each "version_locations" directory
|
||||
# Default: false
|
||||
# recursive_version_locations = false
|
||||
|
||||
# Output encoding used when revision files are written
|
||||
# Default: utf-8
|
||||
# output_encoding = utf-8
|
||||
|
||||
# Database URL - can be overridden by:
|
||||
# 1. Passing -x database_url=... to alembic commands
|
||||
# 2. Setting in environment via get_database_url() in env.py
|
||||
# Default: sqlite:///app/data/tokens.db
|
||||
sqlalchemy.url = sqlite+aiosqlite:////app/data/tokens.db
|
||||
|
||||
[post_write_hooks]
|
||||
# Post-write hooks allow you to run scripts after generating migration files
|
||||
# Example: format migrations with ruff
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = ruff
|
||||
# ruff.options = format REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -0,0 +1,71 @@
|
||||
Database Migrations for nextcloud-mcp-server
|
||||
============================================
|
||||
|
||||
This directory contains Alembic database migrations for the token storage database.
|
||||
|
||||
Structure
|
||||
---------
|
||||
- env.py: Alembic environment configuration
|
||||
- script.py.mako: Template for generating new migration files
|
||||
- versions/: Directory containing migration scripts
|
||||
|
||||
Usage
|
||||
-----
|
||||
Migrations are managed via the CLI:
|
||||
|
||||
# Upgrade database to latest version
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show current database version
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Create a new migration (developers only)
|
||||
uv run nextcloud-mcp-server db migrate "description of changes"
|
||||
|
||||
# Downgrade database by one version (emergency use only)
|
||||
uv run nextcloud-mcp-server db downgrade
|
||||
|
||||
Direct Alembic Usage
|
||||
--------------------
|
||||
You can also use Alembic commands directly:
|
||||
|
||||
# Specify database URL via -x flag
|
||||
uv run alembic -x database_url=sqlite+aiosqlite:////path/to/tokens.db upgrade head
|
||||
|
||||
# Or set in alembic.ini and run
|
||||
uv run alembic upgrade head
|
||||
uv run alembic current
|
||||
uv run alembic history
|
||||
|
||||
Writing Migrations
|
||||
------------------
|
||||
Since we don't use SQLAlchemy models, migrations are written with raw SQL:
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute("""
|
||||
ALTER TABLE refresh_tokens
|
||||
ADD COLUMN new_field TEXT
|
||||
""")
|
||||
|
||||
def downgrade() -> None:
|
||||
# SQLite doesn't support DROP COLUMN, use table recreation
|
||||
op.execute("""
|
||||
CREATE TABLE refresh_tokens_new AS
|
||||
SELECT user_id, encrypted_token, ... FROM refresh_tokens
|
||||
""")
|
||||
op.execute("DROP TABLE refresh_tokens")
|
||||
op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
|
||||
|
||||
Migration File Naming
|
||||
---------------------
|
||||
Format: YYYYMMDD_HHMM_<revision>_<slug>.py
|
||||
Example: 20251217_2200_001_initial_schema.py
|
||||
|
||||
Notes
|
||||
-----
|
||||
- Migrations run automatically when RefreshTokenStorage.initialize() is called
|
||||
- Existing databases are automatically stamped with the initial version
|
||||
- SQLite has limited ALTER TABLE support - complex changes require table recreation
|
||||
@@ -0,0 +1,26 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = ${repr(up_revision)}
|
||||
down_revision = ${repr(down_revision)}
|
||||
branch_labels = ${repr(branch_labels)}
|
||||
depends_on = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Apply migration changes to upgrade the database schema."""
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Revert migration changes to downgrade the database schema."""
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -3,3 +3,9 @@
|
||||
set -euox pipefail
|
||||
|
||||
php /var/www/html/occ config:system:set trusted_domains 2 --value=host.docker.internal
|
||||
|
||||
# Set overwrite settings for URL generation (needed for OIDC discovery to return correct URLs)
|
||||
# These ensure that URLs generated by Nextcloud include the correct host:port
|
||||
php /var/www/html/occ config:system:set overwritehost --value="localhost:8080"
|
||||
php /var/www/html/occ config:system:set overwriteprotocol --value="http"
|
||||
php /var/www/html/occ config:system:set overwrite.cli.url --value="http://localhost:8080"
|
||||
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euox pipefail
|
||||
|
||||
php /var/www/html/occ app:enable news
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euox pipefail
|
||||
|
||||
echo "Installing and configuring Astrolabe app for testing..."
|
||||
|
||||
# Check if development astrolabe app is mounted at /opt/apps/astrolabe
|
||||
if [ -d /opt/apps/astrolabe ]; then
|
||||
echo "Development astrolabe app found at /opt/apps/astrolabe"
|
||||
|
||||
# Remove any existing astrolabe app in custom_apps (from app store or old symlink)
|
||||
if [ -e /var/www/html/custom_apps/astrolabe ]; then
|
||||
echo "Removing existing astrolabe in custom_apps..."
|
||||
rm -rf /var/www/html/custom_apps/astrolabe
|
||||
fi
|
||||
|
||||
# Create symlink from custom_apps to the mounted development version
|
||||
# Per Nextcloud docs: apps outside server root need symlinks in server root
|
||||
echo "Creating symlink: custom_apps/astrolabe -> /opt/apps/astrolabe"
|
||||
ln -sf /opt/apps/astrolabe /var/www/html/custom_apps/astrolabe
|
||||
|
||||
echo "Enabling astrolabe app from /opt/apps (development mode via symlink)"
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
elif [ -d /var/www/html/custom_apps/astrolabe ]; then
|
||||
echo "astrolabe app directory found in custom_apps (already installed)"
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
else
|
||||
echo "astrolabe app not found, installing from app store..."
|
||||
php /var/www/html/occ app:install astrolabe
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
fi
|
||||
|
||||
# Configure MCP server URLs in Nextcloud system config
|
||||
# - mcp_server_url: Internal URL for PHP app to call MCP server APIs (Docker internal network)
|
||||
# - mcp_server_public_url: Public URL for OAuth token audience (what browsers/MCP clients see)
|
||||
php /var/www/html/occ config:system:set mcp_server_url --value='http://mcp-oauth:8001'
|
||||
php /var/www/html/occ config:system:set mcp_server_public_url --value='http://localhost:8001'
|
||||
|
||||
# Create OAuth client for Astrolabe app
|
||||
# The resource_url MUST match what the MCP server expects as token audience
|
||||
# This allows tokens from this client to be validated by MCP server's UnifiedTokenVerifier
|
||||
MCP_CLIENT_ID="nextcloudMcpServerUIPublicClient"
|
||||
MCP_RESOURCE_URL="http://localhost:8001"
|
||||
MCP_REDIRECT_URI="http://localhost:8080/apps/astrolabe/oauth/callback"
|
||||
|
||||
echo "Configuring OAuth client for Astrolabe..."
|
||||
|
||||
# Check if client already exists
|
||||
if php /var/www/html/occ oidc:list 2>/dev/null | grep -q "$MCP_CLIENT_ID"; then
|
||||
echo "OAuth client $MCP_CLIENT_ID already exists, removing to recreate with correct settings..."
|
||||
php /var/www/html/occ oidc:remove "$MCP_CLIENT_ID" || true
|
||||
fi
|
||||
|
||||
# Create OAuth client with correct resource_url for MCP server audience
|
||||
echo "Creating OAuth confidential client with resource_url=$MCP_RESOURCE_URL"
|
||||
CLIENT_OUTPUT=$(php /var/www/html/occ oidc:create \
|
||||
"Astrolabe" \
|
||||
"$MCP_REDIRECT_URI" \
|
||||
--client_id="$MCP_CLIENT_ID" \
|
||||
--type=confidential \
|
||||
--flow=code \
|
||||
--token_type=jwt \
|
||||
--resource_url="$MCP_RESOURCE_URL" \
|
||||
--allowed_scopes="openid profile email offline_access notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write")
|
||||
|
||||
echo "$CLIENT_OUTPUT"
|
||||
|
||||
# Extract client_secret from JSON output
|
||||
CLIENT_SECRET=$(echo "$CLIENT_OUTPUT" | php -r 'echo json_decode(file_get_contents("php://stdin"), true)["client_secret"] ?? "";')
|
||||
|
||||
if [ -n "$CLIENT_SECRET" ]; then
|
||||
echo "Configuring Astrolabe client secret in system config..."
|
||||
php /var/www/html/occ config:system:set astrolabe_client_secret --value="$CLIENT_SECRET"
|
||||
echo "✓ Client secret configured: ${CLIENT_SECRET:0:8}..."
|
||||
else
|
||||
echo "⚠ Warning: Could not extract client_secret from OIDC client creation"
|
||||
fi
|
||||
|
||||
# Configure OAuth client ID in system config
|
||||
echo "Configuring Astrolabe client ID in system config..."
|
||||
php /var/www/html/occ config:system:set astrolabe_client_id --value="$MCP_CLIENT_ID"
|
||||
echo "✓ Client ID configured: $MCP_CLIENT_ID"
|
||||
|
||||
echo "Astrolabe app installed and configured successfully"
|
||||
@@ -0,0 +1,24 @@
|
||||
[tool.commitizen]
|
||||
name = "cz_conventional_commits"
|
||||
version = "0.54.0"
|
||||
tag_format = "nextcloud-mcp-server-$version"
|
||||
version_scheme = "semver"
|
||||
update_changelog_on_bump = true
|
||||
major_version_zero = true
|
||||
|
||||
# Update chart version only (NOT appVersion)
|
||||
version_files = [
|
||||
"Chart.yaml:^version:"
|
||||
]
|
||||
|
||||
# Ignore tags from other components
|
||||
ignored_tag_formats = [
|
||||
"v*", # MCP server tags
|
||||
"astrolabe-v*", # Astrolabe tags
|
||||
]
|
||||
|
||||
# Filter commits by scope
|
||||
[tool.commitizen.customize]
|
||||
changelog_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:"
|
||||
schema_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:\\s.+"
|
||||
message_template = "{{change_type}}(helm): {{message}}"
|
||||
@@ -0,0 +1,746 @@
|
||||
# Changelog - Helm Chart
|
||||
|
||||
All notable changes to the Helm chart will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
|
||||
### Added
|
||||
- Initial independent versioning release
|
||||
- Support for Nextcloud MCP server deployment
|
||||
- Qdrant subchart integration
|
||||
- Ollama subchart integration
|
||||
- Configurable resource limits
|
||||
- Grafana dashboard annotations
|
||||
|
||||
## nextcloud-mcp-server-0.54.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: implement monorepo-aware version bumping workflow
|
||||
- **astrolabe**: add Nextcloud App Store deployment automation
|
||||
- configure commitizen monorepo with independent versioning
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: improve versioning and error handling
|
||||
- **ci**: address critical workflow and validation issues
|
||||
- **astrolabe**: address code review feedback
|
||||
|
||||
## nextcloud-mcp-server-0.53.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- add Alembic database migration system
|
||||
- make chunk modal title clickable link to documents
|
||||
- add native Plotly hover styling for clickable points
|
||||
- add click interactivity to Plotly 3D scatter chart
|
||||
- improve chunk viewer with fixed navigation and markdown rendering
|
||||
- **astrolabe**: enable multi-select for document types and refactor PDF viewer
|
||||
- **auth**: implement refresh token rotation for Nextcloud OIDC
|
||||
- **astrolabe**: enhance unified search and add webhook management
|
||||
- **astrolabe**: add webhook management UI to admin settings
|
||||
- **astrolabe**: add OAuth token refresh and webhook presets
|
||||
- **search**: add file_path metadata and chunk offsets to search results
|
||||
- **astrolabe**: use proper icons and thumbnails in unified search
|
||||
- **astrolabe**: add admin search settings and enhanced UI
|
||||
- **astrolabe**: add unified search provider with clickable file links
|
||||
- **astrolabe**: add 3D PCA visualization for semantic search
|
||||
- **astrolabe**: add Nextcloud PHP app for MCP server management
|
||||
- **vector-sync**: enable background sync in OAuth mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **security**: address critical security issues from PR #401 code review
|
||||
- **oauth**: enable PKCE for all clients and add token_broker to oauth_context
|
||||
- **astrolabe**: revert invalid files_pdfviewer URL for file links
|
||||
- resolve type checking warnings for CI
|
||||
- move Alembic to package submodule for Docker compatibility
|
||||
- update unified search results to match chunk viz display
|
||||
- **astrolabe**: handle OAuth refresh token rotation
|
||||
- address critical code review issues (4 fixes)
|
||||
- resolve CI linting issues for Astroglobe
|
||||
|
||||
### Refactor
|
||||
|
||||
- **astrolabe**: extract PDF viewer to dedicated component
|
||||
- **astrolabe**: reframe UI as semantic search service
|
||||
|
||||
## nextcloud-mcp-server-0.52.1 (2025-12-13)
|
||||
|
||||
## nextcloud-mcp-server-0.52.0 (2025-12-13)
|
||||
|
||||
## nextcloud-mcp-server-0.51.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector**: add Deck card vector search with visualization support
|
||||
- **vector-viz**: add news_item support for links and chunk expansion
|
||||
|
||||
### Perf
|
||||
|
||||
- **deck**: optimize card lookup by storing board_id/stack_id in metadata
|
||||
|
||||
## nextcloud-mcp-server-0.50.2 (2025-12-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- **news**: revert get_item() to use get_items() + filter
|
||||
|
||||
## nextcloud-mcp-server-0.50.1 (2025-12-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- Disable DNS rebinding protection for containerized deployments
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## nextcloud-mcp-server-0.50.0 (2025-12-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- add MCP tool annotations for enhanced UX
|
||||
|
||||
### Fix
|
||||
|
||||
- address PR review feedback
|
||||
|
||||
## nextcloud-mcp-server-0.49.2 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update lockfile
|
||||
|
||||
## nextcloud-mcp-server-0.49.1 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Revert mcp version <1.23
|
||||
|
||||
## nextcloud-mcp-server-0.49.0 (2025-12-08)
|
||||
|
||||
### Fix
|
||||
|
||||
- resolve all type checking errors (8 errors fixed)
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
### Perf
|
||||
|
||||
- **news**: use direct API endpoint for get_item()
|
||||
|
||||
## nextcloud-mcp-server-0.48.5 (2025-11-28)
|
||||
|
||||
### Feat
|
||||
|
||||
- **news**: add Nextcloud News app integration
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency pillow to v12
|
||||
|
||||
### Refactor
|
||||
|
||||
- **news**: simplify vector sync to fetch all items
|
||||
|
||||
## nextcloud-mcp-server-0.48.4 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Add rate limit retry logic to OpenAI provider
|
||||
|
||||
## nextcloud-mcp-server-0.48.3 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Increase MCP sampling timeout to 5 minutes for slower LLMs
|
||||
|
||||
## nextcloud-mcp-server-0.48.2 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
|
||||
## nextcloud-mcp-server-0.48.1 (2025-11-23)
|
||||
|
||||
## nextcloud-mcp-server-0.48.0 (2025-11-23)
|
||||
|
||||
## nextcloud-mcp-server-0.47.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add tag management methods to WebDAV client
|
||||
- Add OpenAI provider support for embeddings and generation
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- Move background tasks to server lifespan and deprecate SSE transport
|
||||
|
||||
## nextcloud-mcp-server-0.46.2 (2025-11-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Enable JSON response format for scanner compatibility
|
||||
|
||||
## nextcloud-mcp-server-0.46.1 (2025-11-22)
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize vector viz search performance
|
||||
|
||||
## nextcloud-mcp-server-0.46.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Smithery CLI deployment support
|
||||
- Implement ADR-016 Smithery stateless deployment mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
|
||||
- **smithery**: Use container runtime pattern for config discovery
|
||||
- Add Smithery lifespan and auth mode detection
|
||||
|
||||
## nextcloud-mcp-server-0.45.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add context expansion to semantic search with chunk overlap removal
|
||||
- Use Ollama native batch API in embed_batch()
|
||||
- Implement Qdrant placeholder state management
|
||||
- Switch files to use numeric IDs with file_path resolution
|
||||
- Implement per-chunk vector visualization with context expansion
|
||||
|
||||
### Fix
|
||||
|
||||
- Use alpha_composite for proper RGBA highlight blending
|
||||
- Remove pymupdf.layout.activate() to fix page_chunks behavior
|
||||
- Centralize PDF processing and generate separate images per chunk
|
||||
- Set is_placeholder=False in processor to fix search filtering
|
||||
- Increase placeholder staleness threshold to 5x scan interval
|
||||
- Add placeholder staleness check to prevent duplicate processing
|
||||
- Use empty SparseVector instead of None for placeholders
|
||||
- Return empty array instead of null for query_coords when no results
|
||||
- Align PDF text extraction between indexing and context expansion
|
||||
- Update models and viz to use int-only doc_id
|
||||
- Reconstruct full content for notes to match indexed offsets
|
||||
- Add async/await, PDF metadata, and type safety fixes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Simplify PDF text extraction with single to_markdown call
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize PDF processing with parallel extraction and single-render highlights
|
||||
|
||||
## nextcloud-mcp-server-0.44.1 (2025-11-21)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.22,<1.23
|
||||
|
||||
## nextcloud-mcp-server-0.44.0 (2025-11-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- Improve vector visualization with static assets and fixes
|
||||
- Redesign UI to match Nextcloud ecosystem aesthetic
|
||||
|
||||
### Fix
|
||||
|
||||
- Improve 3D plot rendering with explicit dimensions and window resize support
|
||||
- Preserve 3D plot camera and improve documentation
|
||||
- Preserve 3D plot camera position and fix CSS loading
|
||||
|
||||
## nextcloud-mcp-server-0.43.0 (2025-11-18)
|
||||
|
||||
### Feat
|
||||
|
||||
- Replace custom document chunker with LangChain MarkdownTextSplitter
|
||||
|
||||
## nextcloud-mcp-server-0.42.0 (2025-11-17)
|
||||
|
||||
### Feat
|
||||
|
||||
- **viz**: Add dual-score display and improve UI controls
|
||||
|
||||
## nextcloud-mcp-server-0.41.0 (2025-11-17)
|
||||
|
||||
### Feat
|
||||
|
||||
- add configurable fusion algorithms for BM25 hybrid search
|
||||
- add chunk position tracking to vector indexing and search
|
||||
- add vector viz template and chunk context endpoint
|
||||
|
||||
### Fix
|
||||
|
||||
- prevent infinite loop in DocumentChunker with position tracking
|
||||
- Relax SearchResult validation to support DBSF fusion scores > 1.0
|
||||
|
||||
## nextcloud-mcp-server-0.40.0 (2025-11-16)
|
||||
|
||||
### Feat
|
||||
|
||||
- add unified provider architecture with Amazon Bedrock support
|
||||
|
||||
### Fix
|
||||
|
||||
- suppress Starlette middleware type warnings in ty checker
|
||||
|
||||
## nextcloud-mcp-server-0.39.0 (2025-11-16)
|
||||
|
||||
## nextcloud-mcp-server-0.38.0 (2025-11-16)
|
||||
|
||||
### Feat
|
||||
|
||||
- add concurrent uploads and --force flag to upload command
|
||||
- implement RAG evaluation framework with CLI tooling
|
||||
- Add OpenTelemetry tracing to @instrument_tool decorator
|
||||
- Implement BM25 hybrid search with native Qdrant RRF fusion
|
||||
|
||||
### Fix
|
||||
|
||||
- download qrels from BEIR ZIP instead of HuggingFace
|
||||
- Handle named vectors in visualization and semantic search
|
||||
- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
|
||||
- Update viz routes to use BM25 hybrid search after refactor
|
||||
|
||||
### Refactor
|
||||
|
||||
- migrate asyncio to anyio for consistent structured concurrency
|
||||
- replace httpx client with NextcloudClient in upload command
|
||||
|
||||
### Perf
|
||||
|
||||
- Eliminate double-fetching in semantic search sampling
|
||||
- fix vector viz search performance and visual encoding
|
||||
- make note deletion concurrent in upload --force
|
||||
|
||||
## nextcloud-mcp-server-0.36.0 (2025-11-15)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- Search algorithms now require Qdrant to be populated.
|
||||
Vector sync must be enabled and documents indexed for search to work.
|
||||
|
||||
### Feat
|
||||
|
||||
- Normalize hybrid search RRF scores to 0-1 range
|
||||
- Enhance vector visualization UI and parallelize search verification
|
||||
- Add Vector Viz tab to app home page
|
||||
- Add vector visualization pane with multi-select document types
|
||||
- Implement custom PCA to remove sklearn dependency
|
||||
- Add multi-document Protocol with cross-app search support
|
||||
- Update nc_semantic_search tool with algorithm selection
|
||||
- Implement unified search algorithm module
|
||||
|
||||
### Fix
|
||||
|
||||
- Reorder tabs and fix viz pane session access
|
||||
|
||||
### Refactor
|
||||
|
||||
- Optimize Nextcloud access verification with centralized filtering
|
||||
- Make all search algorithms query Qdrant payload, not Nextcloud
|
||||
|
||||
### Perf
|
||||
|
||||
- Exclude vector-sync status polling from distributed tracing
|
||||
|
||||
## nextcloud-mcp-server-0.35.0 (2025-11-15)
|
||||
|
||||
### Feat
|
||||
|
||||
- Enable SSE transport for mcp service and update test fixtures
|
||||
|
||||
## nextcloud-mcp-server-0.34.2 (2025-11-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
|
||||
- return all notes when search query is empty
|
||||
|
||||
## nextcloud-mcp-server-0.34.0 (2025-11-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- Complete Phase 5 - Instrument all 93 MCP tools
|
||||
- Add instrumentation decorator and apply to notes tools (Phase 5)
|
||||
- Add OAuth token and database metrics (Phases 3-4)
|
||||
- Add metrics instrumentation for queue, health, and database operations
|
||||
|
||||
## nextcloud-mcp-server-0.33.1 (2025-11-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- Move grafana_folder from labels to annotations
|
||||
|
||||
## nextcloud-mcp-server-0.33.0 (2025-11-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Grafana dashboard and vector sync metric instrumentation
|
||||
|
||||
## nextcloud-mcp-server-0.32.1 (2025-11-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- add dynamic dimension detection for Ollama embedding models
|
||||
|
||||
## nextcloud-mcp-server-0.32.0 (2025-11-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ollama**: Pull model on startup if not available in ollama
|
||||
- add dynamic vector sync status updates with htmx polling
|
||||
- add webhook management UI and BeforeNodeDeletedEvent support
|
||||
- validate Nextcloud webhook schemas and document findings
|
||||
|
||||
### Fix
|
||||
|
||||
- improve webapp tab UI with CSS Grid and viewport-filling container
|
||||
|
||||
### Refactor
|
||||
|
||||
- move webapp from /user/page to /app
|
||||
- consolidate database storage for webhooks and OAuth tokens
|
||||
|
||||
## nextcloud-mcp-server-0.31.1 (2025-11-10)
|
||||
|
||||
### Refactor
|
||||
|
||||
- simplify OpenTelemetry tracing configuration
|
||||
|
||||
## nextcloud-mcp-server-0.31.0 (2025-11-10)
|
||||
|
||||
### Feat
|
||||
|
||||
- skip tracing for health and metrics endpoints
|
||||
|
||||
### Fix
|
||||
|
||||
- add retry logic for ETag conflicts in category change test
|
||||
- optimize Notes API pagination with pruneBefore parameter
|
||||
|
||||
## nextcloud-mcp-server-0.30.0 (2025-11-10)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: Add document chunking configuration
|
||||
- **vector**: Add configurable chunk size and overlap for document embedding
|
||||
- **vector**: Support multiple embedding models with auto-generated collection names
|
||||
|
||||
### Fix
|
||||
|
||||
- Support in-memory Qdrant for CI testing
|
||||
|
||||
## nextcloud-mcp-server-0.29.2 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Set default strategy to Recreate
|
||||
|
||||
## nextcloud-mcp-server-0.29.1 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **observability**: isolate metrics endpoint to dedicated port
|
||||
|
||||
## nextcloud-mcp-server-0.29.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: Add observability support with ServiceMonitor and Grafana dashboard
|
||||
|
||||
### Fix
|
||||
|
||||
- **readiness**: Only check external Qdrant in network mode
|
||||
|
||||
## nextcloud-mcp-server-0.28.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **observability**: Add comprehensive monitoring with Prometheus and OpenTelemetry
|
||||
|
||||
### Fix
|
||||
|
||||
- **vector**: Handle missing 'modified' field in notes gracefully
|
||||
|
||||
## nextcloud-mcp-server-0.27.3 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: Use helm dependency build instead of update to use Chart.lock
|
||||
|
||||
## nextcloud-mcp-server-0.27.2 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: update Qdrant dependency condition to match new mode structure
|
||||
|
||||
## nextcloud-mcp-server-0.27.1 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: add Qdrant local mode support with three deployment options [skip ci]
|
||||
- add Qdrant local mode support with in-memory and persistent storage
|
||||
- implement ADR-009 - refactor semantic search to use generic semantic:read scope
|
||||
- implement MCP sampling for semantic search RAG (ADR-008)
|
||||
- add optional vector database and semantic search to helm chart
|
||||
- add vector sync processing status to /user/page endpoint
|
||||
- implement semantic search tool and fix vector sync issues (ADR-007 Phase 3)
|
||||
- implement vector sync scanner and processor (ADR-007 Phase 2)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: add Helm repository setup to chart release workflow
|
||||
- implement deletion grace period and vector sync status tool
|
||||
- remove unnecessary urllib3<2.0 constraint
|
||||
- integrate vector sync tasks with Starlette lifespan for streamable-http
|
||||
|
||||
### Refactor
|
||||
|
||||
- migrate vector sync from asyncio.Queue to anyio memory object streams
|
||||
- update to Qdrant query_points API and fix Playwright Keycloak login
|
||||
|
||||
## nextcloud-mcp-server-0.26.1 (2025-11-08)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.21,<1.22
|
||||
|
||||
## nextcloud-mcp-server-0.26.0 (2025-11-08)
|
||||
|
||||
### Feat
|
||||
|
||||
- add real elicitation integration test with python-sdk MCP client
|
||||
- unify session architecture and enhance login status visibility
|
||||
|
||||
### Fix
|
||||
|
||||
- Consolidate OAuth callbacks and implement PKCE for all flows
|
||||
|
||||
## nextcloud-mcp-server-0.25.0 (2025-11-05)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- All OAuth deployments must be reconfigured to specify
|
||||
resource URIs (NEXTCLOUD_MCP_SERVER_URL and NEXTCLOUD_RESOURCE_URI) and
|
||||
choose between multi-audience or token exchange mode.
|
||||
|
||||
### Feat
|
||||
|
||||
- Implement ADR-005 unified token verifier to eliminate token passthrough vulnerability
|
||||
|
||||
### Fix
|
||||
|
||||
- Implement proper OAuth resource parameters and PRM-based discovery
|
||||
- Simplify token verifier to be RFC 7519 compliant
|
||||
- Use Keycloak client ID for NEXTCLOUD_RESOURCE_URI in token exchange
|
||||
- Correct OAuth token audience validation for multi-audience mode
|
||||
|
||||
### Refactor
|
||||
|
||||
- Eliminate duplicate validation logic in UnifiedTokenVerifier
|
||||
|
||||
## nextcloud-mcp-server-0.24.1 (2025-11-04)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.20,<1.21
|
||||
|
||||
## nextcloud-mcp-server-0.24.0 (2025-11-04)
|
||||
|
||||
### Feat
|
||||
|
||||
- add scope protection to OAuth provisioning tools
|
||||
- enable authorization services for token exchange in Keycloak
|
||||
- implement scope-based audience mapping and RFC 9728 support
|
||||
- integrate token exchange into MCP server application
|
||||
- implement RFC 8693 Standard Token Exchange for Keycloak
|
||||
- Add userinfo route/page
|
||||
- add browser-based user info page with separate OAuth flow
|
||||
- Implement ADR-004 Progressive Consent foundation (partial)
|
||||
- Complete ADR-004 Progressive Consent OAuth flows implementation
|
||||
- Implement ADR-004 Progressive Consent foundation components
|
||||
- Implement ADR-004 Hybrid Flow with comprehensive integration tests
|
||||
|
||||
### Fix
|
||||
|
||||
- add missing await for get_nextcloud_client in capabilities resource
|
||||
- use valid Fernet encryption keys in token exchange tests
|
||||
- accept resource URL in token audience for Nextcloud JWT tokens
|
||||
- remove token-exchange-nextcloud scope and accept tokens without audience
|
||||
- move audience mapper from scope to nextcloud-mcp-server client
|
||||
- move token-exchange-nextcloud from default to optional scopes
|
||||
- restructure routes to prevent SessionAuthBackend from interfering with FastMCP OAuth
|
||||
- allow OAuth Bearer tokens on /mcp endpoint by excluding from session auth
|
||||
- correct OAuth token audience validation using RFC 8707 resource parameter
|
||||
- remove remaining references to deleted oauth_callback and oauth_token
|
||||
- remove Hybrid Flow, make Progressive Consent default (ADR-004)
|
||||
- browser OAuth userinfo endpoint and refresh token rotation
|
||||
- make ENABLE_PROGRESSIVE_CONSENT consistently opt-in (default false)
|
||||
- make provisioning checks opt-in (default false)
|
||||
- Disable Progressive Consent for mcp-oauth to enable Hybrid Flow tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- integrate token exchange into unified get_client() pattern
|
||||
|
||||
## nextcloud-mcp-server-0.23.0 (2025-11-03)
|
||||
|
||||
### Feat
|
||||
|
||||
- Auto-configure impersonation role in Keycloak realm import
|
||||
- Implement dual-tier token exchange (Standard V2 + Legacy V1 impersonation)
|
||||
- Add Keycloak external IdP integration with custom scopes
|
||||
- Implement RFC 8693 token exchange for Keycloak (ADR-002 Tier 2)
|
||||
- Add Keycloak OAuth provider support with refresh token storage
|
||||
|
||||
### Fix
|
||||
|
||||
- Complete Keycloak external IdP integration with all tests passing
|
||||
- Complete Keycloak external IdP integration with all tests passing
|
||||
- Update DCR token_type tests for OIDC app changes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Remove NEXTCLOUD_OIDC_CLIENT_STORAGE environment variable
|
||||
- Remove unnecessary user_oidc patch - CORSMiddleware patch is sufficient
|
||||
- Unify OAuth configuration to be provider-agnostic
|
||||
|
||||
## nextcloud-mcp-server-0.22.7 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Remove image tag overide
|
||||
|
||||
## nextcloud-mcp-server-0.22.6 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm chart with extraArgs
|
||||
|
||||
## nextcloud-mcp-server-0.22.5 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update helm chart variables
|
||||
|
||||
## nextcloud-mcp-server-0.22.4 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## nextcloud-mcp-server-0.1.1 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- Trigger release
|
||||
|
||||
## nextcloud-mcp-server-0.1.0 (2025-10-29)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- FASTMCP_-prefixed env vars have been replaced by CLI
|
||||
arguments. Refer to the README for updated usage.
|
||||
|
||||
### Feat
|
||||
|
||||
- **server**: Add /live & /health endpoints
|
||||
- Initialize helm chart
|
||||
- Add text processing background worker for telling client about progress
|
||||
- **auth**: Add support for client registration deletion
|
||||
- Split read/write scopes into app:read/write scopes
|
||||
- Enable token introspection for opaque tokens
|
||||
- **server**: Add support for custom OIDC scopes and permissions via JWTs
|
||||
- Initialize JWT-scoped tools
|
||||
- **caldav**: Add support for tasks
|
||||
- **webdav**: Add search and list favorite response tools
|
||||
- **cookbook**: Add full Cookbook app support with 13 tools and 2 resources
|
||||
- Add Groups API client
|
||||
- add sharing API client and server tools
|
||||
- **server**: Experimental support for OAuth2/OIDC authentication
|
||||
- **users**: Initialize user API client
|
||||
- **server**: Add support for `streamable-http` transport type
|
||||
- Add WebDAV resource copy functionality
|
||||
- Add WebDAV resource move/rename functionality
|
||||
- **deck**: Add support for stack, cards, labels
|
||||
- **deck**: Initialize Deck app client/server
|
||||
- **cli**: Replace `mcp run` with click CLI and runtime options
|
||||
- **client**: Preserve fields when modifying contacts/calendar resources
|
||||
- **server**: Add structured output to all tool/resource output
|
||||
- **contacts**: Initialize Contacts App
|
||||
- **calendar**: add comprehensive Calendar app support via CalDAV protocol
|
||||
- Update webdav client create_directory method to handle recursive directories
|
||||
- **webdav**: add complete file system support
|
||||
- Add TablesClient and associated tools
|
||||
- Switch to using async client
|
||||
- **notes**: Add append to note functionality
|
||||
|
||||
### Fix
|
||||
|
||||
- Add support for RFC 7592 client registration and deletion
|
||||
- Update webdav models for proper serialization
|
||||
- **deps**: update dependency mcp to >=1.19,<1.20
|
||||
- Add CORS middleware to allow browser-based clients like MCP Inspector
|
||||
- Use occ-created OAuth clients with allowed_scopes for all tests
|
||||
- Separate OAuth fixtures for opaque vs JWT tokens
|
||||
- **caldav**: Fix caldav search() due to missing todos
|
||||
- **caldav**: Check that calendar exists after creation to avoid race condition
|
||||
- **caldav**: Properly parse datetimes as vDDDTypes
|
||||
- Increase HTTP client timeout to 30s
|
||||
- Handle RequestError in mcp tools
|
||||
- **deps**: update dependency mcp to >=1.18,<1.19
|
||||
- **deps**: update dependency pillow to v12
|
||||
- **oauth**: Remove the option to force_register new clients
|
||||
- Update user/groups API to OCS v2
|
||||
- **deps**: update dependency mcp to >=1.17,<1.18
|
||||
- **deps**: update dependency mcp to >=1.16,<1.17
|
||||
- **deps**: update dependency mcp to >=1.15,<1.16
|
||||
- **docker**: Provide --host 0.0.0.0 in default docker image
|
||||
- **deps**: update dependency mcp to >=1.13,<1.14
|
||||
- **server**: Replace ErrorResponses with standard McpErrors
|
||||
- **notes**: Include ETags in responses to avoid accidently updates
|
||||
- **notes**: Remove note contents from responses to reduce token usage
|
||||
- **model**: Serialize timestamps in RFC3339 format
|
||||
- **client**: Use paging to fetch all notes
|
||||
- **client**: Strip cookies from responses to avoid falsely raising CSRF errors
|
||||
- **calendar**: Fix iCalendar date vs datetime format
|
||||
- **calendar**: Remove try/except in calendar API
|
||||
- apply ruff formatting to pass CI checks
|
||||
- **calendar**: address PR feedback from maintainer
|
||||
- apply ruff formatting to test_webdav_operations.py
|
||||
- **deps**: update dependency mcp to >=1.10,<1.11
|
||||
- update tests
|
||||
- Commitizen release process
|
||||
- Do not update dependencies when running in Dockerfile
|
||||
- Configure logging
|
||||
- Limit search results to notes with score > 0.5
|
||||
- Install deps before checking service
|
||||
- **deps**: update dependency mcp to >=1.9,<1.10
|
||||
|
||||
### Refactor
|
||||
|
||||
- Transform document parsing into pluggable processor architecture
|
||||
- Update JWT client to use DCR, re-enable tool filtering
|
||||
- Migrate from internal CalendarClient to caldav library
|
||||
- Unify logging & remove factory deployment
|
||||
- Add tools for all resources to enable tool-only workflows
|
||||
- Add `http` to --transport option
|
||||
- Use _make_request where available
|
||||
- **calendar**: optimize logging for production readiness
|
||||
- Modularize NC and Notes app client
|
||||
|
||||
### Perf
|
||||
|
||||
- **notes**: Improve notes search performance using async iterators
|
||||
@@ -1,9 +1,9 @@
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
version: 1.16.0
|
||||
version: 1.16.2
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
version: 1.34.0
|
||||
digest: sha256:9dfb8d6e3d5488f669d4c37f3a766213b598ff3de2aead2c734789736c7835b4
|
||||
generated: "2025-11-17T17:08:48.055530019Z"
|
||||
version: 1.36.0
|
||||
digest: sha256:fab8008217b27ff4e3e139c2f481eedaa23f9f64a3a086d0e9deea2195b69b63
|
||||
generated: "2025-12-14T11:07:07.024787592Z"
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.44.0
|
||||
appVersion: "0.44.0"
|
||||
version: 0.54.0
|
||||
appVersion: "0.56.1"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
@@ -27,10 +27,10 @@ annotations:
|
||||
grafana_dashboard_folder: "Nextcloud MCP"
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
version: "1.16.0"
|
||||
version: "1.16.2"
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
condition: qdrant.networkMode.deploySubchart
|
||||
- name: ollama
|
||||
version: "1.34.0"
|
||||
version: "1.36.0"
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
condition: ollama.enabled
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
# CI-specific overrides for RAG evaluation pipeline
|
||||
# This file is used by the rag-evaluation.yml workflow to configure the MCP
|
||||
# container with OpenAI/GitHub Models API for vector embeddings.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.yml -f docker-compose.ci.yml up
|
||||
#
|
||||
# Environment variables (set in CI workflow):
|
||||
# OPENAI_API_KEY - API key for embeddings (GitHub Models uses GITHUB_TOKEN)
|
||||
# OPENAI_BASE_URL - API endpoint (e.g., https://models.github.ai/inference)
|
||||
# OPENAI_EMBEDDING_MODEL - Model name (e.g., openai/text-embedding-3-small)
|
||||
# OPENAI_GENERATION_MODEL - Model name for generation (e.g., openai/gpt-4o-mini)
|
||||
|
||||
services:
|
||||
mcp:
|
||||
environment:
|
||||
# OpenAI provider configuration (required for CI vector sync)
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
- OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://models.github.ai/inference}
|
||||
- OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-openai/text-embedding-3-small}
|
||||
- OPENAI_GENERATION_MODEL=${OPENAI_GENERATION_MODEL:-openai/gpt-4o-mini}
|
||||
# Faster sync for CI
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=${VECTOR_SYNC_SCAN_INTERVAL:-5}
|
||||
# Enable document processing for PDF parsing
|
||||
- ENABLE_DOCUMENT_PROCESSING=true
|
||||
+35
-6
@@ -17,11 +17,11 @@ services:
|
||||
# Note: Redis is an external service. You can find more information about the configuration here:
|
||||
# https://hub.docker.com/_/redis
|
||||
redis:
|
||||
image: docker.io/library/redis:alpine@sha256:5013e94192ef18a5d8368179c7522e5300f9265cc339cadac76c7b93303a2752
|
||||
image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
|
||||
restart: always
|
||||
|
||||
app:
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:d572839eeb693026d72a0c6aa48076df0bb8930797ea321e604936ef7189d06e
|
||||
image: docker.io/library/nextcloud:32.0.3@sha256:53231a9fb9233af2c15bfe70fc03ebe639fd53243fa42a9369884b1e0008deae
|
||||
restart: always
|
||||
ports:
|
||||
- 0.0.0.0:8080:80
|
||||
@@ -34,7 +34,8 @@ services:
|
||||
- ./app-hooks:/docker-entrypoint-hooks.d:ro
|
||||
# Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
|
||||
# The post-installation hook will register /opt/apps as an additional app directory
|
||||
- ./third_party:/opt/apps:ro
|
||||
#- ./third_party:/opt/apps:ro
|
||||
#- ./third_party/astrolabe:/opt/apps/astrolabe:ro
|
||||
environment:
|
||||
- NEXTCLOUD_TRUSTED_DOMAINS=app
|
||||
- NEXTCLOUD_ADMIN_USER=admin
|
||||
@@ -51,7 +52,7 @@ services:
|
||||
retries: 30
|
||||
|
||||
recipes:
|
||||
image: docker.io/library/nginx:alpine@sha256:b3c656d55d7ad751196f21b7fd2e8d4da9cb430e32f646adcf92441b72f82b14
|
||||
image: docker.io/library/nginx:alpine@sha256:052b75ab72f690f33debaa51c7e08d9b969a0447a133eb2b99cc905d9188cb2b
|
||||
restart: always
|
||||
volumes:
|
||||
- ./tests/fixtures/test_recipe.html:/usr/share/nginx/html/test_recipe.html:ro
|
||||
@@ -150,6 +151,14 @@ services:
|
||||
# Tokens must contain BOTH MCP and Nextcloud audiences
|
||||
# No token exchange needed - tokens work for both MCP auth and Nextcloud APIs
|
||||
|
||||
# Vector sync configuration (ADR-007)
|
||||
- VECTOR_SYNC_ENABLED=true
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=60
|
||||
- VECTOR_SYNC_PROCESSOR_WORKERS=1
|
||||
|
||||
# Qdrant configuration - persistent local storage
|
||||
- QDRANT_LOCATION=/app/data/qdrant
|
||||
|
||||
# NO admin credentials - using OAuth with Dynamic Client Registration (DCR)
|
||||
# Client credentials registered via RFC 7591 and stored in volume
|
||||
# JWT token type is used for testing (faster validation, scopes embedded in token)
|
||||
@@ -158,7 +167,7 @@ services:
|
||||
- oauth-tokens:/app/data
|
||||
|
||||
keycloak:
|
||||
image: quay.io/keycloak/keycloak:26.4.5@sha256:653852bfdea2be6e958b9e90a976eff1c6de34edd55f2f679bdc48ef16bc528e
|
||||
image: quay.io/keycloak/keycloak:26.4.7@sha256:9409c59bdfb65dbffa20b11e6f18b8abb9281d480c7ca402f51ed3d5977e6007
|
||||
command:
|
||||
- "start-dev"
|
||||
- "--import-realm"
|
||||
@@ -224,8 +233,28 @@ services:
|
||||
- keycloak-tokens:/app/data
|
||||
- keycloak-oauth-storage:/app/.oauth
|
||||
|
||||
# Smithery stateless deployment mode (ADR-016)
|
||||
# Test with: docker compose --profile smithery up smithery
|
||||
# Then: curl http://localhost:8081/.well-known/mcp-config
|
||||
smithery:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.smithery
|
||||
restart: always
|
||||
depends_on:
|
||||
app:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- 127.0.0.1:8081:8081
|
||||
environment:
|
||||
- SMITHERY_DEPLOYMENT=true
|
||||
- VECTOR_SYNC_ENABLED=false
|
||||
- PORT=8081
|
||||
profiles:
|
||||
- smithery
|
||||
|
||||
qdrant:
|
||||
image: qdrant/qdrant:v1.16.0@sha256:1005201498cf927d835383d0f918b17d8c9da7db58550f169f694455e42d78f4
|
||||
image: qdrant/qdrant:v1.16.2@sha256:dab6de32f7b2cc599985a7c764db3e8b062f70508fb85ca074aa856f829bf335
|
||||
restart: always
|
||||
ports:
|
||||
- 127.0.0.1:6333:6333 # REST API
|
||||
|
||||
@@ -0,0 +1,492 @@
|
||||
# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
|
||||
|
||||
**Status:** Proposed
|
||||
**Date:** 2025-01-22
|
||||
**Deciders:** Development Team
|
||||
**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
|
||||
|
||||
## Context
|
||||
|
||||
[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
|
||||
|
||||
- **Discovery**: Marketplace listing for MCP servers
|
||||
- **Hosting**: Containerized deployment with auto-scaling
|
||||
- **Authentication UI**: OAuth flow presentation for users
|
||||
- **Session Configuration**: Per-user settings passed via URL parameters
|
||||
- **Observability**: Usage logs and monitoring
|
||||
|
||||
### Current Architecture Limitations
|
||||
|
||||
The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
|
||||
|
||||
1. **Persistent Infrastructure**
|
||||
- Qdrant vector database for semantic search
|
||||
- Background sync worker for content indexing
|
||||
- Refresh token storage for offline access
|
||||
|
||||
2. **Single-Tenant Configuration**
|
||||
- Environment variables configure one Nextcloud instance
|
||||
- `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
|
||||
- Or OAuth with a single IdP
|
||||
|
||||
3. **Stateful Operations**
|
||||
- Vector sync maintains index state across requests
|
||||
- Token storage persists between sessions
|
||||
|
||||
### Smithery Hosting Constraints
|
||||
|
||||
Smithery-hosted containers are **stateless by design**:
|
||||
|
||||
- No persistent storage between requests
|
||||
- No background workers or cron jobs
|
||||
- No databases (Qdrant, Redis, etc.)
|
||||
- Containers may be recycled at any time
|
||||
- Configuration passed per-session via URL parameters
|
||||
|
||||
### Opportunity
|
||||
|
||||
Many users have **publicly accessible Nextcloud instances** and want to:
|
||||
|
||||
1. Try the MCP server without self-hosting infrastructure
|
||||
2. Connect multiple users to different Nextcloud instances
|
||||
3. Use basic Nextcloud tools without semantic search
|
||||
4. Benefit from Smithery's discovery and OAuth UI
|
||||
|
||||
## Decision
|
||||
|
||||
Implement a **stateless deployment mode** for Smithery that:
|
||||
|
||||
1. **Disables stateful features** (vector sync, semantic search)
|
||||
2. **Creates clients per-session** from Smithery configuration
|
||||
3. **Supports multiple Nextcloud instances** via session config
|
||||
4. **Provides a useful subset of tools** that work without infrastructure
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Smithery-Hosted Stateless Mode │
|
||||
├─────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ MCP Client Smithery │
|
||||
│ (Cursor, Claude) Infrastructure │
|
||||
│ │ │ │
|
||||
│ │ 1. Connect │ │
|
||||
│ ├───────────────────────────►│ │
|
||||
│ │ │ │
|
||||
│ │ 2. Config UI │ │
|
||||
│ │◄───────────────────────────┤ User enters: │
|
||||
│ │ (Smithery presents) │ - nextcloud_url │
|
||||
│ │ │ - auth_mode (basic/oauth) │
|
||||
│ │ │ - credentials │
|
||||
│ │ 3. Tool call │ │
|
||||
│ ├───────────────────────────►│ │
|
||||
│ │ + session config │ │
|
||||
│ │ │ │
|
||||
│ │ ┌───────┴───────┐ │
|
||||
│ │ │ MCP Server │ │
|
||||
│ │ │ Container │ │
|
||||
│ │ │ │ │
|
||||
│ │ │ 4. Create │ │
|
||||
│ │ │ client │ │
|
||||
│ │ │ from │ │
|
||||
│ │ │ config │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ │ ▼ │ │
|
||||
│ │ │ 5. Call │ │
|
||||
│ │ │ Nextcloud │───────► User's Nextcloud │
|
||||
│ │ │ API │ Instance │
|
||||
│ │ │ │ │ │
|
||||
│ │ │ ▼ │ │
|
||||
│ │ 6. Response │ Return result │ │
|
||||
│ │◄───────────────────┤ │ │
|
||||
│ │ └───────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Session Configuration Schema
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class SmitheryConfigSchema(BaseModel):
|
||||
"""Configuration schema for Smithery session."""
|
||||
|
||||
# Required: Nextcloud instance
|
||||
nextcloud_url: str = Field(
|
||||
...,
|
||||
description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
|
||||
)
|
||||
|
||||
# Authentication mode
|
||||
auth_mode: str = Field(
|
||||
"app_password",
|
||||
description="Authentication method: 'app_password' or 'oauth'"
|
||||
)
|
||||
|
||||
# App Password authentication (recommended for Smithery)
|
||||
username: str | None = Field(
|
||||
None,
|
||||
description="Nextcloud username (required for app_password auth)"
|
||||
)
|
||||
app_password: str | None = Field(
|
||||
None,
|
||||
description="Nextcloud app password (Settings → Security → App passwords)"
|
||||
)
|
||||
|
||||
# OAuth authentication (advanced)
|
||||
# When auth_mode='oauth', Smithery handles the OAuth flow
|
||||
# and passes the access token automatically
|
||||
```
|
||||
|
||||
### Feature Matrix
|
||||
|
||||
| Feature | Self-Hosted | Smithery Stateless |
|
||||
|---------|-------------|-------------------|
|
||||
| **Notes** | | |
|
||||
| List/Search notes | ✓ | ✓ |
|
||||
| Get/Create/Update notes | ✓ | ✓ |
|
||||
| Semantic search | ✓ | ✗ |
|
||||
| **Calendar** | | |
|
||||
| List calendars | ✓ | ✓ |
|
||||
| Get/Create events | ✓ | ✓ |
|
||||
| **Contacts** | | |
|
||||
| List address books | ✓ | ✓ |
|
||||
| Search/Get contacts | ✓ | ✓ |
|
||||
| **Files (WebDAV)** | | |
|
||||
| List/Download files | ✓ | ✓ |
|
||||
| Upload files | ✓ | ✓ |
|
||||
| Search files | ✓ | ✓ (keyword only) |
|
||||
| **Deck** | | |
|
||||
| List boards/cards | ✓ | ✓ |
|
||||
| Create/Update cards | ✓ | ✓ |
|
||||
| **Tables** | | |
|
||||
| List/Query tables | ✓ | ✓ |
|
||||
| Create/Update rows | ✓ | ✓ |
|
||||
| **Cookbook** | | |
|
||||
| List/Get recipes | ✓ | ✓ |
|
||||
| **Semantic Search** | | |
|
||||
| Vector search | ✓ | ✗ |
|
||||
| RAG answers | ✓ | ✗ |
|
||||
| **Background Sync** | | |
|
||||
| Auto-indexing | ✓ | ✗ |
|
||||
| Webhook sync | ✓ | ✗ |
|
||||
| **Admin UI (`/app`)** | | |
|
||||
| Vector sync status | ✓ | ✗ |
|
||||
| Vector visualization | ✓ | ✗ |
|
||||
| Webhook management | ✓ | ✗ |
|
||||
| Session management | ✓ | ✗ |
|
||||
|
||||
### Implementation
|
||||
|
||||
#### 1. Deployment Mode Detection
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/config.py
|
||||
|
||||
class DeploymentMode(Enum):
|
||||
SELF_HOSTED = "self_hosted" # Full features, env-based config
|
||||
SMITHERY_STATELESS = "smithery" # Stateless, session-based config
|
||||
|
||||
def get_deployment_mode() -> DeploymentMode:
|
||||
"""Detect deployment mode from environment."""
|
||||
if os.getenv("SMITHERY_DEPLOYMENT") == "true":
|
||||
return DeploymentMode.SMITHERY_STATELESS
|
||||
return DeploymentMode.SELF_HOSTED
|
||||
```
|
||||
|
||||
#### 2. Session-Based Client Factory
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/context.py
|
||||
|
||||
async def get_client(ctx: Context) -> NextcloudClient:
|
||||
"""Get NextcloudClient - from session config or environment."""
|
||||
|
||||
mode = get_deployment_mode()
|
||||
|
||||
if mode == DeploymentMode.SMITHERY_STATELESS:
|
||||
# Create client from Smithery session config
|
||||
config = ctx.session_config
|
||||
if not config:
|
||||
raise McpError("Session configuration required")
|
||||
|
||||
return NextcloudClient(
|
||||
base_url=config.nextcloud_url,
|
||||
username=config.username,
|
||||
password=config.app_password,
|
||||
)
|
||||
else:
|
||||
# Existing behavior: from environment or OAuth context
|
||||
return await _get_client_from_context(ctx)
|
||||
```
|
||||
|
||||
#### 3. Conditional Tool Registration
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/app.py
|
||||
|
||||
def create_mcp_server(mode: DeploymentMode) -> FastMCP:
|
||||
"""Create MCP server with mode-appropriate tools."""
|
||||
|
||||
mcp = FastMCP("Nextcloud MCP")
|
||||
|
||||
# Always register core tools
|
||||
configure_notes_tools(mcp)
|
||||
configure_calendar_tools(mcp)
|
||||
configure_contacts_tools(mcp)
|
||||
configure_webdav_tools(mcp)
|
||||
configure_deck_tools(mcp)
|
||||
configure_tables_tools(mcp)
|
||||
configure_cookbook_tools(mcp)
|
||||
|
||||
# Only register stateful tools in self-hosted mode
|
||||
if mode == DeploymentMode.SELF_HOSTED:
|
||||
configure_semantic_tools(mcp) # Requires Qdrant
|
||||
register_oauth_tools(mcp) # Requires token storage
|
||||
|
||||
return mcp
|
||||
```
|
||||
|
||||
#### 4. Exclude Admin UI Routes
|
||||
|
||||
The `/app` admin UI should **not be installed** in Smithery mode because:
|
||||
|
||||
- **Vector sync status** - No vector sync in stateless mode
|
||||
- **Vector visualization** - No Qdrant to visualize
|
||||
- **Webhook management** - No webhook sync without background workers
|
||||
- **Session management** - No persistent sessions to manage
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/app.py
|
||||
|
||||
def create_app(mode: DeploymentMode) -> Starlette:
|
||||
"""Create Starlette app with mode-appropriate routes."""
|
||||
|
||||
routes = [
|
||||
Route("/health/live", health_live, methods=["GET"]),
|
||||
Route("/health/ready", health_ready, methods=["GET"]),
|
||||
]
|
||||
|
||||
# Only mount admin UI in self-hosted mode
|
||||
if mode == DeploymentMode.SELF_HOSTED:
|
||||
browser_app = create_browser_app()
|
||||
routes.append(
|
||||
Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
|
||||
)
|
||||
routes.append(Mount("/app", app=browser_app))
|
||||
logger.info("Admin UI mounted at /app")
|
||||
else:
|
||||
logger.info("Admin UI disabled in Smithery stateless mode")
|
||||
|
||||
# Mount FastMCP at root
|
||||
mcp_app = create_mcp_server(mode).streamable_http_app()
|
||||
routes.append(Mount("/", app=mcp_app))
|
||||
|
||||
return Starlette(routes=routes, lifespan=starlette_lifespan)
|
||||
```
|
||||
|
||||
**Endpoints by Mode:**
|
||||
|
||||
| Endpoint | Self-Hosted | Smithery |
|
||||
|----------|-------------|----------|
|
||||
| `/mcp` | ✓ | ✓ |
|
||||
| `/health/live` | ✓ | ✓ |
|
||||
| `/health/ready` | ✓ | ✓ |
|
||||
| `/.well-known/mcp-config` | ✓ | ✓ |
|
||||
| `/app` | ✓ | ✗ |
|
||||
| `/app/vector-sync/status` | ✓ | ✗ |
|
||||
| `/app/vector-viz` | ✓ | ✗ |
|
||||
| `/app/webhooks` | ✓ | ✗ |
|
||||
|
||||
#### 5. Smithery Integration Files
|
||||
|
||||
**smithery.yaml:**
|
||||
```yaml
|
||||
runtime: "container"
|
||||
build:
|
||||
dockerfile: "Dockerfile.smithery"
|
||||
dockerBuildPath: "."
|
||||
startCommand:
|
||||
type: "http"
|
||||
configSchema:
|
||||
type: "object"
|
||||
required: ["nextcloud_url", "username", "app_password"]
|
||||
properties:
|
||||
nextcloud_url:
|
||||
type: "string"
|
||||
title: "Nextcloud URL"
|
||||
description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
|
||||
username:
|
||||
type: "string"
|
||||
title: "Username"
|
||||
description: "Your Nextcloud username"
|
||||
app_password:
|
||||
type: "string"
|
||||
title: "App Password"
|
||||
description: "Generate at Settings → Security → App passwords"
|
||||
exampleConfig:
|
||||
nextcloud_url: "https://cloud.example.com"
|
||||
username: "alice"
|
||||
app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
|
||||
```
|
||||
|
||||
**Dockerfile.smithery:**
|
||||
```dockerfile
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install uv
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
|
||||
|
||||
# Copy project files
|
||||
COPY pyproject.toml uv.lock ./
|
||||
COPY nextcloud_mcp_server ./nextcloud_mcp_server
|
||||
|
||||
# Install dependencies (without vector/semantic extras)
|
||||
RUN uv sync --frozen --no-dev
|
||||
|
||||
# Set Smithery mode
|
||||
ENV SMITHERY_DEPLOYMENT=true
|
||||
ENV VECTOR_SYNC_ENABLED=false
|
||||
|
||||
# Smithery sets PORT=8081
|
||||
EXPOSE 8081
|
||||
|
||||
CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
|
||||
```
|
||||
|
||||
**nextcloud_mcp_server/smithery_main.py:**
|
||||
```python
|
||||
"""Smithery-specific entrypoint for stateless deployment."""
|
||||
|
||||
import os
|
||||
import uvicorn
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
|
||||
from nextcloud_mcp_server.app import create_mcp_server
|
||||
from nextcloud_mcp_server.config import DeploymentMode
|
||||
|
||||
def main():
|
||||
# Force stateless mode
|
||||
os.environ["SMITHERY_DEPLOYMENT"] = "true"
|
||||
os.environ["VECTOR_SYNC_ENABLED"] = "false"
|
||||
|
||||
mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
|
||||
app = mcp.streamable_http_app()
|
||||
|
||||
# Add CORS for browser-based clients
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["GET", "POST", "OPTIONS"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["mcp-session-id", "mcp-protocol-version"],
|
||||
)
|
||||
|
||||
# Smithery sets PORT environment variable
|
||||
port = int(os.environ.get("PORT", 8081))
|
||||
uvicorn.run(app, host="0.0.0.0", port=port)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
### Security Considerations
|
||||
|
||||
1. **App Passwords over User Passwords**
|
||||
- Smithery config encourages app passwords (revocable, scoped)
|
||||
- Documentation guides users to create dedicated app passwords
|
||||
- App passwords can be revoked without changing main password
|
||||
|
||||
2. **HTTPS Required**
|
||||
- `nextcloud_url` must be HTTPS for production use
|
||||
- Validation rejects HTTP URLs in Smithery mode
|
||||
|
||||
3. **No Credential Storage**
|
||||
- Credentials exist only for request duration
|
||||
- No server-side persistence of user credentials
|
||||
- Smithery handles secure config transmission
|
||||
|
||||
4. **Scope Limitation**
|
||||
- Stateless mode cannot access offline_access
|
||||
- No background operations on user's behalf
|
||||
- Clear user expectation: tools work during session only
|
||||
|
||||
### Migration Path
|
||||
|
||||
Users can start with Smithery stateless mode and migrate to self-hosted:
|
||||
|
||||
1. **Try on Smithery** → Basic tools, no setup
|
||||
2. **Self-host for semantic search** → Add Qdrant, enable vector sync
|
||||
3. **Full deployment** → Background sync, webhooks, multi-user OAuth
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Lower barrier to entry** - Users can try without infrastructure
|
||||
2. **Multi-user support** - Each session connects to different Nextcloud
|
||||
3. **Smithery ecosystem** - Discovery, observability, OAuth UI
|
||||
4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
|
||||
|
||||
### Negative
|
||||
|
||||
1. **No semantic search** - Key differentiator unavailable on Smithery
|
||||
2. **Per-request auth** - Credentials sent with each request
|
||||
3. **No offline access** - Cannot perform background operations
|
||||
4. **Maintenance burden** - Two deployment modes to support
|
||||
|
||||
### Neutral
|
||||
|
||||
1. **Feature subset** - May encourage users to self-host for full features
|
||||
2. **Documentation needs** - Clear guidance on mode differences required
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### 1. External MCP Only
|
||||
|
||||
**Approach:** Only support self-hosted external MCP registration on Smithery.
|
||||
|
||||
**Rejected because:**
|
||||
- Higher barrier to entry for new users
|
||||
- Misses opportunity for Smithery marketplace visibility
|
||||
- Users want to try before committing to infrastructure
|
||||
|
||||
### 2. Embedded Vector DB (SQLite-vec)
|
||||
|
||||
**Approach:** Use SQLite with vector extensions for per-request indexing.
|
||||
|
||||
**Rejected because:**
|
||||
- No persistence between requests anyway
|
||||
- Indexing latency too high for synchronous requests
|
||||
- Complexity without benefit in stateless context
|
||||
|
||||
### 3. External Vector DB Service
|
||||
|
||||
**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
|
||||
|
||||
**Rejected because:**
|
||||
- Adds external dependency and cost
|
||||
- Per-user collections require complex multi-tenancy
|
||||
- Sync still impossible without background workers
|
||||
|
||||
### 4. Hybrid: Smithery + User's Qdrant
|
||||
|
||||
**Approach:** User provides their own Qdrant URL in session config.
|
||||
|
||||
**Considered for future:**
|
||||
- Could enable semantic search for advanced users
|
||||
- Adds complexity to session config
|
||||
- Sync still requires external trigger (manual or webhook)
|
||||
|
||||
## References
|
||||
|
||||
- [Smithery Documentation](https://smithery.ai/docs)
|
||||
- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
|
||||
- [Smithery External MCPs](https://smithery.ai/docs/build/external)
|
||||
- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
|
||||
- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
|
||||
@@ -0,0 +1,506 @@
|
||||
# ADR-017: Add MCP Tool Annotations for Enhanced Client UX
|
||||
|
||||
## Status
|
||||
|
||||
Implemented
|
||||
|
||||
## Context
|
||||
|
||||
The MCP Python SDK supports tool annotations that provide behavioral hints and improved UX to MCP clients. Currently, our 101 tools across 10 modules lack these annotations, resulting in:
|
||||
|
||||
- Snake_case function names displayed to users (e.g., "nc_notes_create_note" instead of "Create Note")
|
||||
- No behavioral hints for clients about read-only, destructive, or idempotent operations
|
||||
- Missing parameter descriptions for better auto-completion and inline help
|
||||
- Clients cannot optimize caching, warn before destructive operations, or retry safely
|
||||
|
||||
### Available MCP Annotations
|
||||
|
||||
The MCP SDK provides three types of annotations:
|
||||
|
||||
#### 1. Tool Decorator Parameters
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Human-Readable Name",
|
||||
description="Tool description", # Can also come from docstring
|
||||
annotations=ToolAnnotations(...),
|
||||
icons=[Icon(...)] # Optional visual icons
|
||||
)
|
||||
```
|
||||
|
||||
#### 2. ToolAnnotations Behavioral Hints
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
ToolAnnotations(
|
||||
title="Alternative Title", # Decorator title takes precedence
|
||||
readOnlyHint=True, # Tool doesn't modify data
|
||||
destructiveHint=True, # Tool may delete/overwrite data
|
||||
idempotentHint=True, # Repeated calls with same args are safe
|
||||
openWorldHint=True # Interacts with external entities
|
||||
)
|
||||
```
|
||||
|
||||
#### 3. Parameter Descriptions
|
||||
```python
|
||||
from pydantic import Field
|
||||
|
||||
async def tool(
|
||||
param: str = Field(description="What this parameter does"),
|
||||
ctx: Context
|
||||
):
|
||||
```
|
||||
|
||||
### Idempotency Analysis
|
||||
|
||||
**Important**: Idempotency means calling with **the same inputs** produces the same result.
|
||||
|
||||
**NOT Idempotent** (different inputs each call):
|
||||
- **Updates with etag**: `update_note(id=1, title="X", etag="abc")` → etag changes to "def"
|
||||
- Second call: `update_note(id=1, title="X", etag="abc")` → fails (etag mismatch)
|
||||
- Different input (stale etag) → different result (error)
|
||||
- **Creates**: `create_note(title="X")` → creates note 1
|
||||
- Second call → creates note 2 (different result)
|
||||
- **Append operations**: `append_content(id=1, text="X")` → adds X once
|
||||
- Second call → adds X again (different result)
|
||||
|
||||
**Idempotent**:
|
||||
- **Deletes**: `delete_note(id=1)` → note deleted
|
||||
- Second call → 404 or success (same end state: note doesn't exist)
|
||||
- Note: May return different status code, but end state is identical
|
||||
- **Full resource PUT without version control**: `write_file(path="/test.txt", content="Hello")` → file has "Hello"
|
||||
- Second call → file still has "Hello" (same end state)
|
||||
- Example: `nc_webdav_write_file` uses HTTP PUT without etags/version control
|
||||
- **Set operations**: `set_property(id=1, value="X")` → property = X
|
||||
- Second call → property still = X (same result)
|
||||
- Note: Nextcloud updates with etags use version control, so not idempotent
|
||||
|
||||
**Read-Only** (always idempotent, never destructive):
|
||||
- All list, search, get operations
|
||||
|
||||
## Decision
|
||||
|
||||
Add annotations to all 101 tools in three phases:
|
||||
|
||||
### Phase 1: Titles (Quick Win)
|
||||
Add human-readable titles to all tools:
|
||||
|
||||
```python
|
||||
@mcp.tool(title="Create Note")
|
||||
async def nc_notes_create_note(...):
|
||||
```
|
||||
|
||||
**Effort**: 2-3 hours
|
||||
**Impact**: Immediate UX improvement
|
||||
|
||||
### Phase 2: ToolAnnotations (Behavioral Hints)
|
||||
Add annotations based on corrected categorization:
|
||||
|
||||
```python
|
||||
# Read-only tools
|
||||
@mcp.tool(
|
||||
title="Search Notes",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True # Nextcloud is external to MCP server
|
||||
)
|
||||
)
|
||||
|
||||
# Delete tools (idempotent: same end state)
|
||||
@mcp.tool(
|
||||
title="Delete Note",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True,
|
||||
idempotentHint=True, # Deleting deleted item = same end state
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Create tools (not idempotent: creates multiple items)
|
||||
@mcp.tool(
|
||||
title="Create Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False,
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Update tools with etag (not idempotent: etag changes)
|
||||
@mcp.tool(
|
||||
title="Update Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Etag required = different inputs each time
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Append operations (not idempotent: adds content each time)
|
||||
@mcp.tool(
|
||||
title="Append to Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False,
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
**Effort**: 4-6 hours
|
||||
**Impact**: Better client behavior (caching, warnings, retry logic)
|
||||
|
||||
### Phase 3: Parameter Descriptions
|
||||
Add Field() descriptions to parameters:
|
||||
|
||||
```python
|
||||
from pydantic import Field
|
||||
|
||||
@mcp.tool(title="Create Note", annotations=ToolAnnotations(idempotentHint=False))
|
||||
async def nc_notes_create_note(
|
||||
title: str = Field(description="The title of the note"),
|
||||
content: str = Field(description="Markdown content of the note"),
|
||||
category: str = Field(description="Category or folder name for organizing"),
|
||||
ctx: Context
|
||||
) -> CreateNoteResponse:
|
||||
```
|
||||
|
||||
**Effort**: 6-8 hours
|
||||
**Impact**: Better auto-completion and inline help
|
||||
|
||||
## Tool Categorization
|
||||
|
||||
### Read-Only Tools (~40 tools)
|
||||
**Pattern**: List, search, get operations
|
||||
**Annotations**: `readOnlyHint=True`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_search_notes` → "Search Notes"
|
||||
- `nc_webdav_list_directory` → "List Files and Directories"
|
||||
- `nc_calendar_list_calendars` → "List Calendars"
|
||||
- `nc_contacts_get_contact` → "Get Contact"
|
||||
- `nc_semantic_search` → "Semantic Search"
|
||||
- `check_logged_in` → "Check Server Login Status"
|
||||
|
||||
### Create Tools (~20 tools)
|
||||
**Pattern**: Create new resources
|
||||
**Annotations**: `idempotentHint=False`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_create_note` → "Create Note"
|
||||
- `nc_calendar_create_event` → "Create Calendar Event"
|
||||
- `nc_contacts_create_contact` → "Create Contact"
|
||||
- `deck_create_card` → "Create Kanban Card"
|
||||
- `nc_tables_create_row` → "Create Table Row"
|
||||
|
||||
### Update Tools (~25 tools)
|
||||
**Pattern**: Modify existing resources with etag
|
||||
**Annotations**: `idempotentHint=False` (etag changes), `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_update_note` → "Update Note"
|
||||
- `nc_calendar_update_event` → "Update Calendar Event"
|
||||
- `nc_contacts_update_contact` → "Update Contact"
|
||||
- `deck_update_card` → "Update Kanban Card"
|
||||
|
||||
**Rationale**: Updates require etag, which changes after each update. Same parameters on second call will fail due to stale etag = NOT idempotent.
|
||||
|
||||
### Append/Accumulate Tools (~5 tools)
|
||||
**Pattern**: Add content without replacing
|
||||
**Annotations**: `idempotentHint=False`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_append_content` → "Append to Note"
|
||||
|
||||
**Rationale**: Each call adds content, changing the result = NOT idempotent.
|
||||
|
||||
### Delete Tools (~10 tools)
|
||||
**Pattern**: Remove resources
|
||||
**Annotations**: `destructiveHint=True`, `idempotentHint=True`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_delete_note` → "Delete Note"
|
||||
- `nc_webdav_delete_resource` → "Delete File or Directory"
|
||||
- `nc_calendar_delete_event` → "Delete Calendar Event"
|
||||
- `nc_contacts_delete_contact` → "Delete Contact"
|
||||
|
||||
**Rationale**: Deleting already-deleted item results in same end state (item doesn't exist) = idempotent. Status code may differ, but outcome is identical.
|
||||
|
||||
### Special Cases
|
||||
|
||||
#### OAuth Provisioning Tools
|
||||
```python
|
||||
# Not read-only but requires user interaction
|
||||
@mcp.tool(
|
||||
title="Grant Server Access to Nextcloud",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=False,
|
||||
idempotentHint=False, # Creates new OAuth session each time
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
async def provision_nextcloud_access(ctx: Context):
|
||||
```
|
||||
|
||||
#### Semantic Search (Closed World)
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Semantic Search",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=False # Searches only indexed Nextcloud data
|
||||
)
|
||||
)
|
||||
async def nc_semantic_search(query: str, ctx: Context):
|
||||
```
|
||||
|
||||
**Rationale**: Semantic search only queries pre-indexed Nextcloud content, not the "open world" like web search would.
|
||||
|
||||
## Tool Priority Matrix
|
||||
|
||||
### Critical Priority (~2 tools)
|
||||
OAuth tools required for server functionality:
|
||||
- `provision_nextcloud_access` → "Grant Server Access to Nextcloud"
|
||||
- `check_logged_in` → "Check Server Login Status"
|
||||
|
||||
### High Priority (~50 tools)
|
||||
Most commonly used modules:
|
||||
- **Notes** (14 tools): Create, read, update, delete notes
|
||||
- **WebDAV** (13 tools): File operations
|
||||
- **Calendar** (15 tools): Events and todos
|
||||
- **Semantic Search** (6 tools): AI-powered search
|
||||
- **Contacts** (9 tools): Address book operations
|
||||
|
||||
### Medium Priority (~35 tools)
|
||||
Secondary functionality:
|
||||
- **Deck** (9 tools): Kanban boards
|
||||
- **Tables** (7 tools): Structured data
|
||||
- **Sharing** (5 tools): File sharing
|
||||
|
||||
### Low Priority (~14 tools)
|
||||
Less frequently used:
|
||||
- **Cookbook** (8 tools): Recipe management
|
||||
- **News** (6 tools): RSS feeds
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Week 1: Phase 1 - Titles
|
||||
- Add human-readable titles to all 101 tools
|
||||
- Update tool name mapping in documentation
|
||||
- Manual test in MCP inspector
|
||||
|
||||
### Week 2: Phase 2 - ToolAnnotations (High Priority)
|
||||
- Add annotations to Critical and High priority tools (~52 tools)
|
||||
- Focus on Notes, WebDAV, Calendar, Semantic, OAuth
|
||||
- Add unit tests validating annotation presence
|
||||
|
||||
### Week 3: Phase 2 - ToolAnnotations (Medium/Low Priority)
|
||||
- Complete remaining tools (~49 tools)
|
||||
- Deck, Tables, Contacts, Cookbook, News
|
||||
- Update tool listings in README
|
||||
|
||||
### Week 4: Phase 3 - Parameter Descriptions
|
||||
- Add Field() descriptions to Critical/High priority tools
|
||||
- Start with OAuth, Notes, WebDAV modules
|
||||
- Incremental completion over time
|
||||
|
||||
## Benefits
|
||||
|
||||
### For Users
|
||||
- **Clearer UI**: "Create Note" vs "nc_notes_create_note"
|
||||
- **Safety**: Warnings before destructive operations
|
||||
- **Better help**: Parameter descriptions in auto-completion
|
||||
- **Confidence**: Know which operations are safe to retry
|
||||
|
||||
### For MCP Clients
|
||||
- **Caching**: Cache results from read-only tools
|
||||
- **Safety prompts**: Warn before destructiveHint=true
|
||||
- **Retry logic**: Safely retry idempotent operations
|
||||
- **UI organization**: Group by behavior (reads vs writes vs deletes)
|
||||
- **Performance**: Optimize based on hints
|
||||
|
||||
### For Developers
|
||||
- **Self-documenting**: Behavior is explicit
|
||||
- **Consistency**: Standard patterns across codebase
|
||||
- **Testing**: Validate annotations match implementation
|
||||
- **Maintenance**: Clear expectations for new tools
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
- Immediate UX improvement with minimal effort
|
||||
- Clients can make smarter decisions
|
||||
- Self-documenting code
|
||||
- Follows MCP best practices
|
||||
|
||||
### Negative
|
||||
- Initial effort to add annotations (12-15 hours total)
|
||||
- Must maintain annotations when adding new tools
|
||||
- Risk of incorrect annotations misleading clients
|
||||
|
||||
### Neutral
|
||||
- Annotations are hints, not guarantees
|
||||
- Clients may ignore annotations
|
||||
- Backward compatible (additive change)
|
||||
|
||||
### Mitigations
|
||||
- **Incorrect annotations**: Add tests validating behavior matches hints
|
||||
- **Maintenance burden**: Add to code review checklist and tool template
|
||||
- **Documentation**: Update CLAUDE.md with annotation guidelines
|
||||
|
||||
## Examples
|
||||
|
||||
### Complete Annotated Tool (Delete)
|
||||
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
from pydantic import Field
|
||||
|
||||
@mcp.tool(
|
||||
title="Delete Note",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Deletes data permanently
|
||||
idempotentHint=True, # Same end state (note doesn't exist)
|
||||
openWorldHint=True # Nextcloud is external
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_delete_note(
|
||||
note_id: int = Field(description="The ID of the note to delete permanently"),
|
||||
ctx: Context
|
||||
) -> DeleteNoteResponse:
|
||||
"""Delete a note permanently (requires notes:write scope)"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
### Complete Annotated Tool (Update)
|
||||
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Update Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # NOT idempotent: etag changes each update
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_update_note(
|
||||
note_id: int = Field(description="The ID of the note to update"),
|
||||
title: str | None = Field(
|
||||
default=None,
|
||||
description="New title (omit to keep current)"
|
||||
),
|
||||
content: str | None = Field(
|
||||
default=None,
|
||||
description="New markdown content (omit to keep current)"
|
||||
),
|
||||
category: str | None = Field(
|
||||
default=None,
|
||||
description="New category/folder (omit to keep current)"
|
||||
),
|
||||
etag: str = Field(
|
||||
description="ETag from get_note (prevents concurrent modification)"
|
||||
),
|
||||
ctx: Context
|
||||
) -> UpdateNoteResponse:
|
||||
"""Update an existing note's title, content, or category.
|
||||
|
||||
The etag parameter is required to prevent overwriting concurrent changes.
|
||||
Get the current ETag by first calling nc_notes_get_note.
|
||||
If the note has been modified since you retrieved it, the update will fail.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
### Complete Annotated Tool (Read-Only)
|
||||
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Search Notes",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Doesn't modify data
|
||||
openWorldHint=True # Queries Nextcloud
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:read")
|
||||
@instrument_tool
|
||||
async def nc_notes_search_notes(
|
||||
query: str = Field(description="Search term to match in note titles or content"),
|
||||
ctx: Context
|
||||
) -> SearchNotesResponse:
|
||||
"""Search notes by title or content, returning id, title, and category.
|
||||
|
||||
This is a read-only operation that searches across all user notes.
|
||||
Use nc_notes_get_note to retrieve the full content of matching notes.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Unit Tests
|
||||
Add tests validating annotation presence and correctness:
|
||||
|
||||
```python
|
||||
def test_notes_tools_have_annotations():
|
||||
"""Verify all notes tools have appropriate annotations."""
|
||||
tools = get_registered_tools(mcp)
|
||||
|
||||
# Check create tool
|
||||
create_tool = tools["nc_notes_create_note"]
|
||||
assert create_tool.title == "Create Note"
|
||||
assert create_tool.annotations.idempotentHint is False
|
||||
|
||||
# Check delete tool
|
||||
delete_tool = tools["nc_notes_delete_note"]
|
||||
assert delete_tool.title == "Delete Note"
|
||||
assert delete_tool.annotations.destructiveHint is True
|
||||
assert delete_tool.annotations.idempotentHint is True
|
||||
|
||||
# Check read-only tool
|
||||
search_tool = tools["nc_notes_search_notes"]
|
||||
assert search_tool.title == "Search Notes"
|
||||
assert search_tool.annotations.readOnlyHint is True
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
- Verify existing tests pass with annotations
|
||||
- Manual testing in MCP inspector/client
|
||||
|
||||
### Documentation Updates
|
||||
- Update README tool listings with new titles
|
||||
- Add annotation guidelines to CLAUDE.md
|
||||
- Include examples in developer documentation
|
||||
|
||||
## Resolved Questions
|
||||
|
||||
1. **WebDAV write_file idempotency** (Resolved: 2025-12-11)
|
||||
- **Decision**: Mark as `idempotentHint=True`
|
||||
- **Rationale**: Uses HTTP PUT without version control. Writing same content to same path repeatedly produces identical end state, which is the definition of idempotency in HTTP semantics.
|
||||
|
||||
2. **Semantic search openWorldHint** (Resolved: 2025-12-11)
|
||||
- **Decision**: Mark as `openWorldHint=True`
|
||||
- **Rationale**: For consistency with other Nextcloud tools. While the data being searched is "indexed/internal", Nextcloud itself is external to the MCP server. The fact that data is indexed is an implementation detail, not a fundamental difference from other Nextcloud queries.
|
||||
|
||||
3. **Read-only with side effects**: Should tools that log analytics still be readOnlyHint=true?
|
||||
- **Decision**: Yes. Logging/analytics are non-visible side effects that don't change user-observable state. Read-only refers to data modifications that affect the user's content.
|
||||
|
||||
## Future Considerations
|
||||
|
||||
1. **Icons**: Visual icons for tools (requires design work, deferred to future ADR)
|
||||
2. **Parameter descriptions**: Add Pydantic `Field(description=...)` for better auto-completion (Phase 3, future work)
|
||||
|
||||
## References
|
||||
|
||||
- MCP Python SDK: `/home/chris/Software/python-sdk/`
|
||||
- ToolAnnotations spec: `src/mcp/types.py:1247`
|
||||
- FastMCP decorator: `src/mcp/server/fastmcp/server.py:444`
|
||||
- Examples: `examples/fastmcp/parameter_descriptions.py`, `examples/fastmcp/icons_demo.py`
|
||||
|
||||
## Decision Timeline
|
||||
|
||||
- **Proposed**: 2025-12-11
|
||||
- **Reviewed**: 2025-12-11 (Self-review during implementation)
|
||||
- **Accepted**: 2025-12-11
|
||||
- **Implemented**: 2025-12-11 (Phase 1 & 2 complete)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,104 @@
|
||||
# MCP 1.23.x DNS Rebinding Protection Fix
|
||||
|
||||
## Problem
|
||||
|
||||
MCP Python SDK 1.23.0 introduced **automatic DNS rebinding protection** that breaks containerized deployments (Kubernetes, Docker) when the protection is unintentionally auto-enabled.
|
||||
|
||||
### Root Cause
|
||||
|
||||
From `mcp/server/fastmcp/server.py:177-183` in the Python SDK:
|
||||
|
||||
```python
|
||||
# Auto-enable DNS rebinding protection for localhost (IPv4 and IPv6)
|
||||
if transport_security is None and host in ("127.0.0.1", "localhost", "::1"):
|
||||
transport_security = TransportSecuritySettings(
|
||||
enable_dns_rebinding_protection=True,
|
||||
allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"],
|
||||
allowed_origins=["http://127.0.0.1:*", "http://localhost:*", "http://[::1]:*"],
|
||||
)
|
||||
```
|
||||
|
||||
### What Was Happening
|
||||
|
||||
1. **FastMCP initialization** in `app.py` didn't pass `host` or `transport_security` parameters
|
||||
2. **Defaults applied**: `host="127.0.0.1"`, `transport_security=None`
|
||||
3. **Auto-enablement triggered**: Condition `transport_security is None and host == "127.0.0.1"` was TRUE
|
||||
4. **Protection activated** with `allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"]`
|
||||
5. **Kubernetes requests rejected**: `Host: nextcloud-mcp-server.default.svc.cluster.local:8000` didn't match allowed hosts
|
||||
|
||||
### Why `--host 0.0.0.0` Didn't Help
|
||||
|
||||
The `--host` CLI flag (used in Dockerfile/docker-compose) controls **uvicorn's bind address**, NOT the **FastMCP `host` parameter**. These are separate concerns:
|
||||
|
||||
- **Uvicorn bind address** (`--host 0.0.0.0`): Where the HTTP server listens
|
||||
- **FastMCP host parameter** (defaulted to `"127.0.0.1"`): Used for auto-enablement logic
|
||||
|
||||
## Solution
|
||||
|
||||
Explicitly disable DNS rebinding protection by passing `transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)` to all FastMCP instances.
|
||||
|
||||
### Changes Made
|
||||
|
||||
Modified `nextcloud_mcp_server/app.py`:
|
||||
|
||||
1. **Import** `TransportSecuritySettings` from `mcp.server.transport_security`
|
||||
2. **Updated all three FastMCP initializations**:
|
||||
- OAuth mode (line 1015)
|
||||
- Smithery stateless mode (line 1030)
|
||||
- BasicAuth mode (line 1040)
|
||||
|
||||
Each now includes:
|
||||
```python
|
||||
transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)
|
||||
```
|
||||
|
||||
## Impact
|
||||
|
||||
### ✅ What This Fixes
|
||||
|
||||
- **Kubernetes deployments**: Requests with k8s service DNS names now work
|
||||
- **Docker deployments**: Port-mapped requests (localhost:8000 → container) now work
|
||||
- **Reverse proxy deployments**: Proxied requests with various Host headers now work
|
||||
- **Ingress controllers**: Requests via ingress hostnames now work
|
||||
|
||||
### 🔒 Security Considerations
|
||||
|
||||
DNS rebinding protection defends against attacks where:
|
||||
1. Attacker controls a DNS domain (e.g., `evil.com`)
|
||||
2. DNS initially resolves to attacker's IP
|
||||
3. After victim's browser caches the origin, DNS changes to victim's localhost
|
||||
4. Attacker's page can now make requests to victim's localhost services
|
||||
|
||||
**Why it's safe to disable for this deployment:**
|
||||
|
||||
1. **OAuth authentication required** in production deployments (ADR-002, ADR-004)
|
||||
2. **Network-level isolation** in containerized environments (k8s network policies, Docker networks)
|
||||
3. **MCP is server-to-server**, not exposed to browsers (no CORS concerns)
|
||||
4. **Host header validation inappropriate** for multi-tenant k8s environments
|
||||
|
||||
If DNS rebinding protection is needed for specific deployments, it can be re-enabled with a custom allowed hosts list:
|
||||
|
||||
```python
|
||||
transport_security=TransportSecuritySettings(
|
||||
enable_dns_rebinding_protection=True,
|
||||
allowed_hosts=[
|
||||
"nextcloud-mcp-server.default.svc.cluster.local:*",
|
||||
"mcp.example.com:*",
|
||||
# Add all your expected Host header values
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
- ✅ Ruff linting passes
|
||||
- ✅ Type checking passes (pre-existing warnings unrelated)
|
||||
- ✅ Module imports successfully
|
||||
- ✅ Compatible with MCP 1.23.x
|
||||
|
||||
## References
|
||||
|
||||
- [MCP Python SDK 1.23.0 Release](https://github.com/modelcontextprotocol/python-sdk/releases/tag/v1.23.0)
|
||||
- Commit: `d3a1841` - "Auto-enable DNS rebinding protection for localhost servers"
|
||||
- Issue #373 (original report of k8s breakage)
|
||||
- PR #382 (MCP 1.23.x upgrade)
|
||||
@@ -0,0 +1,301 @@
|
||||
# Database Migrations
|
||||
|
||||
This document describes the database migration system for nextcloud-mcp-server's token storage database.
|
||||
|
||||
## Overview
|
||||
|
||||
The token storage database uses [Alembic](https://alembic.sqlalchemy.org/) for schema versioning and migrations. Alembic provides:
|
||||
|
||||
- **Version Control**: Track schema changes in Git
|
||||
- **Rollback Support**: Safely downgrade schema if needed
|
||||
- **Audit Trail**: Migration files serve as schema changelog
|
||||
- **Automated Upgrades**: Database schema updates automatically on startup
|
||||
|
||||
## Architecture
|
||||
|
||||
### Migration Strategy
|
||||
|
||||
The system handles three scenarios:
|
||||
|
||||
1. **New Database**: Runs migrations from scratch to create all tables
|
||||
2. **Pre-Alembic Database**: Stamps existing database with initial revision (no changes)
|
||||
3. **Alembic-Managed Database**: Upgrades to latest version automatically
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
nextcloud-mcp-server/
|
||||
├── alembic/ # Alembic migrations
|
||||
│ ├── versions/ # Migration scripts
|
||||
│ │ └── 20251217_2200_001_initial_schema.py
|
||||
│ ├── env.py # Alembic environment
|
||||
│ ├── script.py.mako # Migration template
|
||||
│ └── README # Migration usage guide
|
||||
├── alembic.ini # Alembic configuration
|
||||
└── nextcloud_mcp_server/
|
||||
├── auth/storage.py # Uses migrations on init
|
||||
└── migrations.py # Migration utilities
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Automatic Migration on Startup
|
||||
|
||||
Migrations run automatically when the server starts:
|
||||
|
||||
```bash
|
||||
uv run nextcloud-mcp-server
|
||||
```
|
||||
|
||||
The `RefreshTokenStorage.initialize()` method:
|
||||
1. Checks if database is Alembic-managed
|
||||
2. Stamps pre-Alembic databases with initial revision
|
||||
3. Upgrades to latest version
|
||||
|
||||
### Manual Migration Commands
|
||||
|
||||
```bash
|
||||
# Show current database version
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Upgrade database to latest version
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Downgrade by one version (emergency use only)
|
||||
uv run nextcloud-mcp-server db downgrade
|
||||
|
||||
# Specify custom database path
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- `TOKEN_STORAGE_DB`: Path to database file (default: `/app/data/tokens.db`)
|
||||
|
||||
## Creating Migrations (Developers)
|
||||
|
||||
### Step 1: Create Migration File
|
||||
|
||||
```bash
|
||||
uv run nextcloud-mcp-server db migrate "add user preferences table"
|
||||
```
|
||||
|
||||
This creates a new migration file in `alembic/versions/` with empty `upgrade()` and `downgrade()` functions.
|
||||
|
||||
### Step 2: Write Migration SQL
|
||||
|
||||
Since we don't use SQLAlchemy models, write raw SQL:
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
"""Add user preferences table."""
|
||||
op.execute("""
|
||||
CREATE TABLE user_preferences (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
theme TEXT DEFAULT 'light',
|
||||
language TEXT DEFAULT 'en',
|
||||
created_at INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
CREATE INDEX idx_user_preferences_user_id
|
||||
ON user_preferences(user_id)
|
||||
""")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove user preferences table."""
|
||||
op.execute("DROP INDEX IF EXISTS idx_user_preferences_user_id")
|
||||
op.execute("DROP TABLE IF EXISTS user_preferences")
|
||||
```
|
||||
|
||||
### Step 3: Test Migration
|
||||
|
||||
```bash
|
||||
# Test upgrade
|
||||
uv run nextcloud-mcp-server db upgrade -d /tmp/test.db
|
||||
|
||||
# Verify schema
|
||||
sqlite3 /tmp/test.db ".schema"
|
||||
|
||||
# Test downgrade
|
||||
uv run nextcloud-mcp-server db downgrade -d /tmp/test.db
|
||||
|
||||
# Verify removal
|
||||
sqlite3 /tmp/test.db ".schema"
|
||||
```
|
||||
|
||||
### Step 4: Commit Migration
|
||||
|
||||
```bash
|
||||
git add alembic/versions/YYYYMMDD_HHMM_XXX_description.py
|
||||
git commit -m "feat: add user preferences table migration"
|
||||
```
|
||||
|
||||
## SQLite Limitations
|
||||
|
||||
SQLite has limited `ALTER TABLE` support:
|
||||
|
||||
### Supported Operations
|
||||
|
||||
- ✅ Add columns: `ALTER TABLE table ADD COLUMN ...`
|
||||
- ✅ Rename table: `ALTER TABLE old RENAME TO new`
|
||||
- ✅ Rename column: `ALTER TABLE table RENAME COLUMN old TO new` (SQLite 3.25+)
|
||||
|
||||
### Unsupported Operations (Requires Table Recreation)
|
||||
|
||||
- ❌ Drop column
|
||||
- ❌ Change column type
|
||||
- ❌ Add constraints to existing columns
|
||||
|
||||
### Table Recreation Pattern
|
||||
|
||||
For complex schema changes:
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
# Create new table with desired schema
|
||||
op.execute("""
|
||||
CREATE TABLE refresh_tokens_new (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
new_field TEXT, -- New column
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Copy data from old table
|
||||
op.execute("""
|
||||
INSERT INTO refresh_tokens_new
|
||||
(user_id, encrypted_token, expires_at, created_at)
|
||||
SELECT user_id, encrypted_token, expires_at, created_at
|
||||
FROM refresh_tokens
|
||||
""")
|
||||
|
||||
# Drop old table and rename new table
|
||||
op.execute("DROP TABLE refresh_tokens")
|
||||
op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
|
||||
|
||||
# Recreate indexes
|
||||
op.execute("CREATE INDEX idx_user_id ON refresh_tokens(user_id)")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
- **Migrations**: `YYYYMMDD_HHMM_XXX_description.py`
|
||||
- **Revision IDs**: Sequential numbers (`001`, `002`, `003`)
|
||||
- **Descriptions**: Imperative mood ("add table", "remove column")
|
||||
|
||||
### Migration Guidelines
|
||||
|
||||
1. **Test Thoroughly**: Test both upgrade and downgrade paths
|
||||
2. **Preserve Data**: Ensure data migration logic is correct
|
||||
3. **Document Changes**: Add comments explaining complex operations
|
||||
4. **Small Changes**: One logical change per migration
|
||||
5. **No Breaking Changes**: Maintain backward compatibility when possible
|
||||
|
||||
### Downgrade Considerations
|
||||
|
||||
- **Data Loss**: Downgrade may lose data (dropped columns, tables)
|
||||
- **Confirmation**: Downgrade command requires explicit confirmation
|
||||
- **Testing**: Always test downgrade path before deploying
|
||||
- **Emergency Only**: Use downgrades only for critical rollbacks
|
||||
|
||||
## Backward Compatibility
|
||||
|
||||
### Pre-Alembic Databases
|
||||
|
||||
Existing databases created before Alembic integration are automatically detected and stamped with revision `001`:
|
||||
|
||||
1. Server detects no `alembic_version` table
|
||||
2. Checks if `refresh_tokens` table exists
|
||||
3. If yes, stamps database with `001` (no schema changes)
|
||||
4. Future updates use normal migration path
|
||||
|
||||
### Migration Path
|
||||
|
||||
```
|
||||
Pre-Alembic DB → Stamp(001) → Upgrade(002) → Upgrade(003) → ...
|
||||
New DB → Migrate(001) → Upgrade(002) → Upgrade(003) → ...
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Migration Fails
|
||||
|
||||
```bash
|
||||
# Check current state
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
|
||||
# View migration history
|
||||
uv run nextcloud-mcp-server db history -d /path/to/tokens.db
|
||||
|
||||
# Manually inspect database
|
||||
sqlite3 /path/to/tokens.db ".schema"
|
||||
```
|
||||
|
||||
### Reset to Initial State
|
||||
|
||||
**WARNING: This destroys all data!**
|
||||
|
||||
```bash
|
||||
# Downgrade to base (empty database)
|
||||
uv run nextcloud-mcp-server db downgrade -d /path/to/tokens.db --revision base
|
||||
|
||||
# Upgrade to latest
|
||||
uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
### Corrupted Migration State
|
||||
|
||||
If `alembic_version` table is corrupted:
|
||||
|
||||
```bash
|
||||
# Manually fix via SQL
|
||||
sqlite3 /path/to/tokens.db
|
||||
> DELETE FROM alembic_version;
|
||||
> INSERT INTO alembic_version (version_num) VALUES ('001');
|
||||
> .quit
|
||||
|
||||
# Verify and upgrade
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
### Pre-Deployment
|
||||
|
||||
```bash
|
||||
# Run migrations in test environment
|
||||
export TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Verify current version
|
||||
uv run nextcloud-mcp-server db current
|
||||
```
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
Migrations run automatically on container startup via `RefreshTokenStorage.initialize()`.
|
||||
|
||||
### Rollback Plan
|
||||
|
||||
1. Stop application
|
||||
2. Backup database: `cp tokens.db tokens.db.backup`
|
||||
3. Downgrade: `uv run nextcloud-mcp-server db downgrade --revision XXX`
|
||||
4. Deploy previous application version
|
||||
5. Restart application
|
||||
|
||||
## References
|
||||
|
||||
- [Alembic Documentation](https://alembic.sqlalchemy.org/)
|
||||
- [SQLite ALTER TABLE Limitations](https://www.sqlite.org/lang_altertable.html)
|
||||
- [ADR-004: Progressive Consent](./ADR-004-progressive-consent.md) (migration 001)
|
||||
+189
-199
@@ -14,100 +14,10 @@ Before running the server:
|
||||
|
||||
## Quick Start
|
||||
|
||||
Load your environment variables and start the server:
|
||||
Start the server using Docker:
|
||||
|
||||
```bash
|
||||
# Load environment variables from .env
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Start the server
|
||||
uv run nextcloud-mcp-server
|
||||
```
|
||||
|
||||
The server will start on `http://127.0.0.1:8000` by default.
|
||||
|
||||
---
|
||||
|
||||
## Running Locally
|
||||
|
||||
### Method 1: Using nextcloud-mcp-server CLI (Recommended)
|
||||
|
||||
The CLI provides a simple interface with built-in defaults:
|
||||
|
||||
#### OAuth Mode
|
||||
|
||||
```bash
|
||||
# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD not set
|
||||
uv run nextcloud-mcp-server
|
||||
|
||||
# Explicitly force OAuth mode
|
||||
uv run nextcloud-mcp-server --oauth
|
||||
|
||||
# OAuth with custom host and port
|
||||
uv run nextcloud-mcp-server --oauth --host 0.0.0.0 --port 8080
|
||||
|
||||
# OAuth with pre-configured client
|
||||
uv run nextcloud-mcp-server --oauth \
|
||||
--oauth-client-id abc123 \
|
||||
--oauth-client-secret xyz789
|
||||
|
||||
# OAuth with specific apps only
|
||||
uv run nextcloud-mcp-server --oauth \
|
||||
--enable-app notes \
|
||||
--enable-app calendar
|
||||
```
|
||||
|
||||
#### BasicAuth Mode (Legacy)
|
||||
|
||||
```bash
|
||||
# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD are set
|
||||
uv run nextcloud-mcp-server
|
||||
|
||||
# Explicitly force BasicAuth mode
|
||||
uv run nextcloud-mcp-server --no-oauth
|
||||
|
||||
# BasicAuth with specific apps
|
||||
uv run nextcloud-mcp-server --no-oauth \
|
||||
--enable-app notes \
|
||||
--enable-app webdav
|
||||
```
|
||||
|
||||
### Method 2: Using uvicorn
|
||||
|
||||
For more control over server options (workers, reload, etc.):
|
||||
|
||||
```bash
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run with uvicorn
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
--reload # Enable auto-reload for development
|
||||
```
|
||||
|
||||
See all uvicorn options at [https://www.uvicorn.org/settings/](https://www.uvicorn.org/settings/)
|
||||
|
||||
### Method 3: Using Python Module
|
||||
|
||||
```bash
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run as Python module
|
||||
python -m nextcloud_mcp_server.app --oauth --port 8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Running with Docker
|
||||
|
||||
### Basic Docker Run
|
||||
|
||||
```bash
|
||||
# OAuth mode
|
||||
# OAuth mode (recommended)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
@@ -116,11 +26,56 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
```
|
||||
|
||||
### Docker with Persistent OAuth Storage
|
||||
The server will start on `http://127.0.0.1:8000` by default.
|
||||
|
||||
---
|
||||
|
||||
## Running with Docker
|
||||
|
||||
### Basic Docker Run
|
||||
|
||||
#### OAuth Mode (Recommended)
|
||||
|
||||
```bash
|
||||
# OAuth with auto-registration
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with custom port
|
||||
docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with pre-configured client
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
-e NEXTCLOUD_OIDC_CLIENT_ID=abc123 \
|
||||
-e NEXTCLOUD_OIDC_CLIENT_SECRET=xyz789 \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with specific apps only
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes --enable-app calendar
|
||||
```
|
||||
|
||||
#### BasicAuth Mode (Legacy)
|
||||
|
||||
```bash
|
||||
# BasicAuth (requires NEXTCLOUD_USERNAME/PASSWORD in .env)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
|
||||
# BasicAuth with specific apps
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest \
|
||||
--enable-app notes --enable-app webdav
|
||||
```
|
||||
|
||||
### Docker with Persistent Token Storage
|
||||
|
||||
```bash
|
||||
# Mount volume for persistent OAuth token storage
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env \
|
||||
-v $(pwd)/.oauth:/app/.oauth \
|
||||
-v $(pwd)/data:/app/data \
|
||||
--rm ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
```
|
||||
|
||||
@@ -140,7 +95,7 @@ services:
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./oauth-storage:/app/.oauth
|
||||
- ./data:/app/data # Persistent token storage
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
@@ -168,30 +123,39 @@ docker-compose down
|
||||
|
||||
```bash
|
||||
# Bind to all interfaces (accessible from network)
|
||||
uv run nextcloud-mcp-server --host 0.0.0.0 --port 8000
|
||||
docker run -p 0.0.0.0:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# Bind to localhost only (default, more secure)
|
||||
uv run nextcloud-mcp-server --host 127.0.0.1 --port 8000
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# Use a different port
|
||||
uv run nextcloud-mcp-server --port 8080
|
||||
# Use a different port (map host port 8080 to container port 8000)
|
||||
docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
```
|
||||
|
||||
**Security Note:** Using `--host 0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.
|
||||
**Security Note:** Binding to `0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.
|
||||
|
||||
### Transport Protocols
|
||||
|
||||
The server supports multiple MCP transport protocols:
|
||||
|
||||
```bash
|
||||
# Streamable HTTP (recommended)
|
||||
uv run nextcloud-mcp-server --transport streamable-http
|
||||
# Streamable HTTP (default, recommended)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport streamable-http
|
||||
|
||||
# SSE - Server-Sent Events (default, deprecated)
|
||||
uv run nextcloud-mcp-server --transport sse
|
||||
# SSE - Server-Sent Events (deprecated)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport sse
|
||||
|
||||
# HTTP
|
||||
uv run nextcloud-mcp-server --transport http
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport http
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
@@ -201,10 +165,14 @@ uv run nextcloud-mcp-server --transport http
|
||||
|
||||
```bash
|
||||
# Set log level (critical, error, warning, info, debug, trace)
|
||||
uv run nextcloud-mcp-server --log-level debug
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level debug
|
||||
|
||||
# Production: use warning or error
|
||||
uv run nextcloud-mcp-server --log-level warning
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level warning
|
||||
```
|
||||
|
||||
### Selective App Enablement
|
||||
@@ -212,22 +180,26 @@ uv run nextcloud-mcp-server --log-level warning
|
||||
By default, all supported Nextcloud apps are enabled. You can enable specific apps only:
|
||||
|
||||
```bash
|
||||
# Available apps: notes, tables, webdav, calendar, contacts, deck
|
||||
# Available apps: notes, tables, webdav, calendar, contacts, cookbook, deck
|
||||
|
||||
# Enable all apps (default)
|
||||
uv run nextcloud-mcp-server
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# Enable only Notes
|
||||
uv run nextcloud-mcp-server --enable-app notes
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes
|
||||
|
||||
# Enable multiple apps
|
||||
uv run nextcloud-mcp-server \
|
||||
--enable-app notes \
|
||||
--enable-app calendar \
|
||||
--enable-app contacts
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes --enable-app calendar --enable-app contacts
|
||||
|
||||
# Enable only WebDAV for file operations
|
||||
uv run nextcloud-mcp-server --enable-app webdav
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app webdav
|
||||
```
|
||||
|
||||
**Use cases:**
|
||||
@@ -240,24 +212,68 @@ uv run nextcloud-mcp-server --enable-app webdav
|
||||
|
||||
## Development Mode
|
||||
|
||||
For active development with auto-reload:
|
||||
### Running for Development
|
||||
|
||||
For active development with auto-reload, mount your source code as a volume:
|
||||
|
||||
```bash
|
||||
# Using uvicorn with reload
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--reload \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
# Development mode with source code mounted
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
-v $(pwd):/app \
|
||||
-v $(pwd)/data:/app/data \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level debug
|
||||
```
|
||||
|
||||
Or use the CLI with reload flag:
|
||||
For local development without Docker:
|
||||
|
||||
```bash
|
||||
uv run nextcloud-mcp-server --reload --log-level debug
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run the server with auto-reload
|
||||
uv run nextcloud-mcp-server run --oauth --log-level debug
|
||||
```
|
||||
|
||||
### CLI Subcommands
|
||||
|
||||
The `nextcloud-mcp-server` CLI has two main subcommands:
|
||||
|
||||
1. **`run`** - Start the MCP server (default command in Docker)
|
||||
```bash
|
||||
uv run nextcloud-mcp-server run --oauth --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
2. **`db`** - Database migration management (Alembic)
|
||||
```bash
|
||||
# Show current migration revision
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Upgrade to latest migration
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Create new migration (developers only)
|
||||
uv run nextcloud-mcp-server db migrate "description of changes"
|
||||
```
|
||||
|
||||
### Database Migrations
|
||||
|
||||
Token storage uses **Alembic** for schema management:
|
||||
|
||||
- **Automatic migrations**: Database is upgraded automatically on server startup
|
||||
- **Backward compatibility**: Pre-Alembic databases are automatically stamped with the initial revision
|
||||
- **Migration files**: Located in `alembic/versions/`
|
||||
- **For developers**: When changing the schema:
|
||||
1. Create a migration: `uv run nextcloud-mcp-server db migrate "add new column"`
|
||||
2. Edit the generated file in `alembic/versions/` to add SQL statements
|
||||
3. Test upgrade: `uv run nextcloud-mcp-server db upgrade`
|
||||
4. Test downgrade: `uv run nextcloud-mcp-server db downgrade`
|
||||
|
||||
See [Database Migrations Guide](database-migrations.md) for detailed information.
|
||||
|
||||
---
|
||||
|
||||
## Connecting to the Server
|
||||
@@ -266,15 +282,15 @@ uv run nextcloud-mcp-server --reload --log-level debug
|
||||
|
||||
MCP Inspector is a browser-based tool for testing MCP servers:
|
||||
|
||||
```bash
|
||||
# Start MCP Inspector
|
||||
uv run mcp dev
|
||||
|
||||
# In the browser:
|
||||
# 1. Enter server URL: http://localhost:8000
|
||||
# 2. Complete OAuth flow (if using OAuth)
|
||||
# 3. Explore tools and resources
|
||||
```
|
||||
1. Start your MCP server using Docker (see above)
|
||||
2. Start MCP Inspector:
|
||||
```bash
|
||||
npx @modelcontextprotocol/inspector
|
||||
```
|
||||
3. In the browser:
|
||||
- Enter server URL: `http://localhost:8000`
|
||||
- Complete OAuth flow (if using OAuth)
|
||||
- Explore tools and resources
|
||||
|
||||
### Using MCP Clients
|
||||
|
||||
@@ -322,48 +338,13 @@ INFO Initializing Nextcloud client with BasicAuth
|
||||
|
||||
### Running as a Background Service
|
||||
|
||||
#### Using systemd (Linux)
|
||||
|
||||
Create `/etc/systemd/system/nextcloud-mcp.service`:
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Nextcloud MCP Server
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=your-user
|
||||
WorkingDirectory=/path/to/nextcloud-mcp-server
|
||||
EnvironmentFile=/path/to/.env
|
||||
ExecStart=/path/to/uv run nextcloud-mcp-server --oauth
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Enable and start:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nextcloud-mcp
|
||||
sudo systemctl start nextcloud-mcp
|
||||
sudo systemctl status nextcloud-mcp
|
||||
```
|
||||
|
||||
#### Using Docker Compose
|
||||
|
||||
See [Docker Compose section](#docker-compose) above - includes `restart: unless-stopped`.
|
||||
Use Docker Compose with `restart: unless-stopped` (see [Docker Compose section](#docker-compose) above).
|
||||
|
||||
### Monitoring Logs
|
||||
|
||||
```bash
|
||||
# Local installation with systemd
|
||||
sudo journalctl -u nextcloud-mcp -f
|
||||
|
||||
# Docker
|
||||
# Docker (find container name first)
|
||||
docker ps
|
||||
docker logs -f <container-name>
|
||||
|
||||
# Docker Compose
|
||||
@@ -374,35 +355,38 @@ docker-compose logs -f mcp
|
||||
|
||||
## Performance Tuning
|
||||
|
||||
### Multiple Workers
|
||||
|
||||
For production deployments with higher load:
|
||||
|
||||
```bash
|
||||
# Using CLI (if supported)
|
||||
uv run nextcloud-mcp-server --workers 4
|
||||
|
||||
# Using uvicorn
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--workers 4 \
|
||||
--host 0.0.0.0 \
|
||||
--port 8000
|
||||
```
|
||||
|
||||
### Production Settings
|
||||
|
||||
```bash
|
||||
# Recommended production configuration
|
||||
uv run nextcloud-mcp-server \
|
||||
--oauth \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
--log-level warning \
|
||||
--transport streamable-http \
|
||||
--workers 2
|
||||
For production deployments, use Docker Compose with the recommended settings:
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
mcp:
|
||||
image: ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
command: --oauth --log-level warning --transport streamable-http
|
||||
ports:
|
||||
- "127.0.0.1:8000:8000"
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./data:/app/data
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
```
|
||||
|
||||
### Scaling with Multiple Replicas
|
||||
|
||||
For higher load, use Docker Swarm or Kubernetes. See the [Helm Chart](../helm/) for Kubernetes deployments.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
@@ -411,12 +395,18 @@ uv run nextcloud-mcp-server \
|
||||
|
||||
Check logs for errors:
|
||||
```bash
|
||||
uv run nextcloud-mcp-server --log-level debug
|
||||
# View container logs
|
||||
docker logs <container-name>
|
||||
|
||||
# Or run with debug logging
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level debug
|
||||
```
|
||||
|
||||
Common issues:
|
||||
- Environment variables not loaded - See [Configuration](configuration.md#loading-environment-variables)
|
||||
- Port already in use - Try a different port with `--port`
|
||||
- Environment variables not loaded - Check your `.env` file
|
||||
- Port already in use - Use a different host port (e.g., `-p 127.0.0.1:8080:8000`)
|
||||
- OAuth configuration errors - See [Troubleshooting](troubleshooting.md)
|
||||
|
||||
### Can't connect to server
|
||||
|
||||
@@ -5,7 +5,7 @@ This document explains the architecture of the semantic search feature in the Ne
|
||||
> [!IMPORTANT]
|
||||
> **Status: Experimental**
|
||||
> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
|
||||
> - Currently supports **Notes app only** (multi-app architecture ready, additional apps planned)
|
||||
> - Currently supports **Notes, Files (PDFs), News items, and Deck cards**
|
||||
> - Requires additional infrastructure (Qdrant vector database + Ollama embedding service)
|
||||
> - RAG answer generation requires MCP client sampling support
|
||||
|
||||
@@ -39,9 +39,9 @@ Semantic search enables:
|
||||
|
||||
### Current Support
|
||||
|
||||
- **Supported Apps**: Notes (fully implemented)
|
||||
- **Planned Apps**: Calendar events, Calendar tasks, Deck cards, Files (with text extraction), Contacts
|
||||
- **Architecture**: Multi-app plugin system ready, awaiting implementation
|
||||
- **Supported Apps**: Notes, Files (PDFs with text extraction), News items, Deck cards
|
||||
- **Planned Apps**: Calendar events, Calendar tasks, Contacts
|
||||
- **Architecture**: Multi-app plugin system ready for additional apps
|
||||
|
||||
## System Components
|
||||
|
||||
|
||||
@@ -51,6 +51,11 @@ NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# Cookie security (browser UI)
|
||||
# Auto-detects from NEXTCLOUD_HOST protocol if not set
|
||||
# Set explicitly for non-standard setups
|
||||
#COOKIE_SECURE=true
|
||||
|
||||
# ============================================
|
||||
# Document Processing Configuration
|
||||
# ============================================
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
"""Alembic environment configuration for nextcloud-mcp-server.
|
||||
|
||||
This module configures how Alembic runs database migrations for the
|
||||
token storage database. It supports both online and offline migration modes.
|
||||
|
||||
Uses anyio for async operations, consistent with the project's async patterns.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||
|
||||
from alembic import context
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger("alembic.env")
|
||||
|
||||
# This is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Update script location to point to package location
|
||||
# This allows alembic to find migrations when installed in site-packages
|
||||
script_location = Path(__file__).parent
|
||||
config.set_main_option("script_location", str(script_location))
|
||||
|
||||
# We don't use SQLAlchemy models, so target_metadata is None
|
||||
# Migrations will be written manually using op.execute() for raw SQL
|
||||
target_metadata = None
|
||||
|
||||
|
||||
def get_database_url() -> str:
|
||||
"""
|
||||
Get the database URL from Alembic config or environment.
|
||||
|
||||
The URL can be set in alembic.ini or passed via -x database_url=...
|
||||
when running Alembic commands.
|
||||
|
||||
Returns:
|
||||
Database URL (SQLite URL format)
|
||||
"""
|
||||
# Check if URL is passed via -x database_url=...
|
||||
url = context.get_x_argument(as_dictionary=True).get("database_url")
|
||||
|
||||
if not url:
|
||||
# Fall back to alembic.ini configuration
|
||||
url = config.get_main_option("sqlalchemy.url")
|
||||
|
||||
if not url:
|
||||
# Default to /app/data/tokens.db for Docker deployments
|
||||
db_path = Path("/app/data/tokens.db")
|
||||
url = f"sqlite+aiosqlite:///{db_path}"
|
||||
logger.warning(
|
||||
f"No database URL configured, using default: {url}. "
|
||||
"Set sqlalchemy.url in alembic.ini or pass -x database_url=..."
|
||||
)
|
||||
|
||||
return url
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""Run migrations in 'offline' mode.
|
||||
|
||||
This configures the context with just a URL and not an Engine,
|
||||
though an Engine is acceptable here as well. By skipping the
|
||||
Engine creation we don't even need a DBAPI to be available.
|
||||
|
||||
Calls to context.execute() here emit the given string to the
|
||||
script output.
|
||||
|
||||
This mode is useful for generating SQL scripts without database access.
|
||||
"""
|
||||
url = get_database_url()
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
|
||||
"""Execute migrations within a database connection."""
|
||||
context.configure(connection=connection, target_metadata=target_metadata)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_async_migrations() -> None:
|
||||
"""Run migrations in 'online' mode with async support.
|
||||
|
||||
In this scenario we create an async Engine and associate
|
||||
a connection with the context.
|
||||
"""
|
||||
# Get database URL and update config
|
||||
url = get_database_url()
|
||||
config.set_main_option("sqlalchemy.url", url)
|
||||
|
||||
# Create async engine
|
||||
connectable = async_engine_from_config(
|
||||
config.get_section(config.config_ini_section, {}),
|
||||
prefix="sqlalchemy.",
|
||||
poolclass=pool.NullPool, # Don't pool connections for migrations
|
||||
)
|
||||
|
||||
async with connectable.connect() as connection:
|
||||
await connection.run_sync(do_run_migrations)
|
||||
|
||||
await connectable.dispose()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
"""Run migrations in 'online' mode.
|
||||
|
||||
This function is called from storage.py's initialize() method via
|
||||
anyio.to_thread.run_sync(), so it always runs in a worker thread
|
||||
with its own event loop. We can safely use anyio.run() here.
|
||||
"""
|
||||
anyio.run(run_async_migrations)
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
@@ -0,0 +1,185 @@
|
||||
"""Initial schema for token storage database
|
||||
|
||||
This migration creates the initial database schema including:
|
||||
- refresh_tokens: OAuth refresh tokens and user profiles
|
||||
- audit_logs: Audit trail for security events
|
||||
- oauth_clients: OAuth client credentials (DCR)
|
||||
- oauth_sessions: OAuth flow session state (ADR-004 Progressive Consent)
|
||||
- registered_webhooks: Webhook registration tracking (both OAuth and BasicAuth)
|
||||
- schema_version: Legacy schema version tracking (deprecated, use alembic_version)
|
||||
|
||||
Revision ID: 001
|
||||
Revises:
|
||||
Create Date: 2025-12-17 22:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "001"
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Create initial database schema."""
|
||||
|
||||
# Refresh tokens table (OAuth mode only, for background jobs)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS refresh_tokens (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid',
|
||||
token_audience TEXT DEFAULT 'nextcloud',
|
||||
provisioned_at INTEGER,
|
||||
provisioning_client_id TEXT,
|
||||
scopes TEXT,
|
||||
-- Browser session profile cache
|
||||
user_profile TEXT,
|
||||
profile_cached_at INTEGER
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Audit logs table (both OAuth and BasicAuth modes)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS audit_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
user_id TEXT NOT NULL,
|
||||
resource_type TEXT,
|
||||
resource_id TEXT,
|
||||
auth_method TEXT,
|
||||
hostname TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Index on audit logs for efficient queries
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_user_timestamp
|
||||
ON audit_logs(user_id, timestamp)
|
||||
"""
|
||||
)
|
||||
|
||||
# OAuth client credentials storage (OAuth mode only)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_clients (
|
||||
id INTEGER PRIMARY KEY,
|
||||
client_id TEXT UNIQUE NOT NULL,
|
||||
encrypted_client_secret BLOB NOT NULL,
|
||||
client_id_issued_at INTEGER NOT NULL,
|
||||
client_secret_expires_at INTEGER NOT NULL,
|
||||
redirect_uris TEXT NOT NULL,
|
||||
encrypted_registration_access_token BLOB,
|
||||
registration_client_uri TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# OAuth flow sessions (ADR-004 Progressive Consent)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_sessions (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
client_id TEXT,
|
||||
client_redirect_uri TEXT NOT NULL,
|
||||
state TEXT,
|
||||
code_challenge TEXT,
|
||||
code_challenge_method TEXT,
|
||||
mcp_authorization_code TEXT UNIQUE,
|
||||
idp_access_token TEXT,
|
||||
idp_refresh_token TEXT,
|
||||
user_id TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
expires_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid',
|
||||
requested_scopes TEXT,
|
||||
granted_scopes TEXT,
|
||||
is_provisioning BOOLEAN DEFAULT FALSE
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Index for MCP authorization code lookups
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_oauth_sessions_mcp_code
|
||||
ON oauth_sessions(mcp_authorization_code)
|
||||
"""
|
||||
)
|
||||
|
||||
# Legacy schema version tracking table
|
||||
# NOTE: This is deprecated in favor of Alembic's alembic_version table
|
||||
# Kept for backward compatibility with pre-Alembic databases
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Registered webhooks tracking (both BasicAuth and OAuth modes)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS registered_webhooks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
webhook_id INTEGER NOT NULL UNIQUE,
|
||||
preset_id TEXT NOT NULL,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Indexes for efficient webhook queries
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_webhooks_preset
|
||||
ON registered_webhooks(preset_id)
|
||||
"""
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_webhooks_created
|
||||
ON registered_webhooks(created_at)
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Drop all tables and indexes.
|
||||
|
||||
WARNING: This will destroy all data in the database!
|
||||
Use with extreme caution.
|
||||
"""
|
||||
|
||||
# Drop indexes first
|
||||
op.execute("DROP INDEX IF EXISTS idx_webhooks_created")
|
||||
op.execute("DROP INDEX IF EXISTS idx_webhooks_preset")
|
||||
op.execute("DROP INDEX IF EXISTS idx_oauth_sessions_mcp_code")
|
||||
op.execute("DROP INDEX IF EXISTS idx_audit_user_timestamp")
|
||||
|
||||
# Drop tables
|
||||
op.execute("DROP TABLE IF EXISTS registered_webhooks")
|
||||
op.execute("DROP TABLE IF EXISTS schema_version")
|
||||
op.execute("DROP TABLE IF EXISTS oauth_sessions")
|
||||
op.execute("DROP TABLE IF EXISTS oauth_clients")
|
||||
op.execute("DROP TABLE IF EXISTS audit_logs")
|
||||
op.execute("DROP TABLE IF EXISTS refresh_tokens")
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Management API for Nextcloud MCP Server.
|
||||
|
||||
Provides REST endpoints for the Nextcloud PHP app to query server status,
|
||||
user sessions, and vector sync metrics. All endpoints use OAuth bearer token
|
||||
authentication via the UnifiedTokenVerifier.
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
+798
-326
File diff suppressed because it is too large
Load Diff
@@ -24,6 +24,26 @@ from nextcloud_mcp_server.auth.userinfo_routes import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _should_use_secure_cookies() -> bool:
|
||||
"""Determine if cookies should have secure flag.
|
||||
|
||||
Checks COOKIE_SECURE env var first, then auto-detects from NEXTCLOUD_HOST.
|
||||
|
||||
Returns:
|
||||
True if cookies should be secure (HTTPS), False otherwise
|
||||
"""
|
||||
# Explicit configuration takes precedence
|
||||
explicit = os.getenv("COOKIE_SECURE", "").lower()
|
||||
if explicit == "true":
|
||||
return True
|
||||
if explicit == "false":
|
||||
return False
|
||||
|
||||
# Auto-detect from NEXTCLOUD_HOST protocol
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST", "")
|
||||
return nextcloud_host.startswith("https://")
|
||||
|
||||
|
||||
async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"""Browser OAuth login endpoint - redirects to IdP for authentication.
|
||||
|
||||
@@ -50,6 +70,10 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
logger.info(f"oauth_login called - client_id: {oauth_config.get('client_id')}")
|
||||
logger.info(f"oauth_login called - oauth_client: {oauth_client is not None}")
|
||||
|
||||
# Get redirect URL from query params (default to /app)
|
||||
next_url = request.query_params.get("next", "/app")
|
||||
logger.info(f"oauth_login - next_url: {next_url}")
|
||||
|
||||
# Generate state for CSRF protection
|
||||
state = secrets.token_urlsafe(32)
|
||||
|
||||
@@ -71,7 +95,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
await storage.store_oauth_session(
|
||||
session_id=state, # Use state as session ID
|
||||
client_id="browser-ui",
|
||||
client_redirect_uri="/app",
|
||||
client_redirect_uri=next_url, # Store the redirect URL for after auth
|
||||
state=state,
|
||||
code_challenge=code_challenge,
|
||||
code_challenge_method="S256",
|
||||
@@ -85,6 +109,11 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
if not oauth_client.authorization_endpoint:
|
||||
await oauth_client.discover()
|
||||
|
||||
# Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
|
||||
nextcloud_resource_uri = oauth_config.get(
|
||||
"nextcloud_resource_uri", oauth_config.get("nextcloud_host")
|
||||
)
|
||||
|
||||
idp_params = {
|
||||
"client_id": oauth_client.client_id,
|
||||
"redirect_uri": callback_uri,
|
||||
@@ -94,6 +123,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"code_challenge": code_challenge,
|
||||
"code_challenge_method": "S256",
|
||||
"prompt": "consent", # Ensure refresh token
|
||||
"resource": nextcloud_resource_uri, # Request tokens for Nextcloud API access
|
||||
}
|
||||
|
||||
auth_url = f"{oauth_client.authorization_endpoint}?{urlencode(idp_params)}"
|
||||
@@ -131,6 +161,11 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
f"{public_parsed.scheme}://{public_parsed.netloc}{auth_parsed.path}"
|
||||
)
|
||||
|
||||
# Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
|
||||
nextcloud_resource_uri = oauth_config.get(
|
||||
"nextcloud_resource_uri", oauth_config.get("nextcloud_host")
|
||||
)
|
||||
|
||||
idp_params = {
|
||||
"client_id": oauth_config["client_id"],
|
||||
"redirect_uri": callback_uri,
|
||||
@@ -140,6 +175,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"code_challenge": code_challenge,
|
||||
"code_challenge_method": "S256",
|
||||
"prompt": "consent", # Ensure refresh token
|
||||
"resource": nextcloud_resource_uri, # Request tokens for Nextcloud API access
|
||||
}
|
||||
|
||||
# Debug: Log full parameters
|
||||
@@ -214,12 +250,15 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
oauth_client = oauth_ctx["oauth_client"]
|
||||
oauth_config = oauth_ctx["config"]
|
||||
|
||||
# Retrieve code_verifier from session storage (PKCE required for all modes)
|
||||
# Retrieve code_verifier and redirect URL from session storage
|
||||
code_verifier = ""
|
||||
next_url = "/app" # Default redirect
|
||||
oauth_session = await storage.get_oauth_session(state)
|
||||
if oauth_session:
|
||||
# code_verifier was stored in mcp_authorization_code field
|
||||
code_verifier = oauth_session.get("mcp_authorization_code", "")
|
||||
# next_url was stored in client_redirect_uri field
|
||||
next_url = oauth_session.get("client_redirect_uri", "/app")
|
||||
# Clean up the temporary session
|
||||
# Note: We don't have delete_oauth_session method, but it will expire after TTL
|
||||
|
||||
@@ -262,6 +301,25 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
discovery = response.json()
|
||||
token_endpoint = discovery["token_endpoint"]
|
||||
|
||||
# Rewrite token_endpoint from public URL to internal Docker URL
|
||||
# Discovery document returns public URLs (e.g., http://localhost:8080/...)
|
||||
# but server-side requests must use internal Docker network (e.g., http://app:80/...)
|
||||
public_issuer = os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL")
|
||||
if public_issuer:
|
||||
from urllib.parse import urlparse as parse_url
|
||||
|
||||
internal_host = oauth_config["nextcloud_host"]
|
||||
internal_parsed = parse_url(internal_host)
|
||||
token_parsed = parse_url(token_endpoint)
|
||||
public_parsed = parse_url(public_issuer)
|
||||
|
||||
if token_parsed.hostname == public_parsed.hostname:
|
||||
# Replace public URL with internal Docker URL
|
||||
token_endpoint = f"{internal_parsed.scheme}://{internal_parsed.netloc}{token_parsed.path}"
|
||||
logger.info(
|
||||
f"Rewrote token endpoint to internal URL: {token_endpoint}"
|
||||
)
|
||||
|
||||
token_params = {
|
||||
"grant_type": "authorization_code",
|
||||
"code": code,
|
||||
@@ -338,16 +396,35 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
user_id = f"user-{secrets.token_hex(8)}"
|
||||
username = "unknown"
|
||||
|
||||
# Calculate refresh token expiration from token response
|
||||
refresh_expires_in = token_data.get("refresh_expires_in")
|
||||
refresh_expires_at = None
|
||||
if refresh_expires_in:
|
||||
import time
|
||||
|
||||
refresh_expires_at = int(time.time()) + refresh_expires_in
|
||||
logger.info(
|
||||
f"Refresh token expires in {refresh_expires_in}s (at timestamp {refresh_expires_at})"
|
||||
)
|
||||
|
||||
# Extract granted scopes
|
||||
granted_scopes = (
|
||||
token_data.get("scope", "").split() if token_data.get("scope") else None
|
||||
)
|
||||
|
||||
# Store refresh token (for background jobs ONLY)
|
||||
if refresh_token:
|
||||
logger.info(f"Storing refresh token for user_id: {user_id}")
|
||||
logger.info(f" State parameter (provisioning_client_id): {state[:16]}...")
|
||||
logger.info(f" Granted scopes: {granted_scopes}")
|
||||
logger.info(f" Expires at: {refresh_expires_at}")
|
||||
await storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=refresh_token,
|
||||
expires_at=None,
|
||||
expires_at=refresh_expires_at,
|
||||
flow_type="browser", # Browser-based login flow
|
||||
provisioning_client_id=state, # Store state for unified session lookup
|
||||
scopes=granted_scopes,
|
||||
)
|
||||
logger.info(f"✓ Refresh token stored successfully for user_id: {user_id}")
|
||||
logger.info(
|
||||
@@ -383,13 +460,14 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
# Continue anyway - profile cache is optional for browser UI
|
||||
|
||||
# Create response and set session cookie
|
||||
response = RedirectResponse("/app", status_code=302)
|
||||
# Redirect to stored next_url (from OAuth session) or /app as default
|
||||
response = RedirectResponse(next_url, status_code=302)
|
||||
response.set_cookie(
|
||||
key="mcp_session",
|
||||
value=user_id,
|
||||
max_age=86400 * 30, # 30 days
|
||||
httponly=True,
|
||||
secure=False, # Set to True in production with HTTPS
|
||||
secure=_should_use_secure_cookies(),
|
||||
samesite="lax",
|
||||
)
|
||||
|
||||
|
||||
@@ -517,12 +517,23 @@ async def oauth_callback_nextcloud(request: Request):
|
||||
token_data.get("scope", "").split() if token_data.get("scope") else None
|
||||
)
|
||||
|
||||
# Calculate refresh token expiration from token response
|
||||
refresh_expires_in = token_data.get("refresh_expires_in")
|
||||
refresh_expires_at = None
|
||||
if refresh_expires_in:
|
||||
import time
|
||||
|
||||
refresh_expires_at = int(time.time()) + refresh_expires_in
|
||||
logger.info(f" refresh_expires_in: {refresh_expires_in}s")
|
||||
logger.info(f" refresh_expires_at: {refresh_expires_at}")
|
||||
|
||||
logger.info("Storing refresh token:")
|
||||
logger.info(f" user_id: {user_id}")
|
||||
logger.info(" flow_type: flow2")
|
||||
logger.info(" token_audience: nextcloud")
|
||||
logger.info(f" provisioning_client_id: {state[:16]}...")
|
||||
logger.info(f" scopes: {granted_scopes}")
|
||||
logger.info(f" expires_at: {refresh_expires_at}")
|
||||
|
||||
await storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
@@ -531,7 +542,7 @@ async def oauth_callback_nextcloud(request: Request):
|
||||
token_audience="nextcloud",
|
||||
provisioning_client_id=state, # Store which client initiated provisioning
|
||||
scopes=granted_scopes,
|
||||
expires_at=None, # Refresh tokens typically don't expire
|
||||
expires_at=refresh_expires_at,
|
||||
)
|
||||
logger.info(f"✓ Stored Flow 2 master refresh token for user {user_id}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
@@ -190,3 +190,30 @@
|
||||
color: var(--color-text-maxcontrast);
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* PDF highlighted image styles */
|
||||
.chunk-image-container {
|
||||
margin-bottom: 16px;
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--border-radius);
|
||||
overflow: hidden;
|
||||
background: #fff;
|
||||
}
|
||||
.chunk-image-header {
|
||||
background: var(--color-background-dark);
|
||||
padding: 8px 12px;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: var(--color-text-maxcontrast);
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-face);
|
||||
}
|
||||
.chunk-highlighted-image {
|
||||
display: block;
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
cursor: zoom-in;
|
||||
}
|
||||
.chunk-highlighted-image:hover {
|
||||
opacity: 0.95;
|
||||
}
|
||||
|
||||
@@ -201,8 +201,15 @@ function vizApp() {
|
||||
return `${baseUrl}/apps/calendar`;
|
||||
case 'contact':
|
||||
return `${baseUrl}/apps/contacts`;
|
||||
case 'deck':
|
||||
case 'deck_card':
|
||||
// URL pattern: /apps/deck/board/:boardId/card/:cardId
|
||||
if (result.metadata && result.metadata.board_id) {
|
||||
return `${baseUrl}/apps/deck/board/${result.metadata.board_id}/card/${result.id}`;
|
||||
}
|
||||
// Fallback if board_id not available
|
||||
return `${baseUrl}/apps/deck`;
|
||||
case 'news_item':
|
||||
return `${baseUrl}/apps/news/item/${result.id}`;
|
||||
default:
|
||||
return `${baseUrl}`;
|
||||
}
|
||||
@@ -217,7 +224,7 @@ function vizApp() {
|
||||
},
|
||||
|
||||
async toggleChunk(result) {
|
||||
const resultKey = `${result.doc_type}_${result.id}`;
|
||||
const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`;
|
||||
|
||||
if (this.isChunkExpanded(resultKey)) {
|
||||
delete this.expandedChunks[resultKey];
|
||||
|
||||
@@ -117,7 +117,14 @@ class RefreshTokenStorage:
|
||||
return cls(db_path=db_path, encryption_key=encryption_key)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize database schema"""
|
||||
"""
|
||||
Initialize database schema using Alembic migrations.
|
||||
|
||||
This method handles three scenarios:
|
||||
1. New database: Run migrations from scratch
|
||||
2. Pre-Alembic database: Stamp with initial revision (no changes)
|
||||
3. Alembic-managed database: Upgrade to latest version
|
||||
"""
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
@@ -125,137 +132,59 @@ class RefreshTokenStorage:
|
||||
db_dir = Path(self.db_path).parent
|
||||
db_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Set restrictive permissions on database file
|
||||
# Set restrictive permissions on database file if it exists
|
||||
if Path(self.db_path).exists():
|
||||
os.chmod(self.db_path, 0o600)
|
||||
|
||||
# Check database state and run appropriate migration strategy
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS refresh_tokens (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid', -- 'hybrid', 'flow1', 'flow2'
|
||||
token_audience TEXT DEFAULT 'nextcloud', -- 'mcp-server' or 'nextcloud'
|
||||
provisioned_at INTEGER, -- When Flow 2 was completed
|
||||
provisioning_client_id TEXT, -- Which MCP client initiated Flow 1
|
||||
scopes TEXT, -- JSON array of granted scopes
|
||||
-- Browser session profile cache
|
||||
user_profile TEXT, -- JSON cache of IdP user profile (for browser UI only)
|
||||
profile_cached_at INTEGER -- When profile was last cached
|
||||
# Check if database is managed by Alembic
|
||||
cursor = await db.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='alembic_version'"
|
||||
)
|
||||
has_alembic = await cursor.fetchone() is not None
|
||||
|
||||
if not has_alembic:
|
||||
# Check if this is a pre-Alembic database with existing schema
|
||||
cursor = await db.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='refresh_tokens'"
|
||||
)
|
||||
"""
|
||||
)
|
||||
has_schema = await cursor.fetchone() is not None
|
||||
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS audit_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
user_id TEXT NOT NULL,
|
||||
resource_type TEXT,
|
||||
resource_id TEXT,
|
||||
auth_method TEXT,
|
||||
hostname TEXT
|
||||
if has_schema:
|
||||
logger.info(
|
||||
f"Detected pre-Alembic database at {self.db_path}, "
|
||||
"stamping with initial revision"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Initializing new database at {self.db_path} with migrations"
|
||||
)
|
||||
|
||||
# Run migrations in a worker thread using anyio.to_thread
|
||||
# This allows Alembic to run its own async operations in a separate context
|
||||
from anyio import to_thread
|
||||
|
||||
from nextcloud_mcp_server.migrations import stamp_database, upgrade_database
|
||||
|
||||
if not has_alembic:
|
||||
if has_schema:
|
||||
# Stamp existing database without running migrations
|
||||
await to_thread.run_sync(stamp_database, self.db_path, "001")
|
||||
logger.info(
|
||||
"Pre-Alembic database stamped successfully. "
|
||||
"Future schema changes will use migrations."
|
||||
)
|
||||
"""
|
||||
)
|
||||
else:
|
||||
# New database - run migrations
|
||||
await to_thread.run_sync(upgrade_database, self.db_path, "head")
|
||||
logger.info("Database initialized with migrations")
|
||||
else:
|
||||
# Alembic-managed database - upgrade to latest
|
||||
await to_thread.run_sync(upgrade_database, self.db_path, "head")
|
||||
logger.info("Database upgraded to latest version")
|
||||
|
||||
# Create index on audit logs for efficient queries
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_audit_user_timestamp "
|
||||
"ON audit_logs(user_id, timestamp)"
|
||||
)
|
||||
|
||||
# OAuth client credentials storage
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_clients (
|
||||
id INTEGER PRIMARY KEY,
|
||||
client_id TEXT UNIQUE NOT NULL,
|
||||
encrypted_client_secret BLOB NOT NULL,
|
||||
client_id_issued_at INTEGER NOT NULL,
|
||||
client_secret_expires_at INTEGER NOT NULL,
|
||||
redirect_uris TEXT NOT NULL,
|
||||
encrypted_registration_access_token BLOB,
|
||||
registration_client_uri TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# OAuth flow sessions (ADR-004 Progressive Consent)
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_sessions (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
client_id TEXT,
|
||||
client_redirect_uri TEXT NOT NULL,
|
||||
state TEXT,
|
||||
code_challenge TEXT,
|
||||
code_challenge_method TEXT,
|
||||
mcp_authorization_code TEXT UNIQUE,
|
||||
idp_access_token TEXT,
|
||||
idp_refresh_token TEXT,
|
||||
user_id TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
expires_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid', -- 'hybrid', 'flow1', 'flow2'
|
||||
requested_scopes TEXT, -- JSON array of requested scopes
|
||||
granted_scopes TEXT, -- JSON array of granted scopes
|
||||
is_provisioning BOOLEAN DEFAULT FALSE -- True if this is a Flow 2 provisioning session
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Create index for MCP authorization code lookups
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_oauth_sessions_mcp_code "
|
||||
"ON oauth_sessions(mcp_authorization_code)"
|
||||
)
|
||||
|
||||
# Schema version tracking
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Registered webhooks tracking (both BasicAuth and OAuth modes)
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS registered_webhooks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
webhook_id INTEGER NOT NULL UNIQUE,
|
||||
preset_id TEXT NOT NULL,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Create indexes for efficient webhook queries
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_webhooks_preset "
|
||||
"ON registered_webhooks(preset_id)"
|
||||
)
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_webhooks_created "
|
||||
"ON registered_webhooks(created_at)"
|
||||
)
|
||||
|
||||
await db.commit()
|
||||
|
||||
# Set restrictive permissions after creation
|
||||
# Set restrictive permissions after initialization
|
||||
os.chmod(self.db_path, 0o600)
|
||||
|
||||
self._initialized = True
|
||||
@@ -287,6 +216,8 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
encrypted_token = self.cipher.encrypt(refresh_token.encode())
|
||||
now = int(time.time())
|
||||
scopes_json = json.dumps(scopes) if scopes else None
|
||||
@@ -432,6 +363,9 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
start_time = time.time()
|
||||
try:
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
@@ -516,6 +450,9 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
async with db.execute(
|
||||
"""
|
||||
@@ -687,6 +624,9 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
# Encrypt sensitive data
|
||||
encrypted_secret = self.cipher.encrypt(client_secret.encode())
|
||||
encrypted_reg_token = (
|
||||
@@ -757,6 +697,9 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
async with db.execute(
|
||||
"""
|
||||
|
||||
@@ -65,8 +65,12 @@
|
||||
<span>Contacts</span>
|
||||
</label>
|
||||
<label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
|
||||
<input type="checkbox" x-model="docTypes" value="deck" style="margin-right: 4px;">
|
||||
<span>Deck</span>
|
||||
<input type="checkbox" x-model="docTypes" value="deck_card" style="margin-right: 4px;">
|
||||
<span>Deck Cards</span>
|
||||
</label>
|
||||
<label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
|
||||
<input type="checkbox" x-model="docTypes" value="news_item" style="margin-right: 4px;">
|
||||
<span>News</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
@@ -117,12 +121,13 @@
|
||||
|
||||
<template x-if="!loading && results.length > 0">
|
||||
<div x-transition.opacity.duration.200ms>
|
||||
<template x-for="result in results" :key="result.id">
|
||||
<template x-for="result in results" :key="`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`">
|
||||
<div style="padding: 12px; border-bottom: 1px solid #eee;">
|
||||
<a :href="getNextcloudUrl(result)" target="_blank" style="font-weight: 500; color: #0066cc; text-decoration: none;">
|
||||
<span x-text="result.title"></span>
|
||||
</a>
|
||||
<div style="font-size: 14px; color: #666; margin-top: 4px;" x-text="result.excerpt"></div>
|
||||
<div style="font-size: 14px; color: #666; margin-top: 4px;"
|
||||
x-text="result.excerpt.length > 200 ? result.excerpt.substring(0, 200) + '...' : result.excerpt"></div>
|
||||
<div style="font-size: 12px; color: #999; margin-top: 4px;">
|
||||
Raw Score: <span x-text="result.original_score.toFixed(3)"></span>
|
||||
(<span x-text="(result.score * 100).toFixed(0)"></span>% relative) |
|
||||
@@ -134,22 +139,36 @@
|
||||
<button
|
||||
class="chunk-toggle-btn"
|
||||
@click="toggleChunk(result)"
|
||||
x-text="isChunkExpanded(`${result.doc_type}_${result.id}`) ? 'Hide Chunk' : 'Show Chunk'"
|
||||
x-text="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`) ? 'Hide Chunk' : 'Show Chunk'"
|
||||
></button>
|
||||
</template>
|
||||
|
||||
<!-- Chunk context (expanded inline) -->
|
||||
<template x-if="isChunkExpanded(`${result.doc_type}_${result.id}`)">
|
||||
<template x-if="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`)">
|
||||
<div class="chunk-context" x-transition.opacity.duration.200ms>
|
||||
<template x-if="chunkLoading[`${result.doc_type}_${result.id}`]">
|
||||
<template x-if="chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
|
||||
<div style="color: #666; font-style: italic;">Loading chunk...</div>
|
||||
</template>
|
||||
<template x-if="!chunkLoading[`${result.doc_type}_${result.id}`]">
|
||||
<template x-if="!chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
|
||||
<div>
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}`]?.has_more_before">
|
||||
<!-- Highlighted page image for PDFs -->
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image">
|
||||
<div class="chunk-image-container">
|
||||
<div class="chunk-image-header">
|
||||
<span>Page <span x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"></span></span>
|
||||
</div>
|
||||
<img
|
||||
:src="'data:image/png;base64,' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image"
|
||||
:alt="'Page ' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"
|
||||
class="chunk-highlighted-image"
|
||||
/>
|
||||
</div>
|
||||
</template>
|
||||
<!-- Text context -->
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_before">
|
||||
<span class="chunk-ellipsis">...</span>
|
||||
</template>
|
||||
<span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}`]?.has_more_after">
|
||||
<span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_after">
|
||||
<span class="chunk-ellipsis">...</span>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
@@ -21,7 +21,6 @@ from typing import Dict, Optional, Tuple
|
||||
import anyio
|
||||
import httpx
|
||||
import jwt
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
|
||||
from nextcloud_mcp_server.auth.token_exchange import exchange_token_for_delegation
|
||||
@@ -104,7 +103,8 @@ class TokenBrokerService:
|
||||
storage: RefreshTokenStorage,
|
||||
oidc_discovery_url: str,
|
||||
nextcloud_host: str,
|
||||
encryption_key: str,
|
||||
client_id: str,
|
||||
client_secret: str,
|
||||
cache_ttl: int = 300,
|
||||
cache_early_refresh: int = 30,
|
||||
):
|
||||
@@ -112,23 +112,25 @@ class TokenBrokerService:
|
||||
Initialize the Token Broker Service.
|
||||
|
||||
Args:
|
||||
storage: Database storage for refresh tokens
|
||||
storage: Database storage for refresh tokens (handles encryption internally)
|
||||
oidc_discovery_url: OIDC provider discovery URL
|
||||
nextcloud_host: Nextcloud server URL
|
||||
encryption_key: Fernet key for token encryption
|
||||
client_id: OAuth client ID for token operations
|
||||
client_secret: OAuth client secret for token operations
|
||||
cache_ttl: Cache TTL in seconds (default: 5 minutes)
|
||||
cache_early_refresh: Early refresh threshold in seconds (default: 30 seconds)
|
||||
"""
|
||||
self.storage = storage
|
||||
self.oidc_discovery_url = oidc_discovery_url
|
||||
self.nextcloud_host = nextcloud_host
|
||||
self.fernet = Fernet(
|
||||
encryption_key.encode()
|
||||
if isinstance(encryption_key, str)
|
||||
else encryption_key
|
||||
)
|
||||
self.client_id = client_id
|
||||
self.client_secret = client_secret
|
||||
self.cache = TokenCache(cache_ttl, cache_early_refresh)
|
||||
self._oidc_config = None
|
||||
|
||||
# Per-user locks for token refresh operations (prevents race conditions)
|
||||
self._user_refresh_locks: dict[str, anyio.Lock] = {}
|
||||
self._locks_lock = anyio.Lock() # Protects the locks dict itself
|
||||
self._http_client = None
|
||||
|
||||
async def _get_http_client(self) -> httpx.AsyncClient:
|
||||
@@ -139,6 +141,24 @@ class TokenBrokerService:
|
||||
)
|
||||
return self._http_client
|
||||
|
||||
async def _get_user_refresh_lock(self, user_id: str) -> anyio.Lock:
|
||||
"""
|
||||
Get or create a lock for a specific user's refresh operations.
|
||||
|
||||
This prevents race conditions when multiple concurrent requests
|
||||
attempt to refresh the same user's token simultaneously.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get lock for
|
||||
|
||||
Returns:
|
||||
anyio.Lock for this user's refresh operations
|
||||
"""
|
||||
async with self._locks_lock:
|
||||
if user_id not in self._user_refresh_locks:
|
||||
self._user_refresh_locks[user_id] = anyio.Lock()
|
||||
return self._user_refresh_locks[user_id]
|
||||
|
||||
async def _get_oidc_config(self) -> dict:
|
||||
"""Get OIDC configuration from discovery endpoint."""
|
||||
if self._oidc_config is None:
|
||||
@@ -148,6 +168,37 @@ class TokenBrokerService:
|
||||
self._oidc_config = response.json()
|
||||
return self._oidc_config
|
||||
|
||||
def _rewrite_token_endpoint(self, token_endpoint: str) -> str:
|
||||
"""Rewrite token endpoint from public URL to internal Docker URL.
|
||||
|
||||
OIDC discovery documents return public URLs (e.g., http://localhost:8080/...)
|
||||
but server-side requests must use internal Docker network (e.g., http://app:80/...).
|
||||
|
||||
Args:
|
||||
token_endpoint: Token endpoint URL from discovery document
|
||||
|
||||
Returns:
|
||||
Rewritten URL using internal Docker host
|
||||
"""
|
||||
import os
|
||||
from urllib.parse import urlparse
|
||||
|
||||
public_issuer = os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL")
|
||||
if not public_issuer:
|
||||
return token_endpoint
|
||||
|
||||
internal_parsed = urlparse(self.nextcloud_host)
|
||||
token_parsed = urlparse(token_endpoint)
|
||||
public_parsed = urlparse(public_issuer)
|
||||
|
||||
if token_parsed.hostname == public_parsed.hostname:
|
||||
# Replace public URL with internal Docker URL
|
||||
rewritten = f"{internal_parsed.scheme}://{internal_parsed.netloc}{token_parsed.path}"
|
||||
logger.info(f"Rewrote token endpoint: {token_endpoint} -> {rewritten}")
|
||||
return rewritten
|
||||
|
||||
return token_endpoint
|
||||
|
||||
async def get_nextcloud_token(self, user_id: str) -> Optional[str]:
|
||||
"""
|
||||
Get a valid Nextcloud access token for the user.
|
||||
@@ -180,9 +231,8 @@ class TokenBrokerService:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Decrypt refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(encrypted_token.encode()).decode()
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
|
||||
# Exchange refresh token for new access token
|
||||
access_token, expires_in = await self._refresh_access_token(refresh_token)
|
||||
@@ -271,41 +321,79 @@ class TokenBrokerService:
|
||||
"""
|
||||
# Check cache first (background tokens can be cached)
|
||||
cache_key = f"{user_id}:background:{','.join(sorted(required_scopes))}"
|
||||
refresh_in_progress_key = f"{user_id}:refresh_in_progress"
|
||||
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
return cached_token
|
||||
|
||||
# Get stored refresh token
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if not refresh_data:
|
||||
logger.info(f"No refresh token found for user {user_id}")
|
||||
return None
|
||||
# Acquire per-user lock BEFORE refresh operation to prevent race conditions
|
||||
refresh_lock = await self._get_user_refresh_lock(user_id)
|
||||
async with refresh_lock:
|
||||
# Double-check cache after acquiring lock
|
||||
# (another thread may have refreshed while we waited)
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
logger.debug(
|
||||
f"Token found in cache after lock acquisition for user {user_id}"
|
||||
)
|
||||
return cached_token
|
||||
|
||||
try:
|
||||
# Decrypt refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(encrypted_token.encode()).decode()
|
||||
# Check if another thread is currently refreshing
|
||||
if await self.cache.get(refresh_in_progress_key):
|
||||
logger.debug(f"Refresh in progress for user {user_id}, waiting briefly")
|
||||
await anyio.sleep(0.1) # Brief wait for in-progress refresh
|
||||
# Check cache one more time after wait
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
logger.debug(
|
||||
f"Token refreshed by another thread for user {user_id}"
|
||||
)
|
||||
return cached_token
|
||||
|
||||
# Get token with specific scopes for background operation
|
||||
access_token, expires_in = await self._refresh_access_token_with_scopes(
|
||||
refresh_token, required_scopes
|
||||
)
|
||||
# Mark refresh as in-progress
|
||||
await self.cache.set(refresh_in_progress_key, "true", expires_in=5)
|
||||
|
||||
# Cache the background token
|
||||
await self.cache.set(cache_key, access_token, expires_in)
|
||||
try:
|
||||
# Get stored refresh token
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if not refresh_data:
|
||||
logger.info(f"No refresh token found for user {user_id}")
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
f"Generated background token for user {user_id} with scopes: {required_scopes}"
|
||||
)
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
|
||||
return access_token
|
||||
# Get token with specific scopes for background operation
|
||||
# Pass user_id to enable refresh token rotation storage
|
||||
access_token, expires_in = await self._refresh_access_token_with_scopes(
|
||||
refresh_token, required_scopes, user_id=user_id
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get background token for user {user_id}: {e}")
|
||||
await self.cache.invalidate(cache_key)
|
||||
return None
|
||||
# Cache the background token
|
||||
await self.cache.set(cache_key, access_token, expires_in)
|
||||
|
||||
async def _refresh_access_token(self, refresh_token: str) -> Tuple[str, int]:
|
||||
logger.info(
|
||||
f"Generated background token for user {user_id} with scopes: {required_scopes}"
|
||||
)
|
||||
|
||||
return access_token
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to get background token for user {user_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
await self.cache.invalidate(cache_key)
|
||||
return None
|
||||
|
||||
finally:
|
||||
# Always clear the in-progress marker
|
||||
await self.cache.invalidate(refresh_in_progress_key)
|
||||
|
||||
async def _refresh_access_token(
|
||||
self, refresh_token: str, user_id: str | None = None
|
||||
) -> Tuple[str, int]:
|
||||
"""
|
||||
Exchange refresh token for new access token.
|
||||
|
||||
@@ -313,20 +401,24 @@ class TokenBrokerService:
|
||||
|
||||
Args:
|
||||
refresh_token: The refresh token
|
||||
user_id: If provided, store the rotated refresh token for this user
|
||||
|
||||
Returns:
|
||||
Tuple of (access_token, expires_in_seconds)
|
||||
"""
|
||||
config = await self._get_oidc_config()
|
||||
token_endpoint = config["token_endpoint"]
|
||||
token_endpoint = self._rewrite_token_endpoint(config["token_endpoint"])
|
||||
|
||||
client = await self._get_http_client()
|
||||
|
||||
# Request new access token using refresh token
|
||||
# Include client credentials as required by most OAuth servers
|
||||
data = {
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"scope": "openid profile email notes:read notes:write calendar:read calendar:write",
|
||||
"scope": "openid profile email offline_access notes:read notes:write calendar:read calendar:write",
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
@@ -345,42 +437,69 @@ class TokenBrokerService:
|
||||
access_token = token_data["access_token"]
|
||||
expires_in = token_data.get("expires_in", 3600) # Default 1 hour
|
||||
|
||||
# Validate audience
|
||||
await self._validate_token_audience(access_token, "nextcloud")
|
||||
# Handle refresh token rotation (Nextcloud OIDC rotates on every use)
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
if user_id and new_refresh_token and new_refresh_token != refresh_token:
|
||||
# Calculate expiry as Unix timestamp (90 days from now)
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
logger.info(f"Stored rotated refresh token for user {user_id}")
|
||||
|
||||
# Note: Nextcloud validates token audience on API calls - no need to pre-validate here
|
||||
|
||||
logger.info(f"Refreshed access token (expires in {expires_in}s)")
|
||||
return access_token, expires_in
|
||||
|
||||
async def _refresh_access_token_with_scopes(
|
||||
self, refresh_token: str, required_scopes: list[str]
|
||||
self, refresh_token: str, required_scopes: list[str], user_id: str | None = None
|
||||
) -> Tuple[str, int]:
|
||||
"""
|
||||
Exchange refresh token for new access token with specific scopes.
|
||||
|
||||
This method implements scope downscoping for least privilege.
|
||||
|
||||
IMPORTANT: Nextcloud OIDC rotates refresh tokens on every use (one-time use).
|
||||
When user_id is provided, this method stores the new refresh token returned
|
||||
by Nextcloud to ensure subsequent refresh operations succeed.
|
||||
|
||||
Args:
|
||||
refresh_token: The refresh token
|
||||
required_scopes: Minimal scopes needed for this operation
|
||||
user_id: If provided, store the rotated refresh token for this user
|
||||
|
||||
Returns:
|
||||
Tuple of (access_token, expires_in_seconds)
|
||||
"""
|
||||
config = await self._get_oidc_config()
|
||||
token_endpoint = config["token_endpoint"]
|
||||
token_endpoint = self._rewrite_token_endpoint(config["token_endpoint"])
|
||||
|
||||
client = await self._get_http_client()
|
||||
|
||||
# Always include basic OpenID scopes
|
||||
scopes = list(set(["openid", "profile", "email"] + required_scopes))
|
||||
# Always include basic OpenID scopes + offline_access to get new refresh token
|
||||
scopes = list(
|
||||
set(["openid", "profile", "email", "offline_access"] + required_scopes)
|
||||
)
|
||||
|
||||
# Request new access token with specific scopes
|
||||
# Include client credentials as required by most OAuth servers
|
||||
data = {
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"scope": " ".join(scopes),
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Token refresh request to {token_endpoint} with client_id={self.client_id[:16]}..."
|
||||
)
|
||||
|
||||
response = await client.post(
|
||||
token_endpoint,
|
||||
data=data,
|
||||
@@ -391,14 +510,29 @@ class TokenBrokerService:
|
||||
logger.error(
|
||||
f"Token refresh with scopes failed: {response.status_code} - {response.text}"
|
||||
)
|
||||
logger.error(f" client_id used: {self.client_id[:16]}...")
|
||||
raise Exception(f"Token refresh failed: {response.status_code}")
|
||||
|
||||
token_data = response.json()
|
||||
access_token = token_data["access_token"]
|
||||
expires_in = token_data.get("expires_in", 3600) # Default 1 hour
|
||||
|
||||
# Validate audience
|
||||
await self._validate_token_audience(access_token, "nextcloud")
|
||||
# Handle refresh token rotation (Nextcloud OIDC rotates on every use)
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
if user_id and new_refresh_token and new_refresh_token != refresh_token:
|
||||
# Store the new refresh token for future use
|
||||
# Calculate expiry as Unix timestamp (90 days from now)
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
logger.info(f"Stored rotated refresh token for user {user_id}")
|
||||
|
||||
# Note: Nextcloud validates token audience on API calls - no need to pre-validate here
|
||||
|
||||
logger.info(
|
||||
f"Refreshed access token with scopes {scopes} (expires in {expires_in}s)"
|
||||
@@ -453,11 +587,8 @@ class TokenBrokerService:
|
||||
return False
|
||||
|
||||
try:
|
||||
# Decrypt current refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
current_refresh_token = self.fernet.decrypt(
|
||||
encrypted_token.encode()
|
||||
).decode()
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
current_refresh_token = refresh_data["refresh_token"]
|
||||
|
||||
# Get OIDC configuration
|
||||
config = await self._get_oidc_config()
|
||||
@@ -486,13 +617,15 @@ class TokenBrokerService:
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
|
||||
if new_refresh_token and new_refresh_token != current_refresh_token:
|
||||
# Encrypt and store new refresh token
|
||||
encrypted_new = self.fernet.encrypt(new_refresh_token.encode()).decode()
|
||||
# storage.store_refresh_token() handles encryption internally
|
||||
# Convert datetime to Unix timestamp (int) for database storage
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=encrypted_new,
|
||||
expires_at=datetime.now(timezone.utc)
|
||||
+ timedelta(days=90), # 90-day expiry
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
logger.info(f"Rotated master refresh token for user {user_id}")
|
||||
|
||||
@@ -536,11 +669,8 @@ class TokenBrokerService:
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if refresh_data:
|
||||
try:
|
||||
# Attempt to revoke at IdP
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(
|
||||
encrypted_token.encode()
|
||||
).decode()
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
await self._revoke_token_at_idp(refresh_token)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to revoke at IdP: {e}")
|
||||
|
||||
@@ -303,10 +303,13 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
|
||||
try:
|
||||
# Introspection requires client authentication
|
||||
client_id = self.settings.oidc_client_id
|
||||
client_secret = self.settings.oidc_client_secret
|
||||
assert client_id is not None and client_secret is not None
|
||||
response = await self.http_client.post(
|
||||
self.introspection_uri,
|
||||
data={"token": token},
|
||||
auth=(self.settings.oidc_client_id, self.settings.oidc_client_secret),
|
||||
auth=(client_id, client_secret),
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
|
||||
@@ -18,6 +18,8 @@ from starlette.authentication import requires
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Setup Jinja2 environment for templates
|
||||
@@ -25,14 +27,20 @@ _template_dir = Path(__file__).parent / "templates"
|
||||
_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
|
||||
|
||||
|
||||
async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.AsyncClient:
|
||||
"""Get an authenticated HTTP client for user info page operations.
|
||||
async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudClient:
|
||||
"""Get an authenticated Nextcloud client for user info page operations.
|
||||
|
||||
This is a shared helper for authenticated routes that need to access
|
||||
Nextcloud APIs. It handles both BasicAuth and OAuth authentication modes.
|
||||
|
||||
Args:
|
||||
request: Starlette request object
|
||||
|
||||
Returns:
|
||||
Authenticated httpx.AsyncClient
|
||||
Authenticated NextcloudClient
|
||||
|
||||
Raises:
|
||||
RuntimeError: If credentials/session not configured
|
||||
"""
|
||||
oauth_ctx = getattr(request.app.state, "oauth_context", None)
|
||||
|
||||
@@ -45,11 +53,15 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.Asyn
|
||||
if not all([nextcloud_host, username, password]):
|
||||
raise RuntimeError("BasicAuth credentials not configured")
|
||||
|
||||
assert nextcloud_host is not None # Type narrowing for type checker
|
||||
return httpx.AsyncClient(
|
||||
from httpx import BasicAuth
|
||||
|
||||
assert nextcloud_host is not None
|
||||
assert username is not None
|
||||
assert password is not None
|
||||
return NextcloudClient(
|
||||
base_url=nextcloud_host,
|
||||
auth=(username, password),
|
||||
timeout=30.0,
|
||||
username=username,
|
||||
auth=BasicAuth(username, password),
|
||||
)
|
||||
|
||||
# OAuth mode - get token from session
|
||||
@@ -64,15 +76,14 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.Asyn
|
||||
raise RuntimeError("No access token found in session")
|
||||
|
||||
access_token = token_data["access_token"]
|
||||
username = token_data.get("username")
|
||||
nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
|
||||
|
||||
if not nextcloud_host:
|
||||
raise RuntimeError("Nextcloud host not configured")
|
||||
if not nextcloud_host or not username:
|
||||
raise RuntimeError("Nextcloud host or username not configured")
|
||||
|
||||
return httpx.AsyncClient(
|
||||
base_url=nextcloud_host,
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=30.0,
|
||||
return NextcloudClient.from_token(
|
||||
base_url=nextcloud_host, token=access_token, username=username
|
||||
)
|
||||
|
||||
|
||||
@@ -423,10 +434,10 @@ async def user_info_html(request: Request) -> HTMLResponse:
|
||||
try:
|
||||
from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
|
||||
|
||||
# Get authenticated HTTP client
|
||||
http_client = await _get_authenticated_client_for_userinfo(request)
|
||||
is_admin = await is_nextcloud_admin(request, http_client)
|
||||
await http_client.aclose()
|
||||
# Get authenticated Nextcloud client
|
||||
nc_client = await _get_authenticated_client_for_userinfo(request)
|
||||
is_admin = await is_nextcloud_admin(request, nc_client._client)
|
||||
await nc_client.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check admin status: {e}")
|
||||
# Default to not admin if check fails
|
||||
|
||||
@@ -22,11 +22,13 @@ from starlette.requests import Request
|
||||
from starlette.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.search import (
|
||||
BM25HybridSearchAlgorithm,
|
||||
SemanticSearchAlgorithm,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.pca import PCA
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -138,7 +140,10 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
_get_authenticated_client_for_userinfo,
|
||||
)
|
||||
|
||||
async with await _get_authenticated_client_for_userinfo(request) as http_client: # noqa: F841
|
||||
with trace_operation("vector_viz.get_auth_client"):
|
||||
auth_client_ctx = await _get_authenticated_client_for_userinfo(request)
|
||||
|
||||
async with auth_client_ctx as nc_client: # noqa: F841
|
||||
# Create search algorithm (no client needed - verification removed)
|
||||
if algorithm == "semantic":
|
||||
search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
|
||||
@@ -158,24 +163,40 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
all_results = []
|
||||
if doc_types is None or len(doc_types) == 0:
|
||||
# Cross-app search - search all indexed types
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=None, # Search all types
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
all_results.extend(unverified_results)
|
||||
else:
|
||||
# Search each document type and combine
|
||||
for doc_type in doc_types:
|
||||
with trace_operation(
|
||||
"vector_viz.search_execute",
|
||||
attributes={
|
||||
"search.algorithm": algorithm,
|
||||
"search.limit": limit * 2,
|
||||
"search.doc_type": "all",
|
||||
},
|
||||
):
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=doc_type,
|
||||
doc_type=None, # Search all types
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
all_results.extend(unverified_results)
|
||||
else:
|
||||
# Search each document type and combine
|
||||
for doc_type in doc_types:
|
||||
with trace_operation(
|
||||
"vector_viz.search_execute",
|
||||
attributes={
|
||||
"search.algorithm": algorithm,
|
||||
"search.limit": limit * 2,
|
||||
"search.doc_type": doc_type,
|
||||
},
|
||||
):
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=doc_type,
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
all_results.extend(unverified_results)
|
||||
# Sort by score before verification
|
||||
all_results.sort(key=lambda r: r.score, reverse=True)
|
||||
@@ -189,22 +210,26 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
# Store original scores and normalize for visualization
|
||||
# (best result = 1.0, worst result = 0.0 within THIS result set)
|
||||
# This makes visual encoding meaningful regardless of RRF normalization
|
||||
if search_results:
|
||||
scores = [r.score for r in search_results]
|
||||
min_score, max_score = min(scores), max(scores)
|
||||
score_range = max_score - min_score if max_score > min_score else 1.0
|
||||
with trace_operation(
|
||||
"vector_viz.score_normalize",
|
||||
attributes={"normalize.num_results": len(search_results)},
|
||||
):
|
||||
if search_results:
|
||||
scores = [r.score for r in search_results]
|
||||
min_score, max_score = min(scores), max(scores)
|
||||
score_range = max_score - min_score if max_score > min_score else 1.0
|
||||
|
||||
logger.info(
|
||||
f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
|
||||
f"→ [0.0, 1.0]"
|
||||
)
|
||||
logger.info(
|
||||
f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
|
||||
f"→ [0.0, 1.0]"
|
||||
)
|
||||
|
||||
# Store original score and rescale to 0-1 for visualization
|
||||
for r in search_results:
|
||||
# Store original score before normalization
|
||||
r.original_score = r.score
|
||||
# Rescale for visual encoding
|
||||
r.score = (r.score - min_score) / score_range
|
||||
# Store original score and rescale to 0-1 for visualization
|
||||
for r in search_results:
|
||||
# Store original score before normalization
|
||||
r.original_score = r.score
|
||||
# Rescale for visual encoding
|
||||
r.score = (r.score - min_score) / score_range
|
||||
|
||||
if not search_results:
|
||||
return JSONResponse(
|
||||
@@ -212,75 +237,57 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"success": True,
|
||||
"results": [],
|
||||
"coordinates_3d": [],
|
||||
"query_coords": None,
|
||||
"query_coords": [],
|
||||
"message": "No results found",
|
||||
}
|
||||
)
|
||||
|
||||
# Fetch vectors for matching results from Qdrant
|
||||
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
|
||||
vector_fetch_start = time.perf_counter()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
doc_ids = [r.id for r in search_results]
|
||||
|
||||
# Retrieve vectors for the matching documents
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchAny
|
||||
with trace_operation("vector_viz.get_qdrant_client"):
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
points_response = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(
|
||||
key="doc_id",
|
||||
match=MatchAny(any=[str(doc_id) for doc_id in doc_ids]),
|
||||
),
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match={"value": username},
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=len(doc_ids) * 2, # Account for multiple chunks per doc
|
||||
with_vectors=["dense"], # Only fetch dense vectors for visualization
|
||||
with_payload=["doc_id"], # Need doc_id to map vectors to results
|
||||
)
|
||||
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
|
||||
|
||||
points = points_response[0]
|
||||
# Collect point IDs from search results for batch retrieval
|
||||
# point_id is the Qdrant internal ID returned by search algorithms
|
||||
point_ids = [r.point_id for r in search_results if r.point_id]
|
||||
|
||||
if not points:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
"results": [],
|
||||
"coordinates_2d": [],
|
||||
"message": "No vectors found for results",
|
||||
}
|
||||
)
|
||||
if point_ids:
|
||||
# Single batch retrieve call instead of N sequential scroll calls
|
||||
# This is ~50x faster for 50 results (1 HTTP request vs 50)
|
||||
with trace_operation(
|
||||
"vector_viz.vector_retrieve",
|
||||
attributes={"retrieve.num_points": len(point_ids)},
|
||||
):
|
||||
points_response = await qdrant_client.retrieve(
|
||||
collection_name=settings.get_collection_name(),
|
||||
ids=point_ids,
|
||||
with_vectors=["dense"],
|
||||
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
|
||||
)
|
||||
|
||||
# Extract dense vectors and group by document
|
||||
def extract_dense_vector(point):
|
||||
if point.vector is None:
|
||||
return None
|
||||
# If named vectors (dict), extract "dense"
|
||||
if isinstance(point.vector, dict):
|
||||
return point.vector.get("dense")
|
||||
# If unnamed vector (array), use directly
|
||||
return point.vector
|
||||
# Build chunk_vectors_map from batch response
|
||||
for point in points_response:
|
||||
if point.vector is not None:
|
||||
# Extract dense vector (handle both named and unnamed vectors)
|
||||
if isinstance(point.vector, dict):
|
||||
vector = point.vector.get("dense")
|
||||
else:
|
||||
vector = point.vector
|
||||
|
||||
# Group chunk vectors by doc_id
|
||||
from collections import defaultdict
|
||||
|
||||
doc_chunks = defaultdict(list)
|
||||
for point in points:
|
||||
if point.payload:
|
||||
doc_id = int(point.payload.get("doc_id", 0))
|
||||
vector = extract_dense_vector(point)
|
||||
if vector is not None:
|
||||
doc_chunks[doc_id].append(vector)
|
||||
if vector is not None and point.payload:
|
||||
doc_id = point.payload.get("doc_id")
|
||||
chunk_start = point.payload.get("chunk_start_offset")
|
||||
chunk_end = point.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, chunk_start, chunk_end)
|
||||
chunk_vectors_map[chunk_key] = vector
|
||||
|
||||
vector_fetch_duration = time.perf_counter() - vector_fetch_start
|
||||
|
||||
if len(doc_chunks) < 2:
|
||||
# Not enough documents for PCA
|
||||
if len(chunk_vectors_map) < 2:
|
||||
# Not enough chunks for PCA
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
@@ -291,20 +298,21 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"title": r.title,
|
||||
"excerpt": r.excerpt,
|
||||
"score": r.score,
|
||||
"metadata": r.metadata,
|
||||
}
|
||||
for r in search_results
|
||||
],
|
||||
"coordinates_3d": [[0, 0, 0]] * len(search_results),
|
||||
"query_coords": [0, 0, 0],
|
||||
"message": "Not enough documents for PCA",
|
||||
"message": "Not enough chunks for PCA",
|
||||
}
|
||||
)
|
||||
|
||||
# Detect embedding dimension from first available vector
|
||||
embedding_dim = None
|
||||
for chunks in doc_chunks.values():
|
||||
if chunks:
|
||||
embedding_dim = len(chunks[0])
|
||||
for vector in chunk_vectors_map.values():
|
||||
if vector is not None:
|
||||
embedding_dim = len(vector)
|
||||
break
|
||||
|
||||
if embedding_dim is None:
|
||||
@@ -318,37 +326,42 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
|
||||
logger.info(f"Detected embedding dimension: {embedding_dim}")
|
||||
|
||||
# Average chunk vectors per document to create document-level embeddings
|
||||
# Maintain order of search_results for coordinate mapping
|
||||
doc_vectors = []
|
||||
# Build chunk vectors array in search_results order (1:1 mapping)
|
||||
chunk_vectors = []
|
||||
for result in search_results:
|
||||
if result.id in doc_chunks:
|
||||
# Average all chunk embeddings for this document
|
||||
chunk_vectors = np.array(doc_chunks[result.id])
|
||||
avg_vector = np.mean(chunk_vectors, axis=0)
|
||||
doc_vectors.append(avg_vector)
|
||||
logger.debug(f"Doc {result.id}: averaged {len(chunk_vectors)} chunks")
|
||||
chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
|
||||
if chunk_key in chunk_vectors_map:
|
||||
chunk_vectors.append(chunk_vectors_map[chunk_key])
|
||||
else:
|
||||
# Document not found in vectors (shouldn't happen)
|
||||
logger.warning(f"Doc {result.id} not found in fetched vectors")
|
||||
# Use zero vector as fallback with detected dimension
|
||||
doc_vectors.append(np.zeros(embedding_dim))
|
||||
# Chunk not found in vectors (shouldn't happen)
|
||||
logger.warning(
|
||||
f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
|
||||
)
|
||||
# Use zero vector as fallback
|
||||
chunk_vectors.append(np.zeros(embedding_dim))
|
||||
|
||||
doc_vectors = np.array(doc_vectors)
|
||||
chunk_vectors = np.array(chunk_vectors)
|
||||
|
||||
# Generate query embedding for visualization
|
||||
# Reuse query embedding from search algorithm (avoids redundant embedding call)
|
||||
query_embed_start = time.perf_counter()
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
if search_algo.query_embedding is not None:
|
||||
query_embedding = search_algo.query_embedding
|
||||
logger.info(
|
||||
f"Reusing query embedding from search algorithm "
|
||||
f"(dimension={len(query_embedding)})"
|
||||
)
|
||||
else:
|
||||
# Fallback: generate embedding if not available from search
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
query_embed_duration = time.perf_counter() - query_embed_start
|
||||
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
|
||||
# Combine query vector with document vectors for PCA
|
||||
# Combine query vector with chunk vectors for PCA
|
||||
# Query will be the last point in the array
|
||||
all_vectors = np.vstack([doc_vectors, np.array([query_embedding])])
|
||||
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
|
||||
|
||||
# Normalize vectors to unit length (L2 normalization)
|
||||
# This is critical because Qdrant uses COSINE distance, which only measures
|
||||
@@ -375,9 +388,26 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
)
|
||||
|
||||
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
|
||||
# Run in thread pool to avoid blocking the event loop (CPU-bound)
|
||||
pca_start = time.perf_counter()
|
||||
pca = PCA(n_components=3)
|
||||
coords_3d = pca.fit_transform(all_vectors_normalized)
|
||||
|
||||
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
|
||||
pca = PCA(n_components=3)
|
||||
coords = pca.fit_transform(vectors)
|
||||
return coords, pca
|
||||
|
||||
import anyio
|
||||
|
||||
with trace_operation(
|
||||
"vector_viz.pca_compute",
|
||||
attributes={
|
||||
"pca.num_vectors": len(all_vectors_normalized),
|
||||
"pca.embedding_dim": embedding_dim,
|
||||
},
|
||||
):
|
||||
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: _compute_pca(all_vectors_normalized)
|
||||
)
|
||||
pca_duration = time.perf_counter() - pca_start
|
||||
|
||||
# After fit, these attributes are guaranteed to be set
|
||||
@@ -394,17 +424,12 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
# Replace NaN with 0 to allow JSON serialization
|
||||
coords_3d = np.nan_to_num(coords_3d, nan=0.0)
|
||||
|
||||
# Split query coords from document coords
|
||||
# Split query coords from chunk coords
|
||||
# Round to 2 decimal places for cleaner display
|
||||
query_coords_3d = [
|
||||
round(float(x), 2) for x in coords_3d[-1]
|
||||
] # Last point is query
|
||||
doc_coords_3d = coords_3d[:-1] # All but last are documents
|
||||
|
||||
total_chunks = sum(len(chunks) for chunks in doc_chunks.values())
|
||||
avg_chunks_per_doc = (
|
||||
total_chunks / len(doc_vectors) if doc_vectors.size > 0 else 0
|
||||
)
|
||||
chunk_coords_3d = coords_3d[:-1] # All but last are chunks
|
||||
|
||||
logger.info(
|
||||
f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
|
||||
@@ -412,13 +437,14 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
f"PC3={pca.explained_variance_ratio_[2]:.3f}"
|
||||
)
|
||||
logger.info(
|
||||
f"Embedding stats: documents={len(doc_vectors)}, "
|
||||
f"total_chunks={total_chunks}, avg_chunks_per_doc={avg_chunks_per_doc:.1f}, "
|
||||
f"query_dim={len(query_embedding)}, doc_vector_dim={doc_vectors.shape[1] if doc_vectors.size > 0 else 0}"
|
||||
f"Embedding stats: chunks={len(chunk_vectors)}, "
|
||||
f"query_dim={len(query_embedding)}, chunk_vector_dim={chunk_vectors.shape[1] if chunk_vectors.size > 0 else 0}"
|
||||
)
|
||||
|
||||
# Coordinates already match search_results order (1:1 mapping)
|
||||
result_coords = [[round(float(x), 2) for x in coord] for coord in doc_coords_3d]
|
||||
result_coords = [
|
||||
[round(float(x), 2) for x in coord] for coord in chunk_coords_3d
|
||||
]
|
||||
|
||||
# Build response
|
||||
response_results = [
|
||||
@@ -433,6 +459,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
), # Raw score from algorithm
|
||||
"chunk_start_offset": r.chunk_start_offset,
|
||||
"chunk_end_offset": r.chunk_end_offset,
|
||||
"metadata": r.metadata, # Include metadata (e.g., board_id for deck_card)
|
||||
}
|
||||
for r in search_results
|
||||
]
|
||||
@@ -447,7 +474,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
f"vector_fetch={vector_fetch_duration * 1000:.1f}ms ({vector_fetch_duration / total_duration * 100:.1f}%), "
|
||||
f"query_embed={query_embed_duration * 1000:.1f}ms ({query_embed_duration / total_duration * 100:.1f}%), "
|
||||
f"pca={pca_duration * 1000:.1f}ms ({pca_duration / total_duration * 100:.1f}%), "
|
||||
f"results={len(search_results)}, doc_vectors={len(doc_vectors)}"
|
||||
f"results={len(search_results)}, chunk_vectors={len(chunk_vectors)}"
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
@@ -468,7 +495,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"query_embed_ms": round(query_embed_duration * 1000, 2),
|
||||
"pca_ms": round(pca_duration * 1000, 2),
|
||||
"num_results": len(search_results),
|
||||
"num_doc_vectors": len(doc_vectors),
|
||||
"num_chunk_vectors": len(chunk_vectors),
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -517,77 +544,118 @@ async def chunk_context_endpoint(request: Request) -> JSONResponse:
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Type assertions - we validated these above
|
||||
assert doc_type is not None
|
||||
assert doc_id is not None
|
||||
assert start_str is not None
|
||||
assert end_str is not None
|
||||
|
||||
start = int(start_str)
|
||||
end = int(end_str)
|
||||
# Convert doc_id to int (all document types use int IDs)
|
||||
doc_id_int = int(doc_id)
|
||||
|
||||
# Currently only support notes
|
||||
if doc_type != "note":
|
||||
return JSONResponse(
|
||||
{"success": False, "error": f"Unsupported doc_type: {doc_type}"},
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Get authenticated HTTP client and fetch note
|
||||
# Get authenticated Nextcloud client
|
||||
from nextcloud_mcp_server.auth.userinfo_routes import (
|
||||
_get_authenticated_client_for_userinfo,
|
||||
)
|
||||
from nextcloud_mcp_server.client.notes import NotesClient
|
||||
from nextcloud_mcp_server.search.context import get_chunk_with_context
|
||||
|
||||
# Get username from request auth
|
||||
username = (
|
||||
request.user.display_name
|
||||
if hasattr(request.user, "display_name")
|
||||
else "unknown"
|
||||
)
|
||||
# Use context expansion module to fetch chunk with surrounding context
|
||||
async with await _get_authenticated_client_for_userinfo(request) as nc_client:
|
||||
chunk_context = await get_chunk_with_context(
|
||||
nc_client=nc_client,
|
||||
user_id=request.user.display_name, # User ID from auth
|
||||
doc_id=doc_id_int,
|
||||
doc_type=doc_type,
|
||||
chunk_start=start,
|
||||
chunk_end=end,
|
||||
context_chars=context_chars,
|
||||
)
|
||||
|
||||
# Create notes client with authenticated HTTP client
|
||||
http_client = await _get_authenticated_client_for_userinfo(request)
|
||||
notes_client = NotesClient(http_client, username)
|
||||
|
||||
# Fetch full note content
|
||||
note = await notes_client.get_note(int(doc_id))
|
||||
full_content = f"{note['title']}\n\n{note['content']}"
|
||||
|
||||
# Validate offsets
|
||||
if start < 0 or end > len(full_content) or start >= end:
|
||||
# Check if context expansion succeeded
|
||||
if chunk_context is None:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Invalid offsets: start={start}, end={end}, content_length={len(full_content)}",
|
||||
"error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
|
||||
},
|
||||
status_code=400,
|
||||
status_code=404,
|
||||
)
|
||||
|
||||
# Extract chunk
|
||||
chunk_text = full_content[start:end]
|
||||
|
||||
# Extract context before and after
|
||||
before_start = max(0, start - context_chars)
|
||||
before_context = full_content[before_start:start]
|
||||
|
||||
after_end = min(len(full_content), end + context_chars)
|
||||
after_context = full_content[end:after_end]
|
||||
|
||||
# Determine if there's more content
|
||||
has_more_before = before_start > 0
|
||||
has_more_after = after_end < len(full_content)
|
||||
|
||||
logger.info(
|
||||
f"Fetched chunk context for {doc_type}_{doc_id}: "
|
||||
f"chunk_len={len(chunk_text)}, before_len={len(before_context)}, "
|
||||
f"after_len={len(after_context)}"
|
||||
f"chunk_len={len(chunk_context.chunk_text)}, "
|
||||
f"before_len={len(chunk_context.before_context)}, "
|
||||
f"after_len={len(chunk_context.after_context)}"
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
"chunk_text": chunk_text,
|
||||
"before_context": before_context,
|
||||
"after_context": after_context,
|
||||
"has_more_before": has_more_before,
|
||||
"has_more_after": has_more_after,
|
||||
}
|
||||
)
|
||||
# For PDF files, also fetch the highlighted page image from Qdrant
|
||||
highlighted_page_image = None
|
||||
page_number = None
|
||||
if doc_type == "file":
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
settings = get_settings()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
username = request.user.display_name
|
||||
|
||||
# Query for this specific chunk's highlighted image
|
||||
points_response = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
get_placeholder_filter(),
|
||||
FieldCondition(
|
||||
key="doc_id", match=MatchValue(value=doc_id_int)
|
||||
),
|
||||
FieldCondition(
|
||||
key="user_id", match=MatchValue(value=username)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_vectors=False,
|
||||
with_payload=["highlighted_page_image", "page_number"],
|
||||
)
|
||||
|
||||
points = points_response[0]
|
||||
if points and points[0].payload:
|
||||
highlighted_page_image = points[0].payload.get(
|
||||
"highlighted_page_image"
|
||||
)
|
||||
page_number = points[0].payload.get("page_number")
|
||||
if highlighted_page_image:
|
||||
logger.info(
|
||||
f"Found highlighted image for chunk: "
|
||||
f"page={page_number}, image_size={len(highlighted_page_image)}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch highlighted image: {e}")
|
||||
|
||||
# Return response compatible with frontend expectations
|
||||
response_data: dict = {
|
||||
"success": True,
|
||||
"chunk_text": chunk_context.chunk_text,
|
||||
"before_context": chunk_context.before_context,
|
||||
"after_context": chunk_context.after_context,
|
||||
"has_more_before": chunk_context.has_before_truncation,
|
||||
"has_more_after": chunk_context.has_after_truncation,
|
||||
}
|
||||
|
||||
# Add image data if available
|
||||
if highlighted_page_image:
|
||||
response_data["highlighted_page_image"] = highlighted_page_image
|
||||
response_data["page_number"] = page_number
|
||||
|
||||
return JSONResponse(response_data)
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"Invalid parameter format: {e}")
|
||||
|
||||
@@ -139,6 +139,7 @@ async def _get_authenticated_client(request: Request) -> httpx.AsyncClient:
|
||||
raise RuntimeError("BasicAuth credentials not configured")
|
||||
|
||||
assert nextcloud_host is not None # Type narrowing for type checker
|
||||
assert username is not None and password is not None # Type narrowing
|
||||
return httpx.AsyncClient(
|
||||
base_url=nextcloud_host,
|
||||
auth=(username, password),
|
||||
|
||||
+193
-3
@@ -29,9 +29,9 @@ from .app import get_app
|
||||
@click.option(
|
||||
"--transport",
|
||||
"-t",
|
||||
default="sse",
|
||||
default="streamable-http",
|
||||
show_default=True,
|
||||
type=click.Choice(["sse", "streamable-http", "http"]),
|
||||
type=click.Choice(["streamable-http", "http"]),
|
||||
help="MCP transport protocol",
|
||||
)
|
||||
@click.option(
|
||||
@@ -253,5 +253,195 @@ def run(
|
||||
)
|
||||
|
||||
|
||||
@click.group()
|
||||
def db():
|
||||
"""Database migration management commands."""
|
||||
pass
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database (can also use TOKEN_STORAGE_DB env var)",
|
||||
)
|
||||
@click.option(
|
||||
"--revision",
|
||||
"-r",
|
||||
default="head",
|
||||
show_default=True,
|
||||
help="Target revision (default: head for latest)",
|
||||
)
|
||||
def upgrade(database_path: str, revision: str):
|
||||
"""Upgrade database to a specific revision.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
# Upgrade to latest version
|
||||
$ nextcloud-mcp-server db upgrade
|
||||
|
||||
# Upgrade to specific revision
|
||||
$ nextcloud-mcp-server db upgrade --revision 001
|
||||
|
||||
# Use custom database path
|
||||
$ nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import upgrade_database
|
||||
|
||||
try:
|
||||
click.echo(f"Upgrading database to revision: {revision}")
|
||||
upgrade_database(database_path, revision)
|
||||
click.echo(click.style("✓ Database upgraded successfully", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Upgrade failed: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
@click.option(
|
||||
"--revision",
|
||||
"-r",
|
||||
default="-1",
|
||||
show_default=True,
|
||||
help="Target revision (default: -1 for previous version)",
|
||||
)
|
||||
@click.confirmation_option(
|
||||
prompt="Are you sure you want to downgrade the database? This may result in data loss."
|
||||
)
|
||||
def downgrade(database_path: str, revision: str):
|
||||
"""Downgrade database to a specific revision.
|
||||
|
||||
WARNING: This may result in data loss! Use with caution.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
# Downgrade by one version
|
||||
$ nextcloud-mcp-server db downgrade
|
||||
|
||||
# Downgrade to specific revision
|
||||
$ nextcloud-mcp-server db downgrade --revision 001
|
||||
|
||||
# Downgrade to base (empty database)
|
||||
$ nextcloud-mcp-server db downgrade --revision base
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import downgrade_database
|
||||
|
||||
try:
|
||||
click.echo(f"Downgrading database to revision: {revision}")
|
||||
downgrade_database(database_path, revision)
|
||||
click.echo(click.style("✓ Database downgraded successfully", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Downgrade failed: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
def current(database_path: str):
|
||||
"""Show current database revision.
|
||||
|
||||
\b
|
||||
Example:
|
||||
$ nextcloud-mcp-server db current
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import get_current_revision
|
||||
|
||||
try:
|
||||
revision = get_current_revision(database_path)
|
||||
if revision:
|
||||
click.echo(f"Current revision: {click.style(revision, fg='cyan')}")
|
||||
else:
|
||||
click.echo(
|
||||
click.style(
|
||||
"Database is not versioned (no alembic_version table)", fg="yellow"
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style(f"✗ Failed to get current revision: {e}", fg="red"), err=True
|
||||
)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
def history(database_path: str):
|
||||
"""Show migration history.
|
||||
|
||||
\b
|
||||
Example:
|
||||
$ nextcloud-mcp-server db history
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import show_migration_history
|
||||
|
||||
try:
|
||||
click.echo("Migration history:")
|
||||
show_migration_history(database_path)
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Failed to show history: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.argument("message")
|
||||
def migrate(message: str):
|
||||
"""Create a new migration script (developers only).
|
||||
|
||||
The MESSAGE argument describes the changes in this migration.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
$ nextcloud-mcp-server db migrate "add user preferences table"
|
||||
$ nextcloud-mcp-server db migrate "add index on refresh_tokens.user_id"
|
||||
|
||||
Note: You must manually edit the generated migration file to add SQL statements.
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import create_migration
|
||||
|
||||
try:
|
||||
click.echo(f"Creating new migration: {message}")
|
||||
create_migration(message)
|
||||
click.echo(click.style("✓ Migration created successfully", fg="green"))
|
||||
click.echo(
|
||||
"Edit the migration file in alembic/versions/ to add upgrade/downgrade SQL."
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style(f"✗ Failed to create migration: {e}", fg="red"), err=True
|
||||
)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
# Create CLI group with subcommands
|
||||
cli = click.Group()
|
||||
cli.add_command(run)
|
||||
cli.add_command(db)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
cli()
|
||||
|
||||
@@ -18,6 +18,7 @@ from .contacts import ContactsClient
|
||||
from .cookbook import CookbookClient
|
||||
from .deck import DeckClient
|
||||
from .groups import GroupsClient
|
||||
from .news import NewsClient
|
||||
from .notes import NotesClient
|
||||
from .sharing import SharingClient
|
||||
from .tables import TablesClient
|
||||
@@ -81,6 +82,7 @@ class NextcloudClient:
|
||||
self.contacts = ContactsClient(self._client, username)
|
||||
self.cookbook = CookbookClient(self._client, username)
|
||||
self.deck = DeckClient(self._client, username)
|
||||
self.news = NewsClient(self._client, username)
|
||||
self.users = UsersClient(self._client, username)
|
||||
self.groups = GroupsClient(self._client, username)
|
||||
self.sharing = SharingClient(self._client, username)
|
||||
@@ -130,10 +132,75 @@ class NextcloudClient:
|
||||
all_notes = self.notes.get_all_notes()
|
||||
return await self._notes_search.search_notes(all_notes, query)
|
||||
|
||||
async def find_files_by_tag(
|
||||
self, tag_name: str, mime_type_filter: str | None = None
|
||||
) -> list[dict]:
|
||||
"""Find files by system tag name, optionally filtered by MIME type.
|
||||
|
||||
This method coordinates tag lookup and file retrieval via WebDAV:
|
||||
1. Look up the tag ID by name
|
||||
2. Get all files with that tag (via REPORT with full metadata)
|
||||
3. Optionally filter by MIME type
|
||||
|
||||
Args:
|
||||
tag_name: Name of the system tag to search for (e.g., "vector-index")
|
||||
mime_type_filter: Optional MIME type filter (e.g., "application/pdf")
|
||||
|
||||
Returns:
|
||||
List of file dictionaries with WebDAV properties (path, size, content_type, etc.)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If tag lookup or file query fails
|
||||
|
||||
Examples:
|
||||
# Find all files with "vector-index" tag
|
||||
files = await nc_client.find_files_by_tag("vector-index")
|
||||
|
||||
# Find only PDFs with the tag
|
||||
pdfs = await nc_client.find_files_by_tag("vector-index", "application/pdf")
|
||||
"""
|
||||
# Look up tag by name using WebDAV
|
||||
tag = await self.webdav.get_tag_by_name(tag_name)
|
||||
if not tag:
|
||||
logger.debug(f"Tag '{tag_name}' not found, returning empty list")
|
||||
return []
|
||||
|
||||
# Get files with this tag (returns full file info from REPORT)
|
||||
files = await self.webdav.get_files_by_tag(tag["id"])
|
||||
if not files:
|
||||
logger.debug(f"No files found with tag '{tag_name}'")
|
||||
return []
|
||||
|
||||
logger.debug(f"Found {len(files)} files with tag '{tag_name}'")
|
||||
|
||||
# Apply MIME type filter if specified
|
||||
if mime_type_filter:
|
||||
filtered_files = [
|
||||
f
|
||||
for f in files
|
||||
if f.get("content_type", "").startswith(mime_type_filter)
|
||||
]
|
||||
logger.info(
|
||||
f"Returning {len(filtered_files)} files with tag '{tag_name}' (filtered by {mime_type_filter})"
|
||||
)
|
||||
return filtered_files
|
||||
|
||||
logger.info(f"Returning {len(files)} files with tag '{tag_name}'")
|
||||
return files
|
||||
|
||||
def _get_webdav_base_path(self) -> str:
|
||||
"""Helper to get the base WebDAV path for the authenticated user."""
|
||||
return f"/remote.php/dav/files/{self.username}"
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Async context manager entry."""
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Async context manager exit - closes all clients."""
|
||||
await self.close()
|
||||
return False # Don't suppress exceptions
|
||||
|
||||
async def close(self):
|
||||
"""Close the HTTP client and CalDAV client."""
|
||||
await self._client.aclose()
|
||||
|
||||
@@ -0,0 +1,394 @@
|
||||
"""Client for Nextcloud News app operations."""
|
||||
|
||||
import logging
|
||||
from enum import IntEnum
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseNextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NewsItemType(IntEnum):
|
||||
"""Type constants for News API item queries."""
|
||||
|
||||
FEED = 0 # Single feed
|
||||
FOLDER = 1 # Folder and its feeds
|
||||
STARRED = 2 # All starred items
|
||||
ALL = 3 # All items
|
||||
|
||||
|
||||
class NewsClient(BaseNextcloudClient):
|
||||
"""Client for Nextcloud News app operations."""
|
||||
|
||||
app_name = "news"
|
||||
API_BASE = "/apps/news/api/v1-3"
|
||||
|
||||
# --- Folders ---
|
||||
|
||||
async def get_folders(self) -> list[dict[str, Any]]:
|
||||
"""Get all folders."""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/folders")
|
||||
return response.json().get("folders", [])
|
||||
|
||||
async def create_folder(self, name: str) -> dict[str, Any]:
|
||||
"""Create a new folder.
|
||||
|
||||
Args:
|
||||
name: Folder name
|
||||
|
||||
Returns:
|
||||
Created folder data
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 409 if folder name already exists,
|
||||
422 if name is empty
|
||||
"""
|
||||
response = await self._make_request(
|
||||
"POST", f"{self.API_BASE}/folders", json={"name": name}
|
||||
)
|
||||
folders = response.json().get("folders", [])
|
||||
return folders[0] if folders else {}
|
||||
|
||||
async def rename_folder(self, folder_id: int, name: str) -> None:
|
||||
"""Rename a folder.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
name: New folder name
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found, 409 if name exists
|
||||
"""
|
||||
await self._make_request(
|
||||
"PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name}
|
||||
)
|
||||
|
||||
async def delete_folder(self, folder_id: int) -> None:
|
||||
"""Delete a folder and all its feeds/items.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found
|
||||
"""
|
||||
await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}")
|
||||
|
||||
async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None:
|
||||
"""Mark all items in a folder as read.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
newest_item_id: ID of newest item to mark read (prevents marking
|
||||
items user hasn't seen yet)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/folders/{folder_id}/read",
|
||||
json={"newestItemId": newest_item_id},
|
||||
)
|
||||
|
||||
# --- Feeds ---
|
||||
|
||||
async def get_feeds(self) -> dict[str, Any]:
|
||||
"""Get all feeds with metadata.
|
||||
|
||||
Returns:
|
||||
Dict with keys:
|
||||
- feeds: List of feed objects
|
||||
- starredCount: Number of starred items
|
||||
- newestItemId: ID of newest item (omitted if no items)
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/feeds")
|
||||
return response.json()
|
||||
|
||||
async def create_feed(
|
||||
self, url: str, folder_id: int | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""Subscribe to a new feed.
|
||||
|
||||
Args:
|
||||
url: Feed URL
|
||||
folder_id: Optional folder ID (None for root)
|
||||
|
||||
Returns:
|
||||
Created feed data
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 409 if feed already exists, 422 if URL is invalid
|
||||
"""
|
||||
body: dict[str, Any] = {"url": url}
|
||||
if folder_id is not None:
|
||||
body["folderId"] = folder_id
|
||||
response = await self._make_request("POST", f"{self.API_BASE}/feeds", json=body)
|
||||
data = response.json()
|
||||
feeds = data.get("feeds", [])
|
||||
return feeds[0] if feeds else {}
|
||||
|
||||
async def delete_feed(self, feed_id: int) -> None:
|
||||
"""Unsubscribe from a feed (deletes all items).
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}")
|
||||
|
||||
async def move_feed(self, feed_id: int, folder_id: int | None) -> None:
|
||||
"""Move a feed to a different folder.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
folder_id: Target folder ID (None for root)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/move",
|
||||
json={"folderId": folder_id},
|
||||
)
|
||||
|
||||
async def rename_feed(self, feed_id: int, title: str) -> None:
|
||||
"""Rename a feed.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
title: New feed title
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/rename",
|
||||
json={"feedTitle": title},
|
||||
)
|
||||
|
||||
async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None:
|
||||
"""Mark all items in a feed as read.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
newest_item_id: ID of newest item to mark read
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/read",
|
||||
json={"newestItemId": newest_item_id},
|
||||
)
|
||||
|
||||
# --- Items ---
|
||||
|
||||
async def get_items(
|
||||
self,
|
||||
batch_size: int = 50,
|
||||
offset: int = 0,
|
||||
type_: int = NewsItemType.ALL,
|
||||
id_: int = 0,
|
||||
get_read: bool = True,
|
||||
oldest_first: bool = False,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Get items (articles) with filtering.
|
||||
|
||||
Args:
|
||||
batch_size: Number of items to return (-1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
type_: Item type filter (NewsItemType)
|
||||
id_: Feed/folder ID (ignored for STARRED/ALL types)
|
||||
get_read: Include read items
|
||||
oldest_first: Sort oldest first instead of newest
|
||||
|
||||
Returns:
|
||||
List of item objects
|
||||
"""
|
||||
params: dict[str, Any] = {
|
||||
"batchSize": batch_size,
|
||||
"offset": offset,
|
||||
"type": type_,
|
||||
"id": id_,
|
||||
"getRead": str(get_read).lower(),
|
||||
"oldestFirst": str(oldest_first).lower(),
|
||||
}
|
||||
response = await self._make_request(
|
||||
"GET", f"{self.API_BASE}/items", params=params
|
||||
)
|
||||
return response.json().get("items", [])
|
||||
|
||||
async def get_item(self, item_id: int) -> dict[str, Any]:
|
||||
"""Get a specific item by ID.
|
||||
|
||||
Note: The News API doesn't have a direct single-item endpoint,
|
||||
so we fetch all items and filter. For efficiency, consider
|
||||
caching or using get_items with specific feed if known.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Returns:
|
||||
Item data
|
||||
|
||||
Raises:
|
||||
ValueError: If item not found
|
||||
"""
|
||||
# Fetch all items and find the one we need
|
||||
# This is inefficient but the API doesn't provide a direct endpoint
|
||||
items = await self.get_items(batch_size=-1, get_read=True)
|
||||
for item in items:
|
||||
if item.get("id") == item_id:
|
||||
return item
|
||||
raise ValueError(f"Item {item_id} not found")
|
||||
|
||||
async def get_updated_items(
|
||||
self,
|
||||
last_modified: int,
|
||||
type_: int = NewsItemType.ALL,
|
||||
id_: int = 0,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Get items modified since a timestamp (for delta sync).
|
||||
|
||||
Args:
|
||||
last_modified: Unix timestamp (seconds or microseconds)
|
||||
type_: Item type filter
|
||||
id_: Feed/folder ID
|
||||
|
||||
Returns:
|
||||
List of modified items (includes deleted items)
|
||||
"""
|
||||
params: dict[str, Any] = {
|
||||
"lastModified": last_modified,
|
||||
"type": type_,
|
||||
"id": id_,
|
||||
}
|
||||
response = await self._make_request(
|
||||
"GET", f"{self.API_BASE}/items/updated", params=params
|
||||
)
|
||||
return response.json().get("items", [])
|
||||
|
||||
async def mark_item_read(self, item_id: int) -> None:
|
||||
"""Mark a single item as read.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read")
|
||||
|
||||
async def mark_item_unread(self, item_id: int) -> None:
|
||||
"""Mark a single item as unread.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread")
|
||||
|
||||
async def star_item(self, item_id: int) -> None:
|
||||
"""Star (favorite) a single item.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star")
|
||||
|
||||
async def unstar_item(self, item_id: int) -> None:
|
||||
"""Unstar a single item.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar")
|
||||
|
||||
async def mark_items_read(self, item_ids: list[int]) -> None:
|
||||
"""Mark multiple items as read.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids}
|
||||
)
|
||||
|
||||
async def mark_items_unread(self, item_ids: list[int]) -> None:
|
||||
"""Mark multiple items as unread.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/items/unread/multiple",
|
||||
json={"itemIds": item_ids},
|
||||
)
|
||||
|
||||
async def star_items(self, item_ids: list[int]) -> None:
|
||||
"""Star multiple items.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids}
|
||||
)
|
||||
|
||||
async def unstar_items(self, item_ids: list[int]) -> None:
|
||||
"""Unstar multiple items.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/items/unstar/multiple",
|
||||
json={"itemIds": item_ids},
|
||||
)
|
||||
|
||||
async def mark_all_read(self, newest_item_id: int) -> None:
|
||||
"""Mark all items as read.
|
||||
|
||||
Args:
|
||||
newest_item_id: ID of newest item to mark read
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id}
|
||||
)
|
||||
|
||||
# --- Status ---
|
||||
|
||||
async def get_status(self) -> dict[str, Any]:
|
||||
"""Get News app status and configuration.
|
||||
|
||||
Returns:
|
||||
Dict with version and warnings
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/status")
|
||||
return response.json()
|
||||
|
||||
async def get_version(self) -> str:
|
||||
"""Get News app version.
|
||||
|
||||
Returns:
|
||||
Version string (e.g., "25.0.0")
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/version")
|
||||
return response.json().get("version", "")
|
||||
@@ -821,6 +821,20 @@ class WebDAVClient(BaseNextcloudClient):
|
||||
item["file_id"] = int(value) if value else None
|
||||
elif tag == "favorite":
|
||||
item["is_favorite"] = value == "1"
|
||||
elif tag == "tags":
|
||||
# Tags can be comma-separated or have multiple child elements
|
||||
if value:
|
||||
# Handle comma-separated tags
|
||||
item["tags"] = [
|
||||
t.strip() for t in value.split(",") if t.strip()
|
||||
]
|
||||
else:
|
||||
# Check for child tag elements (alternative format)
|
||||
tag_elements = child.findall(".//{http://owncloud.org/ns}tag")
|
||||
if tag_elements:
|
||||
item["tags"] = [t.text for t in tag_elements if t.text]
|
||||
else:
|
||||
item["tags"] = []
|
||||
elif tag == "permissions":
|
||||
item["permissions"] = value
|
||||
elif tag == "size":
|
||||
@@ -948,3 +962,576 @@ class WebDAVClient(BaseNextcloudClient):
|
||||
properties=properties,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def find_by_tag(
|
||||
self, tag_name: str, scope: str = "", limit: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Find files by tag name.
|
||||
|
||||
DEPRECATED: Use NextcloudClient.find_files_by_tag() instead, which uses
|
||||
the proper OCS Tags API rather than WebDAV SEARCH.
|
||||
|
||||
Args:
|
||||
tag_name: Tag to filter by (e.g., "vector-index")
|
||||
scope: Directory path to search in (empty string for user root)
|
||||
limit: Maximum number of results to return
|
||||
|
||||
Returns:
|
||||
List of files/directories with the specified tag
|
||||
|
||||
Examples:
|
||||
# Find all files tagged with "vector-index"
|
||||
results = await find_by_tag("vector-index")
|
||||
|
||||
# Find tagged files in a specific folder
|
||||
results = await find_by_tag("vector-index", scope="Documents")
|
||||
"""
|
||||
# Use LIKE for tag matching since tags can be comma-separated
|
||||
where_conditions = f"""
|
||||
<d:like>
|
||||
<d:prop>
|
||||
<oc:tags/>
|
||||
</d:prop>
|
||||
<d:literal>%{tag_name}%</d:literal>
|
||||
</d:like>
|
||||
"""
|
||||
|
||||
# Request tag property along with standard properties
|
||||
properties = [
|
||||
"displayname",
|
||||
"getcontentlength",
|
||||
"getcontenttype",
|
||||
"getlastmodified",
|
||||
"resourcetype",
|
||||
"getetag",
|
||||
"fileid",
|
||||
"tags",
|
||||
]
|
||||
|
||||
return await self.search_files(
|
||||
scope=scope,
|
||||
where_conditions=where_conditions,
|
||||
properties=properties,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def _get_file_info_by_id(self, file_id: int) -> Dict[str, Any]:
|
||||
"""Get file information by Nextcloud file ID using WebDAV.
|
||||
|
||||
Args:
|
||||
file_id: Nextcloud internal file ID
|
||||
|
||||
Returns:
|
||||
File information dictionary with path, size, content_type, etc.
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If file not found or request fails
|
||||
"""
|
||||
# Nextcloud allows accessing files by ID via special meta endpoint
|
||||
meta_path = f"/remote.php/dav/meta/{file_id}/"
|
||||
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:resourcetype/>
|
||||
<d:getetag/>
|
||||
<oc:fileid/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
headers = {"Depth": "0", "Content-Type": "text/xml", "OCS-APIRequest": "true"}
|
||||
|
||||
response = await self._make_request(
|
||||
"PROPFIND", meta_path, content=propfind_body, headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse the XML response
|
||||
root = ET.fromstring(response.content)
|
||||
responses = root.findall(".//{DAV:}response")
|
||||
|
||||
if not responses:
|
||||
raise RuntimeError(f"File ID {file_id} not found")
|
||||
|
||||
response_elem = responses[0]
|
||||
href = response_elem.find(".//{DAV:}href")
|
||||
if href is None:
|
||||
raise RuntimeError(f"No href in response for file ID {file_id}")
|
||||
|
||||
propstat = response_elem.find(".//{DAV:}propstat")
|
||||
if propstat is None:
|
||||
raise RuntimeError(f"No propstat for file ID {file_id}")
|
||||
|
||||
prop = propstat.find(".//{DAV:}prop")
|
||||
if prop is None:
|
||||
raise RuntimeError(f"No prop for file ID {file_id}")
|
||||
|
||||
# Extract file path from displayname or construct from file ID
|
||||
displayname_elem = prop.find(".//{DAV:}displayname")
|
||||
name = (
|
||||
displayname_elem.text if displayname_elem is not None else f"file_{file_id}"
|
||||
)
|
||||
|
||||
# Get file properties
|
||||
size_elem = prop.find(".//{DAV:}getcontentlength")
|
||||
size = int(size_elem.text) if size_elem is not None and size_elem.text else 0
|
||||
|
||||
content_type_elem = prop.find(".//{DAV:}getcontenttype")
|
||||
content_type = content_type_elem.text if content_type_elem is not None else None
|
||||
|
||||
modified_elem = prop.find(".//{DAV:}getlastmodified")
|
||||
modified = modified_elem.text if modified_elem is not None else None
|
||||
|
||||
etag_elem = prop.find(".//{DAV:}getetag")
|
||||
etag = (
|
||||
etag_elem.text.strip('"')
|
||||
if etag_elem is not None and etag_elem.text
|
||||
else None
|
||||
)
|
||||
|
||||
# Check if it's a directory
|
||||
resourcetype = prop.find(".//{DAV:}resourcetype")
|
||||
is_directory = (
|
||||
resourcetype is not None
|
||||
and resourcetype.find(".//{DAV:}collection") is not None
|
||||
)
|
||||
|
||||
# Try to get actual file path - meta endpoint doesn't give us the real path
|
||||
# so we'll construct a reasonable path from the name
|
||||
# The calling code in NextcloudClient will have the context to determine the actual path
|
||||
file_info = {
|
||||
"name": name,
|
||||
"path": f"/{name}", # Placeholder - caller should use WebDAV to get real path if needed
|
||||
"size": size,
|
||||
"content_type": content_type,
|
||||
"last_modified": modified,
|
||||
"etag": etag,
|
||||
"is_directory": is_directory,
|
||||
"file_id": file_id,
|
||||
}
|
||||
|
||||
logger.debug(f"Retrieved file info for ID {file_id}: {name}")
|
||||
return file_info
|
||||
|
||||
async def get_tag_by_name(self, tag_name: str) -> dict[str, Any] | None:
|
||||
"""Get a system tag by its name via WebDAV.
|
||||
|
||||
Args:
|
||||
tag_name: Name of the tag to find (case-sensitive)
|
||||
|
||||
Returns:
|
||||
Tag dictionary if found, None otherwise
|
||||
"""
|
||||
# Use WebDAV PROPFIND to list all systemtags
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:id/>
|
||||
<oc:display-name/>
|
||||
<oc:user-visible/>
|
||||
<oc:user-assignable/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
response = await self._client.request(
|
||||
"PROPFIND",
|
||||
"/remote.php/dav/systemtags/",
|
||||
headers={"Depth": "1"},
|
||||
content=propfind_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
for response_elem in root.findall("d:response", ns):
|
||||
href = response_elem.find("d:href", ns)
|
||||
if href is None or href.text == "/remote.php/dav/systemtags/":
|
||||
# Skip the collection itself
|
||||
continue
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
continue
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
continue
|
||||
|
||||
# Extract tag properties
|
||||
tag_id_elem = prop.find("oc:id", ns)
|
||||
display_name_elem = prop.find("oc:display-name", ns)
|
||||
user_visible_elem = prop.find("oc:user-visible", ns)
|
||||
user_assignable_elem = prop.find("oc:user-assignable", ns)
|
||||
|
||||
if display_name_elem is not None and display_name_elem.text == tag_name:
|
||||
tag_info = {
|
||||
"id": int(tag_id_elem.text)
|
||||
if tag_id_elem is not None and tag_id_elem.text is not None
|
||||
else None,
|
||||
"name": display_name_elem.text,
|
||||
"userVisible": user_visible_elem.text.lower() == "true"
|
||||
if user_visible_elem is not None
|
||||
and user_visible_elem.text is not None
|
||||
else True,
|
||||
"userAssignable": user_assignable_elem.text.lower() == "true"
|
||||
if user_assignable_elem is not None
|
||||
and user_assignable_elem.text is not None
|
||||
else True,
|
||||
}
|
||||
logger.debug(f"Found tag '{tag_name}' with ID {tag_info['id']}")
|
||||
return tag_info
|
||||
|
||||
logger.debug(f"Tag '{tag_name}' not found")
|
||||
return None
|
||||
|
||||
async def get_files_by_tag(self, tag_id: int) -> list[dict[str, Any]]:
|
||||
"""Get all files tagged with a specific system tag via WebDAV REPORT.
|
||||
|
||||
Args:
|
||||
tag_id: Numeric ID of the tag
|
||||
|
||||
Returns:
|
||||
List of file info dictionaries with path, size, content_type, etc.
|
||||
"""
|
||||
# Use WebDAV REPORT method with systemtag filter, requesting all properties
|
||||
report_body = f"""<?xml version="1.0"?>
|
||||
<oc:filter-files xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns" xmlns:nc="http://nextcloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:fileid/>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:getetag/>
|
||||
</d:prop>
|
||||
<oc:filter-rules>
|
||||
<oc:systemtag>{tag_id}</oc:systemtag>
|
||||
</oc:filter-rules>
|
||||
</oc:filter-files>"""
|
||||
|
||||
response = await self._client.request(
|
||||
"REPORT",
|
||||
f"{self._get_webdav_base_path()}/",
|
||||
content=report_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
files = []
|
||||
for response_elem in root.findall("d:response", ns):
|
||||
# Extract href (file path)
|
||||
href_elem = response_elem.find("d:href", ns)
|
||||
if href_elem is None or not href_elem.text:
|
||||
continue
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
continue
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
continue
|
||||
|
||||
# Extract all properties
|
||||
fileid_elem = prop.find("oc:fileid", ns)
|
||||
displayname_elem = prop.find("d:displayname", ns)
|
||||
contentlength_elem = prop.find("d:getcontentlength", ns)
|
||||
contenttype_elem = prop.find("d:getcontenttype", ns)
|
||||
lastmodified_elem = prop.find("d:getlastmodified", ns)
|
||||
etag_elem = prop.find("d:getetag", ns)
|
||||
|
||||
if fileid_elem is None or not fileid_elem.text:
|
||||
continue
|
||||
|
||||
# Decode href path and extract the file path
|
||||
from urllib.parse import unquote
|
||||
|
||||
href_path = unquote(href_elem.text)
|
||||
# Remove WebDAV prefix to get user-relative path
|
||||
webdav_prefix = f"/remote.php/dav/files/{self.username}/"
|
||||
file_path = href_path.replace(webdav_prefix, "/")
|
||||
|
||||
# Parse last modified timestamp
|
||||
last_modified_timestamp = None
|
||||
if lastmodified_elem is not None and lastmodified_elem.text:
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
try:
|
||||
dt = parsedate_to_datetime(lastmodified_elem.text)
|
||||
last_modified_timestamp = int(dt.timestamp())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
file_info = {
|
||||
"id": int(fileid_elem.text),
|
||||
"path": file_path,
|
||||
"name": displayname_elem.text
|
||||
if displayname_elem is not None
|
||||
else file_path.split("/")[-1],
|
||||
"size": int(contentlength_elem.text)
|
||||
if contentlength_elem is not None and contentlength_elem.text
|
||||
else 0,
|
||||
"content_type": contenttype_elem.text
|
||||
if contenttype_elem is not None
|
||||
else "",
|
||||
"last_modified": lastmodified_elem.text
|
||||
if lastmodified_elem is not None
|
||||
else None,
|
||||
"last_modified_timestamp": last_modified_timestamp,
|
||||
"etag": etag_elem.text if etag_elem is not None else None,
|
||||
}
|
||||
files.append(file_info)
|
||||
|
||||
logger.debug(f"Found {len(files)} files with tag ID {tag_id}")
|
||||
return files
|
||||
|
||||
async def get_file_info(self, path: str) -> dict[str, Any] | None:
|
||||
"""Get file info including file ID via WebDAV PROPFIND.
|
||||
|
||||
Args:
|
||||
path: Path to the file (relative to user's files directory)
|
||||
|
||||
Returns:
|
||||
File info dictionary with id, name, size, content_type, etc.
|
||||
Returns None if file not found.
|
||||
"""
|
||||
webdav_path = f"{self._get_webdav_base_path()}/{path.lstrip('/')}"
|
||||
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:fileid/>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:getetag/>
|
||||
<d:resourcetype/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
try:
|
||||
response = await self._client.request(
|
||||
"PROPFIND",
|
||||
webdav_path,
|
||||
headers={"Depth": "0"},
|
||||
content=propfind_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 404:
|
||||
logger.debug(f"File not found: {path}")
|
||||
return None
|
||||
raise
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
response_elem = root.find("d:response", ns)
|
||||
if response_elem is None:
|
||||
return None
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
return None
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
return None
|
||||
|
||||
# Extract properties
|
||||
fileid_elem = prop.find("oc:fileid", ns)
|
||||
displayname_elem = prop.find("d:displayname", ns)
|
||||
contentlength_elem = prop.find("d:getcontentlength", ns)
|
||||
contenttype_elem = prop.find("d:getcontenttype", ns)
|
||||
lastmodified_elem = prop.find("d:getlastmodified", ns)
|
||||
etag_elem = prop.find("d:getetag", ns)
|
||||
resourcetype_elem = prop.find("d:resourcetype", ns)
|
||||
|
||||
is_directory = (
|
||||
resourcetype_elem is not None
|
||||
and resourcetype_elem.find("d:collection", ns) is not None
|
||||
)
|
||||
|
||||
file_info = {
|
||||
"id": int(fileid_elem.text)
|
||||
if fileid_elem is not None and fileid_elem.text is not None
|
||||
else None,
|
||||
"path": path,
|
||||
"name": displayname_elem.text
|
||||
if displayname_elem is not None
|
||||
else path.split("/")[-1],
|
||||
"size": int(contentlength_elem.text)
|
||||
if contentlength_elem is not None and contentlength_elem.text
|
||||
else 0,
|
||||
"content_type": contenttype_elem.text
|
||||
if contenttype_elem is not None
|
||||
else "",
|
||||
"last_modified": lastmodified_elem.text
|
||||
if lastmodified_elem is not None
|
||||
else None,
|
||||
"etag": etag_elem.text.strip('"')
|
||||
if etag_elem is not None and etag_elem.text
|
||||
else None,
|
||||
"is_directory": is_directory,
|
||||
}
|
||||
|
||||
logger.debug(f"Got file info for '{path}': id={file_info['id']}")
|
||||
return file_info
|
||||
|
||||
async def create_tag(
|
||||
self,
|
||||
name: str,
|
||||
user_visible: bool = True,
|
||||
user_assignable: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Create a system tag via WebDAV.
|
||||
|
||||
Args:
|
||||
name: Name of the tag to create
|
||||
user_visible: Whether the tag is visible to users
|
||||
user_assignable: Whether users can assign this tag
|
||||
|
||||
Returns:
|
||||
Tag dictionary with id, name, userVisible, userAssignable
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag creation fails (409 if already exists)
|
||||
"""
|
||||
# Use WebDAV POST with JSON body to create tag
|
||||
response = await self._client.post(
|
||||
"/remote.php/dav/systemtags/",
|
||||
headers={"Content-Type": "application/json"},
|
||||
json={
|
||||
"name": name,
|
||||
"userVisible": user_visible,
|
||||
"userAssignable": user_assignable,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Extract tag ID from Content-Location header (e.g., /remote.php/dav/systemtags/42)
|
||||
content_location = response.headers.get("Content-Location", "")
|
||||
tag_id = None
|
||||
if content_location:
|
||||
# Extract the numeric ID from the path
|
||||
try:
|
||||
tag_id = int(content_location.rstrip("/").split("/")[-1])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
tag_info = {
|
||||
"id": tag_id,
|
||||
"name": name,
|
||||
"userVisible": user_visible,
|
||||
"userAssignable": user_assignable,
|
||||
}
|
||||
|
||||
logger.info(f"Created tag '{name}' with ID {tag_info['id']}")
|
||||
return tag_info
|
||||
|
||||
async def get_or_create_tag(
|
||||
self,
|
||||
name: str,
|
||||
user_visible: bool = True,
|
||||
user_assignable: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Get a tag by name, creating it if it doesn't exist.
|
||||
|
||||
Args:
|
||||
name: Name of the tag
|
||||
user_visible: Whether the tag is visible to users (for creation)
|
||||
user_assignable: Whether users can assign this tag (for creation)
|
||||
|
||||
Returns:
|
||||
Tag dictionary with id, name, userVisible, userAssignable
|
||||
"""
|
||||
# First try to get existing tag
|
||||
existing_tag = await self.get_tag_by_name(name)
|
||||
if existing_tag:
|
||||
logger.debug(f"Tag '{name}' already exists with ID {existing_tag['id']}")
|
||||
return existing_tag
|
||||
|
||||
# Create new tag
|
||||
try:
|
||||
return await self.create_tag(name, user_visible, user_assignable)
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 409:
|
||||
# Tag was created between our check and creation, fetch it
|
||||
existing_tag = await self.get_tag_by_name(name)
|
||||
if existing_tag:
|
||||
return existing_tag
|
||||
raise
|
||||
|
||||
async def assign_tag_to_file(self, file_id: int, tag_id: int) -> bool:
|
||||
"""Assign a system tag to a file.
|
||||
|
||||
Args:
|
||||
file_id: Numeric file ID
|
||||
tag_id: Numeric tag ID
|
||||
|
||||
Returns:
|
||||
True if tag was assigned successfully (or already assigned)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag assignment fails
|
||||
"""
|
||||
response = await self._client.request(
|
||||
"PUT",
|
||||
f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}",
|
||||
headers={"Content-Length": "0"},
|
||||
content=b"",
|
||||
)
|
||||
|
||||
# 201 = Created (new assignment), 409 = Conflict (already assigned)
|
||||
if response.status_code in (201, 409):
|
||||
logger.info(f"Tagged file {file_id} with tag {tag_id}")
|
||||
return True
|
||||
|
||||
response.raise_for_status()
|
||||
return True
|
||||
|
||||
async def remove_tag_from_file(self, file_id: int, tag_id: int) -> bool:
|
||||
"""Remove a system tag from a file.
|
||||
|
||||
Args:
|
||||
file_id: Numeric file ID
|
||||
tag_id: Numeric tag ID
|
||||
|
||||
Returns:
|
||||
True if tag was removed successfully (or wasn't assigned)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag removal fails
|
||||
"""
|
||||
response = await self._client.request(
|
||||
"DELETE",
|
||||
f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}",
|
||||
)
|
||||
|
||||
# 204 = No Content (removed), 404 = Not Found (wasn't assigned)
|
||||
if response.status_code in (204, 404):
|
||||
logger.info(f"Removed tag {tag_id} from file {file_id}")
|
||||
return True
|
||||
|
||||
response.raise_for_status()
|
||||
return True
|
||||
|
||||
@@ -2,8 +2,37 @@ import logging
|
||||
import logging.config
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class DeploymentMode(Enum):
|
||||
"""Deployment mode for the MCP server.
|
||||
|
||||
SELF_HOSTED: Full features, environment-based configuration.
|
||||
Supports vector sync, semantic search, admin UI.
|
||||
|
||||
SMITHERY_STATELESS: Stateless mode for Smithery hosting.
|
||||
Session-based configuration, no persistent storage.
|
||||
Excludes semantic search, vector sync, admin UI.
|
||||
"""
|
||||
|
||||
SELF_HOSTED = "self_hosted"
|
||||
SMITHERY_STATELESS = "smithery"
|
||||
|
||||
|
||||
def get_deployment_mode() -> DeploymentMode:
|
||||
"""Detect deployment mode from environment.
|
||||
|
||||
Returns:
|
||||
DeploymentMode.SMITHERY_STATELESS if SMITHERY_DEPLOYMENT=true,
|
||||
otherwise DeploymentMode.SELF_HOSTED (default).
|
||||
"""
|
||||
if os.getenv("SMITHERY_DEPLOYMENT", "false").lower() == "true":
|
||||
return DeploymentMode.SMITHERY_STATELESS
|
||||
return DeploymentMode.SELF_HOSTED
|
||||
|
||||
|
||||
LOGGING_CONFIG = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
@@ -102,6 +131,14 @@ def get_document_processor_config() -> dict[str, Any]:
|
||||
"lang": os.getenv("TESSERACT_LANG", "eng"),
|
||||
}
|
||||
|
||||
# PyMuPDF configuration (local PDF processing)
|
||||
if os.getenv("ENABLE_PYMUPDF", "true").lower() == "true": # Enabled by default
|
||||
config["processors"]["pymupdf"] = {
|
||||
"extract_images": os.getenv("PYMUPDF_EXTRACT_IMAGES", "true").lower()
|
||||
== "true",
|
||||
"image_dir": os.getenv("PYMUPDF_IMAGE_DIR"), # None = use temp directory
|
||||
}
|
||||
|
||||
# Custom processor (via HTTP API)
|
||||
if os.getenv("ENABLE_CUSTOM_PROCESSOR", "false").lower() == "true":
|
||||
custom_url = os.getenv("CUSTOM_PROCESSOR_URL")
|
||||
@@ -168,6 +205,7 @@ class Settings:
|
||||
vector_sync_scan_interval: int = 300 # seconds (5 minutes)
|
||||
vector_sync_processor_workers: int = 3
|
||||
vector_sync_queue_max_size: int = 10000
|
||||
vector_sync_user_poll_interval: int = 60 # seconds - OAuth mode user discovery
|
||||
|
||||
# Qdrant settings (mutually exclusive modes)
|
||||
qdrant_url: Optional[str] = None # Network mode: http://qdrant:6333
|
||||
@@ -180,6 +218,11 @@ class Settings:
|
||||
ollama_embedding_model: str = "nomic-embed-text"
|
||||
ollama_verify_ssl: bool = True
|
||||
|
||||
# OpenAI settings (for embeddings)
|
||||
openai_api_key: Optional[str] = None
|
||||
openai_base_url: Optional[str] = None
|
||||
openai_embedding_model: str = "text-embedding-3-small"
|
||||
|
||||
# Document chunking settings (for vector embeddings)
|
||||
document_chunk_size: int = 2048 # Characters per chunk
|
||||
document_chunk_overlap: int = 200 # Overlapping characters between chunks
|
||||
@@ -238,6 +281,29 @@ class Settings:
|
||||
f"DOCUMENT_CHUNK_OVERLAP ({self.document_chunk_overlap}) cannot be negative."
|
||||
)
|
||||
|
||||
def get_embedding_model_name(self) -> str:
|
||||
"""
|
||||
Get the active embedding model name based on provider priority.
|
||||
|
||||
Priority order (same as ProviderRegistry):
|
||||
1. OpenAI - if OPENAI_API_KEY is set
|
||||
2. Ollama - if OLLAMA_BASE_URL is set
|
||||
3. Simple - fallback (returns "simple-384")
|
||||
|
||||
Returns:
|
||||
Active embedding model name
|
||||
"""
|
||||
# Check OpenAI first (higher priority than Ollama in registry)
|
||||
if self.openai_api_key:
|
||||
return self.openai_embedding_model
|
||||
|
||||
# Check Ollama
|
||||
if self.ollama_base_url:
|
||||
return self.ollama_embedding_model
|
||||
|
||||
# Fallback to simple provider indicator
|
||||
return "simple-384"
|
||||
|
||||
def get_collection_name(self) -> str:
|
||||
"""
|
||||
Get Qdrant collection name.
|
||||
@@ -253,8 +319,9 @@ class Settings:
|
||||
Format: {deployment-id}-{model-name}
|
||||
|
||||
Examples:
|
||||
- "my-deployment-nomic-embed-text" (OTEL_SERVICE_NAME set)
|
||||
- "mcp-container-all-minilm" (hostname fallback)
|
||||
- "my-deployment-nomic-embed-text" (Ollama)
|
||||
- "my-deployment-text-embedding-3-small" (OpenAI)
|
||||
- "mcp-container-openai-text-embedding-3-small" (hostname fallback)
|
||||
|
||||
Returns:
|
||||
Collection name string
|
||||
@@ -274,7 +341,7 @@ class Settings:
|
||||
|
||||
# Sanitize deployment ID and model name
|
||||
deployment_id = deployment_id.lower().replace(" ", "-").replace("_", "-")
|
||||
model_name = self.ollama_embedding_model.replace("/", "-").replace(":", "-")
|
||||
model_name = self.get_embedding_model_name().replace("/", "-").replace(":", "-")
|
||||
|
||||
return f"{deployment_id}-{model_name}"
|
||||
|
||||
@@ -325,6 +392,9 @@ def get_settings() -> Settings:
|
||||
vector_sync_queue_max_size=int(
|
||||
os.getenv("VECTOR_SYNC_QUEUE_MAX_SIZE", "10000")
|
||||
),
|
||||
vector_sync_user_poll_interval=int(
|
||||
os.getenv("VECTOR_SYNC_USER_POLL_INTERVAL", "60")
|
||||
),
|
||||
# Qdrant settings
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_location=os.getenv("QDRANT_LOCATION"),
|
||||
@@ -334,6 +404,12 @@ def get_settings() -> Settings:
|
||||
ollama_base_url=os.getenv("OLLAMA_BASE_URL"),
|
||||
ollama_embedding_model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
|
||||
ollama_verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
|
||||
# OpenAI settings
|
||||
openai_api_key=os.getenv("OPENAI_API_KEY"),
|
||||
openai_base_url=os.getenv("OPENAI_BASE_URL"),
|
||||
openai_embedding_model=os.getenv(
|
||||
"OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
|
||||
),
|
||||
# Document chunking settings
|
||||
document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "2048")),
|
||||
document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "200")),
|
||||
|
||||
@@ -1,21 +1,37 @@
|
||||
"""Helper functions for accessing context in MCP tools."""
|
||||
|
||||
import logging
|
||||
|
||||
from httpx import BasicAuth
|
||||
from mcp.server.fastmcp import Context
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.config import (
|
||||
DeploymentMode,
|
||||
get_deployment_mode,
|
||||
get_settings,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def get_client(ctx: Context) -> NextcloudClient:
|
||||
"""
|
||||
Get the appropriate Nextcloud client based on authentication mode.
|
||||
|
||||
ADR-005 compliant implementation supporting two modes:
|
||||
1. BasicAuth mode: Returns shared client from lifespan context
|
||||
2. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
|
||||
Token already contains both MCP and Nextcloud audiences - use directly
|
||||
3. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
|
||||
Exchange MCP token for Nextcloud token via RFC 8693
|
||||
ADR-016 compliant implementation supporting three deployment modes:
|
||||
|
||||
1. Smithery stateless mode (SMITHERY_DEPLOYMENT=true):
|
||||
Create client from session configuration (nextcloud_url, username, app_password)
|
||||
No persistent state - client created per-request from Smithery session config.
|
||||
|
||||
2. BasicAuth mode: Returns shared client from lifespan context
|
||||
|
||||
3. OAuth mode:
|
||||
a. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
|
||||
Token already contains both MCP and Nextcloud audiences - use directly
|
||||
b. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
|
||||
Exchange MCP token for Nextcloud token via RFC 8693
|
||||
|
||||
SECURITY: Token passthrough has been REMOVED. All OAuth modes validate
|
||||
proper token audiences per MCP Security Best Practices specification.
|
||||
@@ -24,7 +40,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
by the MCP server via @require_scopes decorator, not by the IdP.
|
||||
|
||||
This function automatically detects the authentication mode by checking
|
||||
the type of the lifespan context.
|
||||
the deployment mode and type of the lifespan context.
|
||||
|
||||
Args:
|
||||
ctx: MCP request context
|
||||
@@ -34,6 +50,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
|
||||
Raises:
|
||||
AttributeError: If context doesn't contain expected data
|
||||
ValueError: If Smithery mode but session config is missing required fields
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -43,6 +60,12 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
return await client.capabilities()
|
||||
```
|
||||
"""
|
||||
deployment_mode = get_deployment_mode()
|
||||
|
||||
# ADR-016: Smithery stateless mode - create client from session config
|
||||
if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
|
||||
return _get_client_from_session_config(ctx)
|
||||
|
||||
settings = get_settings()
|
||||
lifespan_ctx = ctx.request_context.lifespan_context
|
||||
|
||||
@@ -75,3 +98,82 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
f"Lifespan context does not have 'client' or 'nextcloud_host' attribute. "
|
||||
f"Type: {type(lifespan_ctx)}"
|
||||
)
|
||||
|
||||
|
||||
def _get_client_from_session_config(ctx: Context) -> NextcloudClient:
|
||||
"""
|
||||
Create NextcloudClient from Smithery session configuration.
|
||||
|
||||
ADR-016: In Smithery stateless mode, each request includes session config
|
||||
with the user's Nextcloud credentials. This function creates a fresh client
|
||||
for each request - no state is persisted between requests.
|
||||
|
||||
For container runtime, config is extracted from URL query parameters by
|
||||
SmitheryConfigMiddleware and stored in a context variable.
|
||||
|
||||
Expected session config fields (from Smithery configSchema):
|
||||
- nextcloud_url: str - Nextcloud instance URL (required)
|
||||
- username: str - Nextcloud username (required)
|
||||
- app_password: str - Nextcloud app password (required)
|
||||
|
||||
Args:
|
||||
ctx: MCP request context (not used directly for Smithery config)
|
||||
|
||||
Returns:
|
||||
NextcloudClient configured with session credentials
|
||||
|
||||
Raises:
|
||||
ValueError: If required session config fields are missing
|
||||
"""
|
||||
# ADR-016: Get session config from context variable (set by SmitheryConfigMiddleware)
|
||||
from nextcloud_mcp_server.app import get_smithery_session_config
|
||||
|
||||
session_config = get_smithery_session_config()
|
||||
|
||||
if session_config is None:
|
||||
raise ValueError(
|
||||
"Session configuration required in Smithery mode. "
|
||||
"Ensure nextcloud_url, username, and app_password are provided as URL query parameters."
|
||||
)
|
||||
|
||||
# Extract required fields - config is always a dict from SmitheryConfigMiddleware
|
||||
nextcloud_url = session_config.get("nextcloud_url")
|
||||
username = session_config.get("username")
|
||||
app_password = session_config.get("app_password")
|
||||
|
||||
# Validate required fields
|
||||
missing_fields = []
|
||||
if not nextcloud_url:
|
||||
missing_fields.append("nextcloud_url")
|
||||
if not username:
|
||||
missing_fields.append("username")
|
||||
if not app_password:
|
||||
missing_fields.append("app_password")
|
||||
|
||||
if missing_fields:
|
||||
raise ValueError(
|
||||
f"Missing required session config fields: {', '.join(missing_fields)}. "
|
||||
f"Configure these in the Smithery connection settings."
|
||||
)
|
||||
|
||||
# Type assertions after validation (for type checker)
|
||||
# These are guaranteed to be str after the missing_fields check above
|
||||
assert nextcloud_url is not None
|
||||
assert username is not None
|
||||
assert app_password is not None
|
||||
|
||||
# Validate URL format
|
||||
if not nextcloud_url.startswith(("http://", "https://")):
|
||||
raise ValueError(
|
||||
f"Invalid nextcloud_url: {nextcloud_url}. "
|
||||
f"Must start with http:// or https://"
|
||||
)
|
||||
|
||||
logger.debug(f"Creating Smithery client for {nextcloud_url} as {username}")
|
||||
|
||||
# Create client with session credentials using BasicAuth
|
||||
return NextcloudClient(
|
||||
base_url=nextcloud_url,
|
||||
username=username,
|
||||
auth=BasicAuth(username, app_password),
|
||||
)
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
"""Document processing plugins for extracting text from various file formats."""
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
from .pymupdf import PyMuPDFProcessor
|
||||
from .registry import ProcessorRegistry, get_registry
|
||||
|
||||
# Register processors at module initialization
|
||||
_registry = get_registry()
|
||||
_registry.register(PyMuPDFProcessor(), priority=10)
|
||||
|
||||
__all__ = [
|
||||
"DocumentProcessor",
|
||||
"ProcessingResult",
|
||||
"ProcessorError",
|
||||
"ProcessorRegistry",
|
||||
"get_registry",
|
||||
"PyMuPDFProcessor",
|
||||
]
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
"""Document processor using PyMuPDF (fitz) library."""
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import tempfile
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
# NOTE: Do NOT call pymupdf.layout.activate() here!
|
||||
# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
|
||||
# causing it to return a string instead of a list[dict].
|
||||
# See: https://github.com/pymupdf/pymupdf4llm/issues/323
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PyMuPDFProcessor(DocumentProcessor):
|
||||
"""Document processor using PyMuPDF library for PDF processing.
|
||||
|
||||
PyMuPDF (fitz) is a fast, local PDF processing library that extracts text,
|
||||
metadata, and images without requiring external API calls.
|
||||
|
||||
Features:
|
||||
- Fast text extraction with layout preservation
|
||||
- PDF metadata extraction (title, author, creation date, page count)
|
||||
- Image extraction for future multimodal support
|
||||
- Page number tracking for precise citations
|
||||
"""
|
||||
|
||||
SUPPORTED_TYPES = {
|
||||
"application/pdf",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
extract_images: bool = True,
|
||||
image_dir: Optional[str | pathlib.Path] = None,
|
||||
):
|
||||
"""Initialize PyMuPDF processor.
|
||||
|
||||
Args:
|
||||
extract_images: Whether to extract embedded images from PDFs
|
||||
image_dir: Directory to store extracted images (defaults to temp directory)
|
||||
"""
|
||||
self.extract_images = extract_images
|
||||
|
||||
if image_dir is None:
|
||||
self.image_dir = pathlib.Path(tempfile.gettempdir()) / "pdf-images"
|
||||
else:
|
||||
self.image_dir = pathlib.Path(image_dir)
|
||||
|
||||
# Create image directory if it doesn't exist
|
||||
if self.extract_images:
|
||||
self.image_dir.mkdir(exist_ok=True, parents=True)
|
||||
logger.info(
|
||||
f"Initialized PyMuPDFProcessor with image extraction to {self.image_dir}"
|
||||
)
|
||||
else:
|
||||
logger.info("Initialized PyMuPDFProcessor without image extraction")
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "pymupdf"
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return self.SUPPORTED_TYPES
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process a PDF document and extract text, metadata, and images.
|
||||
|
||||
Args:
|
||||
content: PDF document bytes
|
||||
content_type: MIME type (should be application/pdf)
|
||||
filename: Optional filename for better error messages
|
||||
options: Processing options (currently unused)
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If PDF processing fails
|
||||
"""
|
||||
import anyio
|
||||
|
||||
try:
|
||||
if progress_callback:
|
||||
await progress_callback(0, 100, "Opening PDF document")
|
||||
|
||||
# Open document and extract metadata in thread
|
||||
doc = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: pymupdf.open("pdf", content)
|
||||
)
|
||||
|
||||
metadata = self._extract_metadata(doc, filename)
|
||||
metadata["file_size"] = len(content)
|
||||
page_count = doc.page_count
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(10, 100, f"Extracting {page_count} pages")
|
||||
|
||||
# Prepare image directory if needed
|
||||
pdf_image_dir = None
|
||||
if self.extract_images:
|
||||
pdf_id = filename.replace("/", "_") if filename else "unknown"
|
||||
pdf_image_dir = self.image_dir / pdf_id
|
||||
pdf_image_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# Extract all pages in a single call with page_chunks=True
|
||||
def do_extract() -> list[dict[str, Any]]:
|
||||
# When page_chunks=True, to_markdown returns list[dict] not str
|
||||
return pymupdf4llm.to_markdown( # type: ignore[return-value]
|
||||
doc,
|
||||
write_images=self.extract_images,
|
||||
image_path=pdf_image_dir if self.extract_images else None,
|
||||
page_chunks=True,
|
||||
)
|
||||
|
||||
page_chunks: list[dict[str, Any]] = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
do_extract
|
||||
)
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(90, 100, "Building result")
|
||||
|
||||
# Extract page texts and build boundaries from chunks
|
||||
page_texts: list[str] = []
|
||||
page_boundaries: list[dict[str, Any]] = []
|
||||
current_offset = 0
|
||||
for chunk in page_chunks:
|
||||
text = chunk.get("text", "")
|
||||
page_num = chunk.get("metadata", {}).get("page", len(page_texts) + 1)
|
||||
page_texts.append(text)
|
||||
page_boundaries.append(
|
||||
{
|
||||
"page": page_num,
|
||||
"start_offset": current_offset,
|
||||
"end_offset": current_offset + len(text),
|
||||
}
|
||||
)
|
||||
current_offset += len(text)
|
||||
|
||||
# Collect image paths
|
||||
image_paths = []
|
||||
if pdf_image_dir and pdf_image_dir.exists():
|
||||
image_paths = [str(p) for p in pdf_image_dir.glob("*")]
|
||||
|
||||
# Build final text and metadata
|
||||
md_text = "".join(page_texts)
|
||||
metadata["has_images"] = len(image_paths) > 0
|
||||
if image_paths:
|
||||
metadata["image_count"] = len(image_paths)
|
||||
metadata["image_paths"] = image_paths
|
||||
metadata["page_boundaries"] = page_boundaries
|
||||
|
||||
# Close document
|
||||
doc.close()
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(100, 100, "Processing complete")
|
||||
|
||||
logger.info(
|
||||
f"Successfully processed PDF {filename or '<bytes>'}: "
|
||||
f"{metadata['page_count']} pages, {len(md_text)} chars, "
|
||||
f"{metadata.get('image_count', 0)} images"
|
||||
)
|
||||
|
||||
return ProcessingResult(
|
||||
text=md_text,
|
||||
metadata=metadata,
|
||||
processor=self.name,
|
||||
success=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to process PDF {filename or '<bytes>'}: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
raise ProcessorError(error_msg) from e
|
||||
|
||||
def _extract_metadata(
|
||||
self, doc: pymupdf.Document, filename: Optional[str]
|
||||
) -> dict[str, Any]:
|
||||
"""Extract metadata from PDF document.
|
||||
|
||||
Args:
|
||||
doc: Opened PyMuPDF document
|
||||
filename: Optional filename
|
||||
|
||||
Returns:
|
||||
Dictionary with PDF metadata
|
||||
"""
|
||||
metadata: dict[str, Any] = {}
|
||||
|
||||
# Basic document info
|
||||
metadata["page_count"] = doc.page_count
|
||||
metadata["format"] = "PDF 1." + str(
|
||||
doc.pdf_version() if hasattr(doc, "pdf_version") else "?" # type: ignore[call-non-callable]
|
||||
)
|
||||
|
||||
if filename:
|
||||
metadata["filename"] = filename
|
||||
|
||||
# Extract PDF metadata dictionary
|
||||
pdf_metadata = doc.metadata
|
||||
if pdf_metadata:
|
||||
# Standard PDF metadata fields
|
||||
if pdf_metadata.get("title"):
|
||||
metadata["title"] = pdf_metadata["title"]
|
||||
if pdf_metadata.get("author"):
|
||||
metadata["author"] = pdf_metadata["author"]
|
||||
if pdf_metadata.get("subject"):
|
||||
metadata["subject"] = pdf_metadata["subject"]
|
||||
if pdf_metadata.get("keywords"):
|
||||
metadata["keywords"] = pdf_metadata["keywords"]
|
||||
if pdf_metadata.get("creator"):
|
||||
metadata["creator"] = pdf_metadata["creator"]
|
||||
if pdf_metadata.get("producer"):
|
||||
metadata["producer"] = pdf_metadata["producer"]
|
||||
if pdf_metadata.get("creationDate"):
|
||||
metadata["creation_date"] = pdf_metadata["creationDate"]
|
||||
if pdf_metadata.get("modDate"):
|
||||
metadata["modification_date"] = pdf_metadata["modDate"]
|
||||
|
||||
return metadata
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if PyMuPDF is available and working.
|
||||
|
||||
Returns:
|
||||
True if processor is ready to use
|
||||
"""
|
||||
try:
|
||||
# Try to create a simple PDF in memory
|
||||
test_doc = pymupdf.open()
|
||||
test_doc.close()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"PyMuPDF health check failed: {e}")
|
||||
return False
|
||||
@@ -37,7 +37,9 @@ class BM25SparseEmbeddingProvider:
|
||||
|
||||
def encode(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text.
|
||||
Generate BM25 sparse embedding for a single text (synchronous).
|
||||
|
||||
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
@@ -53,7 +55,24 @@ class BM25SparseEmbeddingProvider:
|
||||
"values": sparse_embedding.values.tolist(),
|
||||
}
|
||||
|
||||
def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
|
||||
async def encode_async(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text (async).
|
||||
|
||||
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
|
||||
Returns:
|
||||
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
|
||||
"""
|
||||
import anyio
|
||||
|
||||
# Run CPU-bound BM25 encoding in thread pool
|
||||
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
|
||||
|
||||
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Generate BM25 sparse embeddings for multiple texts (batched).
|
||||
|
||||
@@ -63,7 +82,12 @@ class BM25SparseEmbeddingProvider:
|
||||
Returns:
|
||||
List of dictionaries with 'indices' and 'values' for each text
|
||||
"""
|
||||
sparse_embeddings = list(self.model.embed(texts))
|
||||
import anyio
|
||||
|
||||
# Run CPU-bound BM25 encoding in thread pool to avoid blocking event loop
|
||||
sparse_embeddings = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: list(self.model.embed(texts))
|
||||
)
|
||||
|
||||
return [
|
||||
{
|
||||
|
||||
@@ -0,0 +1,192 @@
|
||||
"""Database migration utilities for nextcloud-mcp-server.
|
||||
|
||||
This module provides helper functions for managing Alembic database migrations
|
||||
programmatically. It enables automatic migration on application startup and
|
||||
provides CLI integration.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from alembic.config import Config
|
||||
|
||||
from alembic import command
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_alembic_config(database_path: str | Path | None = None) -> Config:
|
||||
"""
|
||||
Get Alembic configuration for programmatic use.
|
||||
|
||||
Works in both development and installed (Docker) modes by using
|
||||
package location instead of alembic.ini file.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file. If None, uses default
|
||||
(/app/data/tokens.db for Docker)
|
||||
|
||||
Returns:
|
||||
Alembic Config object configured for the specified database
|
||||
"""
|
||||
from nextcloud_mcp_server import alembic as alembic_package
|
||||
|
||||
# Use package location (works in both editable and installed modes)
|
||||
if alembic_package.__file__ is None:
|
||||
raise RuntimeError("alembic package __file__ is None")
|
||||
script_location = Path(alembic_package.__file__).parent
|
||||
|
||||
# Create config programmatically (no alembic.ini needed at runtime)
|
||||
config = Config()
|
||||
config.set_main_option("script_location", str(script_location))
|
||||
config.set_main_option("path_separator", "os") # Suppress deprecation warning
|
||||
|
||||
# Set database URL
|
||||
if database_path:
|
||||
db_path = Path(database_path).resolve()
|
||||
else:
|
||||
db_path = Path("/app/data/tokens.db") # Default for Docker
|
||||
|
||||
url = f"sqlite+aiosqlite:///{db_path}"
|
||||
config.set_main_option("sqlalchemy.url", url)
|
||||
|
||||
logger.debug(f"Alembic script location: {script_location}")
|
||||
logger.debug(f"Database: {db_path}")
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def upgrade_database(
|
||||
database_path: str | Path | None = None, revision: str = "head"
|
||||
) -> None:
|
||||
"""
|
||||
Upgrade database to a specific revision.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Target revision (default: "head" for latest)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.info(f"Upgrading database to revision: {revision}")
|
||||
command.upgrade(config, revision)
|
||||
logger.info("Database upgrade completed successfully")
|
||||
|
||||
|
||||
def downgrade_database(
|
||||
database_path: str | Path | None = None, revision: str = "-1"
|
||||
) -> None:
|
||||
"""
|
||||
Downgrade database to a specific revision.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Target revision (default: "-1" for previous version)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.warning(f"Downgrading database to revision: {revision}")
|
||||
command.downgrade(config, revision)
|
||||
logger.info("Database downgrade completed successfully")
|
||||
|
||||
|
||||
def get_current_revision(database_path: str | Path | None = None) -> str | None:
|
||||
"""
|
||||
Get the current database revision by directly querying the alembic_version table.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
|
||||
Returns:
|
||||
Current revision ID or None if not versioned
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
if database_path is None:
|
||||
database_path = "/app/data/tokens.db"
|
||||
|
||||
db_path = Path(database_path).resolve()
|
||||
|
||||
if not db_path.exists():
|
||||
logger.debug(f"Database does not exist: {db_path}")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Query alembic_version table directly
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check if alembic_version table exists
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='alembic_version'"
|
||||
)
|
||||
has_table = cursor.fetchone() is not None
|
||||
|
||||
if not has_table:
|
||||
conn.close()
|
||||
return None
|
||||
|
||||
# Get current version
|
||||
cursor.execute("SELECT version_num FROM alembic_version")
|
||||
row = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
return row[0] if row else None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get current revision: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def stamp_database(
|
||||
database_path: str | Path | None = None, revision: str = "head"
|
||||
) -> None:
|
||||
"""
|
||||
Stamp database with a specific revision without running migrations.
|
||||
|
||||
This is useful for marking existing databases that were created before
|
||||
Alembic was introduced. It tells Alembic "this database is at revision X"
|
||||
without actually running the migration.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Revision to stamp (default: "head" for latest)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.info(f"Stamping database with revision: {revision}")
|
||||
command.stamp(config, revision)
|
||||
logger.info("Database stamped successfully")
|
||||
|
||||
|
||||
def show_migration_history(database_path: str | Path | None = None) -> None:
|
||||
"""
|
||||
Display migration history.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
command.history(config, verbose=True)
|
||||
|
||||
|
||||
def create_migration(message: str, autogenerate: bool = False) -> None:
|
||||
"""
|
||||
Create a new migration script.
|
||||
|
||||
Args:
|
||||
message: Description of the migration
|
||||
autogenerate: Whether to attempt auto-generation (requires SQLAlchemy models)
|
||||
|
||||
Note:
|
||||
Since we don't use SQLAlchemy models, autogenerate will be disabled
|
||||
and migrations must be written manually.
|
||||
"""
|
||||
config = get_alembic_config()
|
||||
logger.info(f"Creating new migration: {message}")
|
||||
|
||||
if autogenerate:
|
||||
logger.warning(
|
||||
"Auto-generation is not supported (no SQLAlchemy models). "
|
||||
"Migration will be created with empty upgrade/downgrade functions."
|
||||
)
|
||||
|
||||
command.revision(config, message=message, autogenerate=False)
|
||||
logger.info("Migration created successfully. Edit the file to add SQL statements.")
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Pydantic models for Nextcloud News app responses."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from .base import BaseResponse
|
||||
|
||||
|
||||
class NewsFolder(BaseModel):
|
||||
"""Model for a News folder."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Folder ID")
|
||||
name: str = Field(description="Folder name")
|
||||
|
||||
|
||||
class NewsFeed(BaseModel):
|
||||
"""Model for a News feed (RSS/Atom subscription)."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Feed ID")
|
||||
url: str = Field(description="Feed URL")
|
||||
title: str = Field(description="Feed title")
|
||||
favicon_link: str | None = Field(
|
||||
None, alias="faviconLink", description="Favicon URL"
|
||||
)
|
||||
link: str | None = Field(None, description="Website link")
|
||||
added: int = Field(description="Unix timestamp when feed was added")
|
||||
folder_id: int | None = Field(
|
||||
None, alias="folderId", description="Parent folder ID"
|
||||
)
|
||||
unread_count: int = Field(
|
||||
0, alias="unreadCount", description="Number of unread items"
|
||||
)
|
||||
ordering: int = Field(
|
||||
0, description="Feed ordering (0=default, 1=oldest, 2=newest)"
|
||||
)
|
||||
pinned: bool = Field(False, description="Whether feed is pinned to top")
|
||||
update_error_count: int = Field(
|
||||
0, alias="updateErrorCount", description="Consecutive update failures"
|
||||
)
|
||||
last_update_error: str | None = Field(
|
||||
None, alias="lastUpdateError", description="Last update error message"
|
||||
)
|
||||
|
||||
@property
|
||||
def has_errors(self) -> bool:
|
||||
"""Check if feed has update errors."""
|
||||
return self.update_error_count > 0
|
||||
|
||||
|
||||
class NewsItem(BaseModel):
|
||||
"""Model for a News item (article) with full content."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Item ID")
|
||||
guid: str = Field(description="Globally unique identifier")
|
||||
guid_hash: str = Field(alias="guidHash", description="MD5 hash of GUID")
|
||||
url: str | None = Field(None, description="Article URL")
|
||||
title: str = Field(description="Article title")
|
||||
author: str | None = Field(None, description="Article author")
|
||||
pub_date: int | None = Field(
|
||||
None, alias="pubDate", description="Publication timestamp"
|
||||
)
|
||||
body: str | None = Field(None, description="Article content (HTML)")
|
||||
enclosure_mime: str | None = Field(
|
||||
None, alias="enclosureMime", description="Enclosure MIME type"
|
||||
)
|
||||
enclosure_link: str | None = Field(
|
||||
None, alias="enclosureLink", description="Enclosure URL"
|
||||
)
|
||||
media_thumbnail: str | None = Field(
|
||||
None, alias="mediaThumbnail", description="Media thumbnail URL"
|
||||
)
|
||||
media_description: str | None = Field(
|
||||
None, alias="mediaDescription", description="Media description"
|
||||
)
|
||||
feed_id: int = Field(alias="feedId", description="Parent feed ID")
|
||||
unread: bool = Field(True, description="Whether item is unread")
|
||||
starred: bool = Field(False, description="Whether item is starred")
|
||||
rtl: bool = Field(False, description="Right-to-left text")
|
||||
last_modified: int = Field(
|
||||
alias="lastModified", description="Last modification timestamp"
|
||||
)
|
||||
fingerprint: str | None = Field(
|
||||
None, description="Content fingerprint for deduplication"
|
||||
)
|
||||
content_hash: str | None = Field(
|
||||
None, alias="contentHash", description="Content hash"
|
||||
)
|
||||
|
||||
|
||||
class NewsItemSummary(BaseModel):
|
||||
"""Lightweight model for News item list responses."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Item ID")
|
||||
title: str = Field(description="Article title")
|
||||
feed_id: int = Field(alias="feedId", description="Parent feed ID")
|
||||
unread: bool = Field(True, description="Whether item is unread")
|
||||
starred: bool = Field(False, description="Whether item is starred")
|
||||
pub_date: int | None = Field(
|
||||
None, alias="pubDate", description="Publication timestamp"
|
||||
)
|
||||
url: str | None = Field(None, description="Article URL")
|
||||
author: str | None = Field(None, description="Article author")
|
||||
|
||||
|
||||
class NewsStatus(BaseModel):
|
||||
"""Model for News app status."""
|
||||
|
||||
version: str = Field(description="News app version")
|
||||
warnings: dict = Field(default_factory=dict, description="Configuration warnings")
|
||||
|
||||
|
||||
# --- Response Models ---
|
||||
|
||||
|
||||
class ListFoldersResponse(BaseResponse):
|
||||
"""Response model for listing folders."""
|
||||
|
||||
results: List[NewsFolder] = Field(description="List of folders")
|
||||
total_count: int = Field(description="Total number of folders")
|
||||
|
||||
|
||||
class ListFeedsResponse(BaseResponse):
|
||||
"""Response model for listing feeds."""
|
||||
|
||||
results: List[NewsFeed] = Field(description="List of feeds")
|
||||
starred_count: int = Field(0, description="Number of starred items")
|
||||
newest_item_id: int | None = Field(None, description="ID of newest item")
|
||||
total_count: int = Field(description="Total number of feeds")
|
||||
|
||||
|
||||
class ListItemsResponse(BaseResponse):
|
||||
"""Response model for listing items."""
|
||||
|
||||
results: List[NewsItemSummary] = Field(description="List of items")
|
||||
total_count: int = Field(description="Number of items returned")
|
||||
has_more: bool = Field(False, description="Whether more items exist")
|
||||
oldest_id: int | None = Field(None, description="Oldest item ID (for pagination)")
|
||||
|
||||
|
||||
class GetItemResponse(BaseResponse):
|
||||
"""Response model for getting a single item."""
|
||||
|
||||
item: NewsItem = Field(description="Full item details")
|
||||
|
||||
|
||||
class FeedHealthResponse(BaseResponse):
|
||||
"""Response model for feed health status."""
|
||||
|
||||
feed_id: int = Field(description="Feed ID")
|
||||
title: str = Field(description="Feed title")
|
||||
url: str = Field(description="Feed URL")
|
||||
has_errors: bool = Field(description="Whether feed has update errors")
|
||||
error_count: int = Field(description="Number of consecutive errors")
|
||||
last_error: str | None = Field(None, description="Last error message")
|
||||
|
||||
|
||||
class GetStatusResponse(BaseResponse):
|
||||
"""Response model for app status."""
|
||||
|
||||
version: str = Field(description="News app version")
|
||||
warnings: dict = Field(default_factory=dict, description="Configuration warnings")
|
||||
@@ -10,7 +10,7 @@ from .base import BaseResponse
|
||||
class SemanticSearchResult(BaseModel):
|
||||
"""Model for semantic search results with additional metadata."""
|
||||
|
||||
id: int = Field(description="Document ID")
|
||||
id: int = Field(description="Document ID (int for all document types)")
|
||||
doc_type: str = Field(
|
||||
description="Document type (note, calendar_event, deck_card, etc.)"
|
||||
)
|
||||
@@ -35,6 +35,32 @@ class SemanticSearchResult(BaseModel):
|
||||
chunk_end_offset: Optional[int] = Field(
|
||||
default=None, description="Character position where chunk ends in document"
|
||||
)
|
||||
page_number: Optional[int] = Field(
|
||||
default=None, description="Page number for PDF documents"
|
||||
)
|
||||
page_count: Optional[int] = Field(
|
||||
default=None, description="Total number of pages in PDF document"
|
||||
)
|
||||
# Context expansion fields (optional, populated when include_context=True)
|
||||
has_context_expansion: bool = Field(
|
||||
default=False, description="Whether context expansion was performed"
|
||||
)
|
||||
marked_text: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Full text with position markers around matched chunk",
|
||||
)
|
||||
before_context: Optional[str] = Field(
|
||||
default=None, description="Text before the matched chunk"
|
||||
)
|
||||
after_context: Optional[str] = Field(
|
||||
default=None, description="Text after the matched chunk"
|
||||
)
|
||||
has_before_truncation: Optional[bool] = Field(
|
||||
default=None, description="Whether before_context was truncated"
|
||||
)
|
||||
has_after_truncation: Optional[bool] = Field(
|
||||
default=None, description="Whether after_context was truncated"
|
||||
)
|
||||
|
||||
|
||||
class SemanticSearchResponse(BaseResponse):
|
||||
|
||||
@@ -37,7 +37,7 @@ class HealthCheckFilter(logging.Filter):
|
||||
"""
|
||||
# Check if the log message contains health check endpoints
|
||||
message = record.getMessage()
|
||||
return not any(
|
||||
health_check = any(
|
||||
endpoint in message
|
||||
for endpoint in [
|
||||
"/health/live",
|
||||
@@ -47,6 +47,8 @@ class HealthCheckFilter(logging.Filter):
|
||||
]
|
||||
)
|
||||
|
||||
return not health_check
|
||||
|
||||
|
||||
class TraceContextFormatter(JsonFormatter):
|
||||
"""
|
||||
@@ -58,7 +60,7 @@ class TraceContextFormatter(JsonFormatter):
|
||||
|
||||
def add_fields(
|
||||
self,
|
||||
log_record: dict[str, Any],
|
||||
log_data: dict[str, Any],
|
||||
record: logging.LogRecord,
|
||||
message_dict: dict[str, Any],
|
||||
) -> None:
|
||||
@@ -66,28 +68,28 @@ class TraceContextFormatter(JsonFormatter):
|
||||
Add custom fields to the log record, including trace context.
|
||||
|
||||
Args:
|
||||
log_record: Dictionary to be serialized as JSON
|
||||
log_data: Dictionary to be serialized as JSON
|
||||
record: LogRecord instance
|
||||
message_dict: Dictionary of extra fields from log call
|
||||
"""
|
||||
# Call parent to add standard fields
|
||||
super().add_fields(log_record, record, message_dict)
|
||||
super().add_fields(log_data, record, message_dict)
|
||||
|
||||
# Add trace context if available
|
||||
trace_context = get_trace_context()
|
||||
if trace_context:
|
||||
log_record["trace_id"] = trace_context.get("trace_id")
|
||||
log_record["span_id"] = trace_context.get("span_id")
|
||||
log_data["trace_id"] = trace_context.get("trace_id")
|
||||
log_data["span_id"] = trace_context.get("span_id")
|
||||
|
||||
# Add standard fields with consistent naming
|
||||
log_record["timestamp"] = self.formatTime(record)
|
||||
log_record["level"] = record.levelname
|
||||
log_record["logger"] = record.name
|
||||
log_record["message"] = record.getMessage()
|
||||
log_data["timestamp"] = self.formatTime(record)
|
||||
log_data["level"] = record.levelname
|
||||
log_data["logger"] = record.name
|
||||
log_data["message"] = record.getMessage()
|
||||
|
||||
# Include exception info if present
|
||||
if record.exc_info:
|
||||
log_record["exception"] = self.formatException(record.exc_info)
|
||||
log_data["exception"] = self.formatException(record.exc_info)
|
||||
|
||||
|
||||
class TraceContextTextFormatter(logging.Formatter):
|
||||
|
||||
@@ -53,10 +53,11 @@ def setup_tracing(
|
||||
global _tracer
|
||||
|
||||
# Create resource with service name
|
||||
pkg_name = __package__.split(".")[0] if __package__ else "nextcloud_mcp_server"
|
||||
resource = Resource.create(
|
||||
{
|
||||
"service.name": service_name,
|
||||
"service.version": version(__package__.split(".")[0]),
|
||||
"service.version": version(pkg_name),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -4,12 +4,14 @@ from .anthropic import AnthropicProvider
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .registry import get_provider, reset_provider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
__all__ = [
|
||||
"Provider",
|
||||
"OllamaProvider",
|
||||
"OpenAIProvider",
|
||||
"AnthropicProvider",
|
||||
"SimpleProvider",
|
||||
"BedrockProvider",
|
||||
|
||||
@@ -17,18 +17,20 @@ class AnthropicProvider(Provider):
|
||||
Note: Anthropic doesn't provide embedding models, only text generation.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
|
||||
def __init__(
|
||||
self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022"
|
||||
):
|
||||
"""
|
||||
Initialize Anthropic provider.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key
|
||||
model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
generation_model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
"""
|
||||
self.client = AsyncAnthropic(api_key=api_key)
|
||||
self.model = model
|
||||
self.model = generation_model
|
||||
|
||||
logger.info(f"Initialized Anthropic provider (model={model})")
|
||||
logger.info(f"Initialized Anthropic provider (model={self.model})")
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
|
||||
@@ -92,14 +92,21 @@ class OllamaProvider(Provider):
|
||||
response.raise_for_status()
|
||||
return response.json()["embedding"]
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
async def embed_batch(
|
||||
self, texts: list[str], batch_size: int = 32
|
||||
) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts (batched requests).
|
||||
Generate embeddings for multiple texts using Ollama's batch API.
|
||||
|
||||
Note: Ollama doesn't have native batch API, so we send requests sequentially.
|
||||
Uses /api/embed endpoint with array input for efficient batch processing.
|
||||
Conservative batch size (32) prevents quality degradation observed in
|
||||
Ollama issue #6262 with larger batches.
|
||||
|
||||
Note: Ollama processes batches serially, not in parallel.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
batch_size: Maximum texts per batch (default: 32)
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
@@ -112,11 +119,17 @@ class OllamaProvider(Provider):
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
embedding = await self.embed(text)
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
all_embeddings = []
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embed",
|
||||
json={"model": self.embedding_model, "input": batch},
|
||||
)
|
||||
response.raise_for_status()
|
||||
all_embeddings.extend(response.json()["embeddings"])
|
||||
|
||||
return all_embeddings
|
||||
|
||||
async def _detect_dimension(self):
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
"""Unified OpenAI provider for embeddings and text generation.
|
||||
|
||||
Supports:
|
||||
- OpenAI's standard API
|
||||
- GitHub Models API (models.github.ai)
|
||||
- Any OpenAI-compatible API via base_url override
|
||||
"""
|
||||
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
import anyio
|
||||
from openai import AsyncOpenAI, RateLimitError
|
||||
|
||||
from .base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Rate limit retry configuration
|
||||
MAX_RETRIES = 5
|
||||
INITIAL_RETRY_DELAY = 2.0 # seconds
|
||||
MAX_RETRY_DELAY = 60.0 # seconds
|
||||
|
||||
|
||||
def retry_on_rate_limit(func):
|
||||
"""Decorator to retry on OpenAI rate limit errors with exponential backoff."""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(*args, **kwargs):
|
||||
retry_delay = INITIAL_RETRY_DELAY
|
||||
last_error: Exception | None = None
|
||||
|
||||
for attempt in range(1, MAX_RETRIES + 1):
|
||||
try:
|
||||
return await func(*args, **kwargs)
|
||||
except RateLimitError as e:
|
||||
last_error = e
|
||||
if attempt < MAX_RETRIES:
|
||||
logger.warning(
|
||||
f"Rate limit hit (attempt {attempt}/{MAX_RETRIES}), "
|
||||
f"retrying in {retry_delay:.1f}s..."
|
||||
)
|
||||
await anyio.sleep(retry_delay)
|
||||
retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY)
|
||||
|
||||
logger.error(f"Rate limit exceeded after {MAX_RETRIES} attempts")
|
||||
raise last_error # type: ignore[misc]
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
# Well-known embedding dimensions for OpenAI models
|
||||
OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = {
|
||||
"text-embedding-3-small": 1536,
|
||||
"text-embedding-3-large": 3072,
|
||||
"text-embedding-ada-002": 1536,
|
||||
# GitHub Models API uses openai/ prefix
|
||||
"openai/text-embedding-3-small": 1536,
|
||||
"openai/text-embedding-3-large": 3072,
|
||||
}
|
||||
|
||||
|
||||
class OpenAIProvider(Provider):
|
||||
"""
|
||||
OpenAI provider supporting both embeddings and text generation.
|
||||
|
||||
Works with:
|
||||
- OpenAI's standard API (api.openai.com)
|
||||
- GitHub Models API (models.github.ai)
|
||||
- Any OpenAI-compatible API (via base_url)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
base_url: str | None = None,
|
||||
embedding_model: str | None = None,
|
||||
generation_model: str | None = None,
|
||||
timeout: float = 120.0,
|
||||
):
|
||||
"""
|
||||
Initialize OpenAI provider.
|
||||
|
||||
Args:
|
||||
api_key: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
|
||||
base_url: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
embedding_model: Model for embeddings (e.g., "text-embedding-3-small").
|
||||
None disables embeddings.
|
||||
generation_model: Model for text generation (e.g., "gpt-4o-mini").
|
||||
None disables generation.
|
||||
timeout: HTTP timeout in seconds (default: 120)
|
||||
"""
|
||||
self.embedding_model = embedding_model
|
||||
self.generation_model = generation_model
|
||||
self._dimension: int | None = None
|
||||
|
||||
# Initialize async client
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Try to get known dimension without API call
|
||||
if embedding_model and embedding_model in OPENAI_EMBEDDING_DIMENSIONS:
|
||||
self._dimension = OPENAI_EMBEDDING_DIMENSIONS[embedding_model]
|
||||
|
||||
logger.info(
|
||||
f"Initialized OpenAI provider: base_url={base_url or 'default'} "
|
||||
f"(embedding_model={embedding_model}, generation_model={generation_model}, "
|
||||
f"dimension={self._dimension})"
|
||||
)
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return self.embedding_model is not None
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return self.generation_model is not None
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
assert self.embedding_model is not None # Type narrowing
|
||||
response = await self.client.embeddings.create(
|
||||
input=text,
|
||||
model=self.embedding_model,
|
||||
)
|
||||
|
||||
embedding = response.data[0].embedding
|
||||
|
||||
# Update dimension if not set
|
||||
if self._dimension is None:
|
||||
self._dimension = len(embedding)
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
return embedding
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts using OpenAI's batch API.
|
||||
|
||||
OpenAI supports up to 2048 inputs per request.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if not texts:
|
||||
return []
|
||||
|
||||
# OpenAI supports batches up to 2048, but use smaller batches for safety
|
||||
batch_size = 100
|
||||
all_embeddings: list[list[float]] = []
|
||||
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
|
||||
# Use helper method with retry logic for each batch
|
||||
batch_embeddings = await self._embed_batch_request(batch)
|
||||
all_embeddings.extend(batch_embeddings)
|
||||
|
||||
# Update dimension if not set
|
||||
if self._dimension is None and batch_embeddings:
|
||||
self._dimension = len(batch_embeddings[0])
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
return all_embeddings
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]:
|
||||
"""Make a single batch embedding request with retry logic."""
|
||||
assert self.embedding_model is not None # Type narrowing
|
||||
response = await self.client.embeddings.create(
|
||||
input=batch,
|
||||
model=self.embedding_model,
|
||||
)
|
||||
# Sort by index to maintain order
|
||||
sorted_data = sorted(response.data, key=lambda x: x.index)
|
||||
return [item.embedding for item in sorted_data]
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension for the configured embedding model
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
RuntimeError: If dimension not detected yet (call embed first)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if self._dimension is None:
|
||||
raise RuntimeError(
|
||||
f"Embedding dimension not detected yet for model {self.embedding_model}. "
|
||||
"Call embed() first or use a known model."
|
||||
)
|
||||
return self._dimension
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If generation not enabled (no generation_model)
|
||||
"""
|
||||
if not self.supports_generation:
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported - no generation_model configured"
|
||||
)
|
||||
|
||||
response = await self.client.chat.completions.create(
|
||||
model=self.generation_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=max_tokens,
|
||||
temperature=0.7,
|
||||
)
|
||||
|
||||
return response.choices[0].message.content or ""
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close HTTP client."""
|
||||
await self.client.close()
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,8 +18,9 @@ class ProviderRegistry:
|
||||
|
||||
Checks environment variables in priority order and creates appropriate provider:
|
||||
1. Bedrock (AWS_REGION + BEDROCK_*_MODEL)
|
||||
2. Ollama (OLLAMA_BASE_URL)
|
||||
3. Simple (fallback for testing/development)
|
||||
2. OpenAI (OPENAI_API_KEY)
|
||||
3. Ollama (OLLAMA_BASE_URL)
|
||||
4. Simple (fallback for testing/development)
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
@@ -28,8 +30,9 @@ class ProviderRegistry:
|
||||
|
||||
Priority order:
|
||||
1. Bedrock - if AWS_REGION or BEDROCK_EMBEDDING_MODEL is set
|
||||
2. Ollama - if OLLAMA_BASE_URL is set
|
||||
3. Simple - fallback for testing/development
|
||||
2. OpenAI - if OPENAI_API_KEY is set
|
||||
3. Ollama - if OLLAMA_BASE_URL is set
|
||||
4. Simple - fallback for testing/development
|
||||
|
||||
Returns:
|
||||
Provider instance
|
||||
@@ -42,6 +45,12 @@ class ProviderRegistry:
|
||||
- BEDROCK_EMBEDDING_MODEL: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
|
||||
- BEDROCK_GENERATION_MODEL: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
|
||||
|
||||
OpenAI:
|
||||
- OPENAI_API_KEY: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
|
||||
- OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
- OPENAI_EMBEDDING_MODEL: Model for embeddings (default: "text-embedding-3-small")
|
||||
- OPENAI_GENERATION_MODEL: Model for text generation (e.g., "gpt-4o-mini")
|
||||
|
||||
Ollama:
|
||||
- OLLAMA_BASE_URL: Ollama API base URL (e.g., "http://localhost:11434")
|
||||
- OLLAMA_EMBEDDING_MODEL: Model for embeddings (default: "nomic-embed-text")
|
||||
@@ -70,7 +79,28 @@ class ProviderRegistry:
|
||||
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
)
|
||||
|
||||
# 2. Check for Ollama
|
||||
# 2. Check for OpenAI
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
if openai_api_key:
|
||||
base_url = os.getenv("OPENAI_BASE_URL")
|
||||
embedding_model = os.getenv(
|
||||
"OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
|
||||
)
|
||||
generation_model = os.getenv("OPENAI_GENERATION_MODEL")
|
||||
|
||||
logger.info(
|
||||
f"Using OpenAI provider: base_url={base_url or 'default'}, "
|
||||
f"embedding_model={embedding_model}, "
|
||||
f"generation_model={generation_model}"
|
||||
)
|
||||
return OpenAIProvider(
|
||||
api_key=openai_api_key,
|
||||
base_url=base_url,
|
||||
embedding_model=embedding_model,
|
||||
generation_model=generation_model,
|
||||
)
|
||||
|
||||
# 3. Check for Ollama (local LLM)
|
||||
ollama_url = os.getenv("OLLAMA_BASE_URL")
|
||||
if ollama_url:
|
||||
embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
|
||||
@@ -89,12 +119,12 @@ class ProviderRegistry:
|
||||
verify_ssl=verify_ssl,
|
||||
)
|
||||
|
||||
# 3. Fallback to Simple provider for development/testing
|
||||
# 4. Fallback to Simple provider for development/testing
|
||||
dimension = int(os.getenv("SIMPLE_EMBEDDING_DIMENSION", "384"))
|
||||
logger.warning(
|
||||
"No provider configured (AWS_REGION, OLLAMA_BASE_URL not set). "
|
||||
"No provider configured (AWS_REGION, OPENAI_API_KEY, OLLAMA_BASE_URL not set). "
|
||||
"Using SimpleProvider for testing/development. "
|
||||
"For production, configure Bedrock or Ollama."
|
||||
"For production, configure Bedrock, OpenAI, or Ollama."
|
||||
)
|
||||
return SimpleProvider(dimension=dimension)
|
||||
|
||||
|
||||
@@ -83,6 +83,7 @@ async def get_indexed_doc_types(user_id: str) -> set[str]:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -97,17 +98,20 @@ async def get_indexed_doc_types(user_id: str) -> set[str]:
|
||||
scroll_results, _next_offset = await qdrant_client.scroll(
|
||||
collection_name=collection,
|
||||
scroll_filter=Filter(
|
||||
must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
|
||||
must=[
|
||||
get_placeholder_filter(), # Exclude placeholders from doc_type discovery
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
]
|
||||
),
|
||||
limit=1000, # Sample size to discover types
|
||||
with_payload=["doc_type"],
|
||||
with_vectors=False, # Don't need vectors for type discovery
|
||||
)
|
||||
|
||||
doc_types = {
|
||||
point.payload.get("doc_type")
|
||||
doc_types: set[str] = {
|
||||
str(point.payload.get("doc_type"))
|
||||
for point in scroll_results
|
||||
if point.payload.get("doc_type")
|
||||
if point.payload and point.payload.get("doc_type")
|
||||
}
|
||||
|
||||
logger.debug(f"Found indexed document types for user {user_id}: {doc_types}")
|
||||
@@ -123,7 +127,7 @@ class SearchResult:
|
||||
"""A single search result with metadata and score.
|
||||
|
||||
Attributes:
|
||||
id: Document ID
|
||||
id: Document ID (int for all document types)
|
||||
doc_type: Document type (note, file, calendar, contact, etc.)
|
||||
title: Document title
|
||||
excerpt: Content excerpt showing match context
|
||||
@@ -133,6 +137,11 @@ class SearchResult:
|
||||
metadata: Additional algorithm-specific metadata
|
||||
chunk_start_offset: Character position where chunk starts (None if not available)
|
||||
chunk_end_offset: Character position where chunk ends (None if not available)
|
||||
page_number: Page number for PDF documents (None for other doc types)
|
||||
page_count: Total number of pages in PDF document (None for other doc types)
|
||||
chunk_index: Zero-based index of this chunk in the document
|
||||
total_chunks: Total number of chunks in the document
|
||||
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
|
||||
"""
|
||||
|
||||
id: int
|
||||
@@ -143,6 +152,11 @@ class SearchResult:
|
||||
metadata: dict[str, Any] | None = None
|
||||
chunk_start_offset: int | None = None
|
||||
chunk_end_offset: int | None = None
|
||||
page_number: int | None = None
|
||||
page_count: int | None = None
|
||||
chunk_index: int = 0
|
||||
total_chunks: int = 1
|
||||
point_id: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate score is non-negative.
|
||||
@@ -162,8 +176,15 @@ class SearchAlgorithm(ABC):
|
||||
|
||||
All search algorithms must implement the search() method with consistent
|
||||
interface, allowing them to be used interchangeably.
|
||||
|
||||
Attributes:
|
||||
query_embedding: The query embedding generated during the last search.
|
||||
Available after search() completes for algorithms that use embeddings.
|
||||
Can be reused by callers to avoid redundant embedding generation.
|
||||
"""
|
||||
|
||||
query_embedding: list[float] | None = None
|
||||
|
||||
@abstractmethod
|
||||
async def search(
|
||||
self,
|
||||
|
||||
@@ -9,7 +9,9 @@ from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_bm25_service, get_embedding_service
|
||||
from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -72,6 +74,9 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
Returns unverified results from Qdrant. Access verification should be
|
||||
performed separately at the final output stage using verify_search_results().
|
||||
|
||||
Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
|
||||
to show multiple chunks from the same document while avoiding duplicate chunks.
|
||||
|
||||
Args:
|
||||
query: Natural language or keyword search query
|
||||
user_id: User ID for filtering
|
||||
@@ -95,13 +100,19 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
)
|
||||
|
||||
# Generate dense embedding for semantic search
|
||||
embedding_service = get_embedding_service()
|
||||
dense_embedding = await embedding_service.embed(query)
|
||||
with trace_operation("search.get_embedding_service"):
|
||||
embedding_service = get_embedding_service()
|
||||
with trace_operation("search.dense_embedding"):
|
||||
dense_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = dense_embedding
|
||||
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
|
||||
|
||||
# Generate sparse embedding for BM25 keyword search
|
||||
bm25_service = get_bm25_service()
|
||||
sparse_embedding = bm25_service.encode(query)
|
||||
with trace_operation("search.get_bm25_service"):
|
||||
bm25_service = get_bm25_service()
|
||||
with trace_operation("search.sparse_embedding_bm25"):
|
||||
sparse_embedding = await bm25_service.encode_async(query)
|
||||
logger.debug(
|
||||
f"Generated sparse embedding "
|
||||
f"({len(sparse_embedding['indices'])} non-zero terms)"
|
||||
@@ -109,10 +120,11 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
|
||||
# Build Qdrant filter
|
||||
filter_conditions = [
|
||||
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match=MatchValue(value=user_id),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
# Add doc_type filter if specified
|
||||
@@ -127,38 +139,44 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
query_filter = Filter(must=filter_conditions)
|
||||
|
||||
# Execute hybrid search with Qdrant native RRF fusion
|
||||
qdrant_client = await get_qdrant_client()
|
||||
with trace_operation("search.get_qdrant_client"):
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
try:
|
||||
# Use prefetch to run both dense and sparse searches
|
||||
# Qdrant will automatically merge results using RRF
|
||||
search_response = await qdrant_client.query_points(
|
||||
collection_name=settings.get_collection_name(),
|
||||
prefetch=[
|
||||
# Dense semantic search
|
||||
models.Prefetch(
|
||||
query=dense_embedding,
|
||||
using="dense",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
# Sparse BM25 search
|
||||
models.Prefetch(
|
||||
query=models.SparseVector(
|
||||
indices=sparse_embedding["indices"],
|
||||
values=sparse_embedding["values"],
|
||||
with trace_operation(
|
||||
"search.qdrant_query",
|
||||
attributes={"query.limit": limit * 2, "query.fusion": self.fusion_name},
|
||||
):
|
||||
search_response = await qdrant_client.query_points(
|
||||
collection_name=settings.get_collection_name(),
|
||||
prefetch=[
|
||||
# Dense semantic search
|
||||
models.Prefetch(
|
||||
query=dense_embedding,
|
||||
using="dense",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
using="sparse",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
],
|
||||
# Fusion query (RRF or DBSF based on initialization)
|
||||
query=models.FusionQuery(fusion=self.fusion),
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
score_threshold=score_threshold,
|
||||
with_payload=True,
|
||||
with_vectors=False, # Don't return vectors to save bandwidth
|
||||
)
|
||||
# Sparse BM25 search
|
||||
models.Prefetch(
|
||||
query=models.SparseVector(
|
||||
indices=sparse_embedding["indices"],
|
||||
values=sparse_embedding["values"],
|
||||
),
|
||||
using="sparse",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
],
|
||||
# Fusion query (RRF or DBSF based on initialization)
|
||||
query=models.FusionQuery(fusion=self.fusion),
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
score_threshold=score_threshold,
|
||||
with_payload=True,
|
||||
with_vectors=False, # Don't return vectors to save bandwidth
|
||||
)
|
||||
record_qdrant_operation("search", "success")
|
||||
except Exception:
|
||||
record_qdrant_operation("search", "error")
|
||||
@@ -176,41 +194,68 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
f"Top 3 {self.fusion_name.upper()} fusion scores: {top_scores}"
|
||||
)
|
||||
|
||||
# Deduplicate by (doc_id, doc_type) - multiple chunks per document
|
||||
seen_docs = set()
|
||||
results = []
|
||||
# Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
|
||||
# This allows multiple chunks from same doc, but removes duplicate chunks
|
||||
with trace_operation(
|
||||
"search.deduplicate",
|
||||
attributes={"dedupe.num_points": len(search_response.points)},
|
||||
):
|
||||
seen_chunks = set()
|
||||
results = []
|
||||
|
||||
for result in search_response.points:
|
||||
doc_id = int(result.payload["doc_id"])
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
doc_key = (doc_id, doc_type)
|
||||
for result in search_response.points:
|
||||
if result.payload is None:
|
||||
continue
|
||||
# doc_id can be int (notes) or str (files - file paths)
|
||||
doc_id = result.payload["doc_id"]
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
chunk_start = result.payload.get("chunk_start_offset")
|
||||
chunk_end = result.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, doc_type, chunk_start, chunk_end)
|
||||
|
||||
# Skip if we've already seen this document
|
||||
if doc_key in seen_docs:
|
||||
continue
|
||||
# Skip if we've already seen this exact chunk
|
||||
if chunk_key in seen_chunks:
|
||||
continue
|
||||
|
||||
seen_docs.add(doc_key)
|
||||
seen_chunks.add(chunk_key)
|
||||
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
SearchResult(
|
||||
id=doc_id,
|
||||
doc_type=doc_type,
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score, # Fusion score (RRF or DBSF)
|
||||
metadata={
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
"search_method": f"bm25_hybrid_{self.fusion_name}",
|
||||
},
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
# Build metadata dict with common fields
|
||||
metadata = {
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
"search_method": f"bm25_hybrid_{self.fusion_name}",
|
||||
}
|
||||
|
||||
# Add file-specific metadata for PDF viewer
|
||||
if doc_type == "file" and (path := result.payload.get("file_path")):
|
||||
metadata["path"] = path
|
||||
|
||||
# Add deck_card-specific metadata for frontend URL construction
|
||||
if doc_type == "deck_card":
|
||||
if board_id := result.payload.get("board_id"):
|
||||
metadata["board_id"] = board_id
|
||||
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
SearchResult(
|
||||
id=doc_id,
|
||||
doc_type=doc_type,
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score, # Fusion score (RRF or DBSF)
|
||||
metadata=metadata,
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
page_number=result.payload.get("page_number"),
|
||||
page_count=result.payload.get("page_count"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
logger.info(f"Returning {len(results)} unverified results after deduplication")
|
||||
if results:
|
||||
|
||||
@@ -0,0 +1,748 @@
|
||||
"""Context expansion for search results.
|
||||
|
||||
Provides utilities to expand matched chunks with surrounding context and
|
||||
position markers for better visualization and understanding of search results.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _get_chunk_from_qdrant(
|
||||
user_id: str, doc_id: int, doc_type: str, chunk_start: int, chunk_end: int
|
||||
) -> str | None:
|
||||
"""Retrieve full chunk text from Qdrant payload.
|
||||
|
||||
This avoids re-fetching and re-parsing documents by using the cached
|
||||
chunk content already stored in Qdrant.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID
|
||||
doc_type: Document type (e.g., "note", "file")
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
|
||||
Returns:
|
||||
Full chunk text from Qdrant excerpt field, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for the specific chunk
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=chunk_start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=chunk_end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["excerpt"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
excerpt = point.payload.get("excerpt")
|
||||
if excerpt:
|
||||
logger.debug(
|
||||
f"Retrieved chunk from Qdrant for {doc_type} {doc_id}: "
|
||||
f"{len(excerpt)} chars"
|
||||
)
|
||||
return str(excerpt)
|
||||
|
||||
logger.debug(
|
||||
f"Chunk not found in Qdrant for {doc_type} {doc_id}, "
|
||||
f"chunk [{chunk_start}:{chunk_end}]. Will fall back to document fetch."
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error querying Qdrant for chunk: {e}. Falling back to document fetch.",
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_chunk_by_index_from_qdrant(
|
||||
user_id: str, doc_id: int, doc_type: str, chunk_index: int
|
||||
) -> str | None:
|
||||
"""Retrieve chunk text by chunk_index from Qdrant payload.
|
||||
|
||||
Used to fetch adjacent chunks for context expansion.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID
|
||||
doc_type: Document type (e.g., "note", "file")
|
||||
chunk_index: Zero-based chunk index in document
|
||||
|
||||
Returns:
|
||||
Full chunk text from Qdrant excerpt field, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for chunk by index
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(
|
||||
key="chunk_index", match=MatchValue(value=chunk_index)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["excerpt"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
excerpt = point.payload.get("excerpt")
|
||||
if excerpt:
|
||||
logger.debug(
|
||||
f"Retrieved adjacent chunk {chunk_index} from Qdrant for "
|
||||
f"{doc_type} {doc_id}: {len(excerpt)} chars"
|
||||
)
|
||||
return str(excerpt)
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"Could not retrieve adjacent chunk {chunk_index} for "
|
||||
f"{doc_type} {doc_id}: {e}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_file_path_from_qdrant(
|
||||
user_id: str, file_id: int, chunk_start: int, chunk_end: int
|
||||
) -> str | None:
|
||||
"""Resolve file_id to file_path by querying Qdrant payload.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the file
|
||||
file_id: Numeric file ID
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
|
||||
Returns:
|
||||
File path string, or None if not found in Qdrant
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for the specific chunk
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=file_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="file")),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=chunk_start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=chunk_end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["file_path"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
file_path = point.payload.get("file_path")
|
||||
if file_path:
|
||||
logger.debug(f"Resolved file_id {file_id} to file_path {file_path}")
|
||||
return str(file_path)
|
||||
|
||||
logger.warning(
|
||||
f"Could not find file_path in Qdrant for file_id {file_id}, "
|
||||
f"chunk [{chunk_start}:{chunk_end}]"
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error querying Qdrant for file_path: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_deck_metadata_from_qdrant(
|
||||
user_id: str, card_id: int
|
||||
) -> dict[str, int] | None:
|
||||
"""Retrieve board_id and stack_id for a deck card from Qdrant payload.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the card
|
||||
card_id: Card ID
|
||||
|
||||
Returns:
|
||||
Dictionary with board_id and stack_id, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for any chunk of this card (we just need metadata)
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=card_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="deck_card")),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["board_id", "stack_id"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
board_id = point.payload.get("board_id")
|
||||
stack_id = point.payload.get("stack_id")
|
||||
if board_id is not None and stack_id is not None:
|
||||
logger.debug(
|
||||
f"Retrieved deck metadata for card {card_id}: "
|
||||
f"board_id={board_id}, stack_id={stack_id}"
|
||||
)
|
||||
return {"board_id": int(board_id), "stack_id": int(stack_id)}
|
||||
|
||||
logger.debug(
|
||||
f"Could not find deck metadata in Qdrant for card {card_id} "
|
||||
f"(might be legacy data without board_id/stack_id)"
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error querying Qdrant for deck metadata: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ChunkContext:
|
||||
"""Expanded chunk with surrounding context and position markers.
|
||||
|
||||
Attributes:
|
||||
chunk_text: The matched chunk text
|
||||
before_context: Text before the chunk (up to context_chars)
|
||||
after_context: Text after the chunk (up to context_chars)
|
||||
chunk_start_offset: Character position where chunk starts in document
|
||||
chunk_end_offset: Character position where chunk ends in document
|
||||
page_number: Page number for PDFs (None for other doc types)
|
||||
chunk_index: Zero-based chunk index (N in "chunk N of M")
|
||||
total_chunks: Total number of chunks in document
|
||||
marked_text: Full text with position markers around the chunk
|
||||
has_before_truncation: True if before_context was truncated
|
||||
has_after_truncation: True if after_context was truncated
|
||||
"""
|
||||
|
||||
chunk_text: str
|
||||
before_context: str
|
||||
after_context: str
|
||||
chunk_start_offset: int
|
||||
chunk_end_offset: int
|
||||
page_number: int | None
|
||||
chunk_index: int
|
||||
total_chunks: int
|
||||
marked_text: str
|
||||
has_before_truncation: bool
|
||||
has_after_truncation: bool
|
||||
|
||||
|
||||
async def get_chunk_with_context(
|
||||
nc_client: NextcloudClient,
|
||||
user_id: str,
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
chunk_start: int,
|
||||
chunk_end: int,
|
||||
page_number: int | None = None,
|
||||
chunk_index: int = 0,
|
||||
total_chunks: int = 1,
|
||||
context_chars: int = 300,
|
||||
) -> ChunkContext | None:
|
||||
"""Fetch chunk with surrounding context.
|
||||
|
||||
First tries to retrieve the chunk from Qdrant (fast, cached). If that fails
|
||||
(e.g., legacy data with truncated excerpts), falls back to fetching and
|
||||
parsing the full document (slower, for PDFs especially).
|
||||
|
||||
Args:
|
||||
nc_client: Authenticated Nextcloud client
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID (int for notes/files)
|
||||
doc_type: Type of document ("note", "file", etc.)
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
page_number: Optional page number for PDFs
|
||||
chunk_index: Zero-based chunk index in document
|
||||
total_chunks: Total number of chunks in document
|
||||
context_chars: Number of characters to include before/after chunk
|
||||
|
||||
Returns:
|
||||
ChunkContext with expanded context and markers, or None if document
|
||||
cannot be retrieved
|
||||
"""
|
||||
# Convert doc_id to int for Qdrant query
|
||||
doc_id_int = (
|
||||
int(doc_id)
|
||||
if isinstance(doc_id, str) and doc_id.isdigit()
|
||||
else (doc_id if isinstance(doc_id, int) else None)
|
||||
)
|
||||
|
||||
# Try to get chunk from Qdrant first (fast path)
|
||||
if doc_id_int is not None:
|
||||
chunk_text = await _get_chunk_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_start, chunk_end
|
||||
)
|
||||
if chunk_text:
|
||||
logger.info(
|
||||
f"Retrieved chunk from Qdrant cache for {doc_type} {doc_id} "
|
||||
f"(avoids document re-fetch/re-parse)"
|
||||
)
|
||||
|
||||
# Fetch adjacent chunks for context expansion
|
||||
# Get chunk overlap from config to remove duplicate text
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
chunk_overlap = settings.document_chunk_overlap
|
||||
|
||||
before_context = ""
|
||||
after_context = ""
|
||||
has_before_truncation = False
|
||||
has_after_truncation = False
|
||||
|
||||
# Fetch previous chunk if not first chunk
|
||||
if chunk_index > 0:
|
||||
before_chunk = await _get_chunk_by_index_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_index - 1
|
||||
)
|
||||
if before_chunk:
|
||||
# Remove overlap: the last chunk_overlap chars of previous chunk
|
||||
# overlap with the first chunk_overlap chars of current chunk
|
||||
before_context = (
|
||||
before_chunk[:-chunk_overlap]
|
||||
if len(before_chunk) > chunk_overlap
|
||||
else ""
|
||||
)
|
||||
# Truncate if requested context_chars < remaining length
|
||||
if before_context and len(before_context) > context_chars:
|
||||
before_context = before_context[-context_chars:]
|
||||
has_before_truncation = True
|
||||
else:
|
||||
# Could not fetch previous chunk, but we're not at start
|
||||
has_before_truncation = True
|
||||
|
||||
# Fetch next chunk if not last chunk
|
||||
if chunk_index < total_chunks - 1:
|
||||
after_chunk = await _get_chunk_by_index_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_index + 1
|
||||
)
|
||||
if after_chunk:
|
||||
# Remove overlap: the first chunk_overlap chars of next chunk
|
||||
# overlap with the last chunk_overlap chars of current chunk
|
||||
after_context = (
|
||||
after_chunk[chunk_overlap:]
|
||||
if len(after_chunk) > chunk_overlap
|
||||
else ""
|
||||
)
|
||||
# Truncate if requested context_chars < remaining length
|
||||
if after_context and len(after_context) > context_chars:
|
||||
after_context = after_context[:context_chars]
|
||||
has_after_truncation = True
|
||||
else:
|
||||
# Could not fetch next chunk, but we're not at end
|
||||
has_after_truncation = True
|
||||
|
||||
marked_text = _insert_position_markers(
|
||||
before_context=before_context,
|
||||
chunk_text=chunk_text,
|
||||
after_context=after_context,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
return ChunkContext(
|
||||
chunk_text=chunk_text,
|
||||
before_context=before_context,
|
||||
after_context=after_context,
|
||||
chunk_start_offset=chunk_start,
|
||||
chunk_end_offset=chunk_end,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
marked_text=marked_text,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
# Fallback: Fetch full document and extract chunk with context
|
||||
# This path is taken for:
|
||||
# 1. Legacy data with truncated excerpts in Qdrant
|
||||
# 2. Failed Qdrant queries
|
||||
logger.info(
|
||||
f"Falling back to document fetch for {doc_type} {doc_id} "
|
||||
f"(Qdrant cache miss, possibly legacy data)"
|
||||
)
|
||||
|
||||
# For files, retrieve file_path from Qdrant payload
|
||||
resolved_doc_id = doc_id
|
||||
if doc_type == "file" and isinstance(doc_id, int):
|
||||
file_path = await _get_file_path_from_qdrant(
|
||||
user_id, doc_id, chunk_start, chunk_end
|
||||
)
|
||||
if not file_path:
|
||||
logger.warning(
|
||||
f"Could not resolve file_id {doc_id} to file_path from Qdrant"
|
||||
)
|
||||
return None
|
||||
resolved_doc_id = file_path
|
||||
logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}")
|
||||
|
||||
# Fetch full document text
|
||||
full_text = await _fetch_document_text(
|
||||
nc_client, resolved_doc_id, doc_type, user_id
|
||||
)
|
||||
if full_text is None:
|
||||
logger.warning(
|
||||
f"Could not fetch document text for {doc_type} {doc_id}, "
|
||||
"skipping context expansion"
|
||||
)
|
||||
return None
|
||||
|
||||
# Validate offsets
|
||||
if chunk_start < 0 or chunk_end > len(full_text) or chunk_start >= chunk_end:
|
||||
logger.warning(
|
||||
f"Invalid chunk offsets for {doc_type} {doc_id}: "
|
||||
f"start={chunk_start}, end={chunk_end}, doc_len={len(full_text)}"
|
||||
)
|
||||
return None
|
||||
|
||||
# Extract chunk text
|
||||
chunk_text = full_text[chunk_start:chunk_end]
|
||||
|
||||
# Calculate context boundaries
|
||||
context_start = max(0, chunk_start - context_chars)
|
||||
context_end = min(len(full_text), chunk_end + context_chars)
|
||||
|
||||
# Extract context
|
||||
before_context = full_text[context_start:chunk_start]
|
||||
after_context = full_text[chunk_end:context_end]
|
||||
|
||||
# Check for truncation
|
||||
has_before_truncation = context_start > 0
|
||||
has_after_truncation = context_end < len(full_text)
|
||||
|
||||
# Create marked text with position markers
|
||||
marked_text = _insert_position_markers(
|
||||
before_context=before_context,
|
||||
chunk_text=chunk_text,
|
||||
after_context=after_context,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
return ChunkContext(
|
||||
chunk_text=chunk_text,
|
||||
before_context=before_context,
|
||||
after_context=after_context,
|
||||
chunk_start_offset=chunk_start,
|
||||
chunk_end_offset=chunk_end,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
marked_text=marked_text,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
|
||||
async def _fetch_document_text(
|
||||
nc_client: NextcloudClient, doc_id: str | int, doc_type: str, user_id: str
|
||||
) -> str | None:
|
||||
"""Fetch full text content of a document.
|
||||
|
||||
Args:
|
||||
nc_client: Authenticated Nextcloud client
|
||||
doc_id: Document ID (note ID or file path)
|
||||
doc_type: Type of document ("note", "file", etc.)
|
||||
|
||||
Returns:
|
||||
Full document text, or None if document cannot be retrieved
|
||||
"""
|
||||
try:
|
||||
if doc_type == "note":
|
||||
# Fetch note by ID
|
||||
note = await nc_client.notes.get_note(note_id=int(doc_id))
|
||||
# Reconstruct full content as indexed: title + "\n\n" + content
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
title = note.get("title", "")
|
||||
content = note.get("content", "")
|
||||
return f"{title}\n\n{content}"
|
||||
elif doc_type == "file":
|
||||
# Fetch file content via WebDAV
|
||||
try:
|
||||
file_path = str(doc_id)
|
||||
file_content, content_type = await nc_client.webdav.read_file(file_path)
|
||||
|
||||
# Check if it's a PDF (by content type or file extension)
|
||||
is_pdf = (
|
||||
content_type and "pdf" in content_type.lower()
|
||||
) or file_path.lower().endswith(".pdf")
|
||||
|
||||
if is_pdf:
|
||||
# Extract text from PDF using PyMuPDF
|
||||
# IMPORTANT: Use pymupdf4llm.to_markdown() to match indexing extraction
|
||||
# This ensures character offsets align between indexed chunks and retrieval
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
logger.debug(f"Extracting text from PDF: {file_path}")
|
||||
pdf_doc = pymupdf.open(stream=file_content, filetype="pdf")
|
||||
text_parts = []
|
||||
|
||||
# Extract each page as markdown (same as indexing)
|
||||
for page_num in range(pdf_doc.page_count):
|
||||
page_md = pymupdf4llm.to_markdown(
|
||||
pdf_doc,
|
||||
pages=[page_num],
|
||||
write_images=False, # Don't need images for context
|
||||
page_chunks=False,
|
||||
)
|
||||
text_parts.append(page_md)
|
||||
|
||||
pdf_doc.close()
|
||||
|
||||
# Join pages (no separator - matches indexing)
|
||||
full_text = "".join(text_parts)
|
||||
logger.debug(
|
||||
f"Extracted {len(full_text)} characters from "
|
||||
f"{pdf_doc.page_count} pages in {file_path}"
|
||||
)
|
||||
return full_text
|
||||
else:
|
||||
# Assume it's a text file, decode to string
|
||||
logger.debug(f"Decoding text file: {file_path}")
|
||||
return file_content.decode("utf-8", errors="replace")
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error fetching file content for {doc_id}: {e}", exc_info=True
|
||||
)
|
||||
return None
|
||||
elif doc_type == "news_item":
|
||||
# Fetch news item by ID
|
||||
from nextcloud_mcp_server.vector.html_processor import html_to_markdown
|
||||
|
||||
item = await nc_client.news.get_item(int(doc_id))
|
||||
# Reconstruct full content as indexed: title + source + URL + body
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
body_markdown = html_to_markdown(item.get("body", ""))
|
||||
item_title = item.get("title", "")
|
||||
item_url = item.get("url", "")
|
||||
feed_title = item.get("feedTitle", "")
|
||||
|
||||
content_parts = [item_title]
|
||||
if feed_title:
|
||||
content_parts.append(f"Source: {feed_title}")
|
||||
if item_url:
|
||||
content_parts.append(f"URL: {item_url}")
|
||||
content_parts.append("") # Blank line
|
||||
content_parts.append(body_markdown)
|
||||
return "\n".join(content_parts)
|
||||
elif doc_type == "deck_card":
|
||||
# Fetch card from Deck API
|
||||
# Try to get board_id/stack_id from Qdrant metadata (O(1) lookup)
|
||||
# Otherwise fall back to iteration (legacy data)
|
||||
card = None
|
||||
deck_metadata = await _get_deck_metadata_from_qdrant(user_id, int(doc_id))
|
||||
|
||||
if deck_metadata:
|
||||
# Fast path: Direct lookup with known board_id/stack_id
|
||||
board_id = deck_metadata["board_id"]
|
||||
stack_id = deck_metadata["stack_id"]
|
||||
try:
|
||||
card = await nc_client.deck.get_card(
|
||||
board_id=board_id, stack_id=stack_id, card_id=int(doc_id)
|
||||
)
|
||||
logger.debug(
|
||||
f"Retrieved deck card {doc_id} using metadata "
|
||||
f"(board_id={board_id}, stack_id={stack_id})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to fetch card with metadata (board_id={board_id}, "
|
||||
f"stack_id={stack_id}, card_id={doc_id}): {e}, falling back to iteration"
|
||||
)
|
||||
|
||||
# Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed)
|
||||
if card is None:
|
||||
boards = await nc_client.deck.get_boards()
|
||||
card_found = False
|
||||
|
||||
for board in boards:
|
||||
if card_found:
|
||||
break
|
||||
|
||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||
if board.deletedAt > 0:
|
||||
logger.debug(
|
||||
f"Skipping deleted board {board.id} while searching for card {doc_id}"
|
||||
)
|
||||
continue
|
||||
|
||||
stacks = await nc_client.deck.get_stacks(board.id)
|
||||
|
||||
for stack in stacks:
|
||||
if card_found:
|
||||
break
|
||||
if stack.cards:
|
||||
for c in stack.cards:
|
||||
if c.id == int(doc_id):
|
||||
card = c
|
||||
card_found = True
|
||||
logger.debug(
|
||||
f"Found deck card {doc_id} in board {board.id}, "
|
||||
f"stack {stack.id} (fallback iteration)"
|
||||
)
|
||||
break
|
||||
|
||||
if not card_found:
|
||||
logger.warning(f"Deck card {doc_id} not found in any board/stack")
|
||||
return None
|
||||
|
||||
# Type narrowing: card is set if we reach here
|
||||
assert card is not None
|
||||
|
||||
# Reconstruct full content as indexed: title + "\n\n" + description
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
content_parts = [card.title]
|
||||
if card.description:
|
||||
content_parts.append(card.description)
|
||||
return "\n\n".join(content_parts)
|
||||
else:
|
||||
logger.warning(f"Unsupported doc_type for context expansion: {doc_type}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching document {doc_type} {doc_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
def _insert_position_markers(
|
||||
before_context: str,
|
||||
chunk_text: str,
|
||||
after_context: str,
|
||||
page_number: int | None,
|
||||
chunk_index: int,
|
||||
total_chunks: int,
|
||||
has_before_truncation: bool,
|
||||
has_after_truncation: bool,
|
||||
) -> str:
|
||||
"""Insert position markers around matched chunk.
|
||||
|
||||
Creates markdown-formatted text with visual markers indicating chunk
|
||||
boundaries and metadata.
|
||||
|
||||
Args:
|
||||
before_context: Text before chunk
|
||||
chunk_text: The matched chunk
|
||||
after_context: Text after chunk
|
||||
page_number: Optional page number
|
||||
chunk_index: Zero-based chunk index
|
||||
total_chunks: Total chunks in document
|
||||
has_before_truncation: Whether before_context is truncated
|
||||
has_after_truncation: Whether after_context is truncated
|
||||
|
||||
Returns:
|
||||
Formatted text with position markers
|
||||
"""
|
||||
# Build position metadata
|
||||
position_parts = []
|
||||
if page_number is not None:
|
||||
position_parts.append(f"Page {page_number}")
|
||||
position_parts.append(f"Chunk {chunk_index + 1} of {total_chunks}")
|
||||
position_metadata = ", ".join(position_parts)
|
||||
|
||||
# Build marked text
|
||||
parts = []
|
||||
|
||||
# Add truncation indicator for before context
|
||||
if has_before_truncation:
|
||||
parts.append("**[...]**\n\n")
|
||||
|
||||
# Add before context if present
|
||||
if before_context:
|
||||
parts.append(before_context)
|
||||
|
||||
# Add chunk start marker
|
||||
parts.append(f"\n\n🔍 **MATCHED CHUNK START** ({position_metadata})\n\n")
|
||||
|
||||
# Add chunk text
|
||||
parts.append(chunk_text)
|
||||
|
||||
# Add chunk end marker
|
||||
parts.append("\n\n🔍 **MATCHED CHUNK END**\n\n")
|
||||
|
||||
# Add after context if present
|
||||
if after_context:
|
||||
parts.append(after_context)
|
||||
|
||||
# Add truncation indicator for after context
|
||||
if has_after_truncation:
|
||||
parts.append("\n\n**[...]**")
|
||||
|
||||
return "".join(parts)
|
||||
@@ -0,0 +1,907 @@
|
||||
"""PDF chunk highlighting utilities for vector visualization.
|
||||
|
||||
This module provides utilities to generate highlighted page images showing
|
||||
matched chunks and their context from semantic search results.
|
||||
|
||||
The highlighting uses character offsets to precisely locate chunks within
|
||||
PDF documents, ensuring accurate highlighting even when text formatting
|
||||
varies between indexing and rendering.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFHighlighter:
|
||||
"""Generate highlighted page images from PDF chunks."""
|
||||
|
||||
# Color definitions (RGB, 0-1 range)
|
||||
COLORS = {
|
||||
"yellow": [1, 1, 0],
|
||||
"red": [1, 0, 0],
|
||||
"green": [0, 1, 0],
|
||||
"blue": [0, 0, 1],
|
||||
"orange": [1, 0.5, 0],
|
||||
"pink": [1, 0, 1],
|
||||
"gray": [0.7, 0.7, 0.7],
|
||||
"light_blue": [0.7, 0.9, 1.0],
|
||||
"light_green": [0.7, 1.0, 0.7],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def strip_markdown(text: str) -> str:
|
||||
"""Remove markdown formatting to improve search accuracy.
|
||||
|
||||
Args:
|
||||
text: Text with potential markdown formatting
|
||||
|
||||
Returns:
|
||||
Plain text with markdown removed
|
||||
"""
|
||||
# Remove bold/italic markers
|
||||
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
|
||||
text = re.sub(r"\*(.+?)\*", r"\1", text)
|
||||
text = re.sub(r"__(.+?)__", r"\1", text)
|
||||
text = re.sub(r"_(.+?)_", r"\1", text)
|
||||
|
||||
# Remove headers
|
||||
text = re.sub(r"^#+\s+", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove inline code
|
||||
text = re.sub(r"`(.+?)`", r"\1", text)
|
||||
|
||||
return text.strip()
|
||||
|
||||
@staticmethod
|
||||
def extract_pdf_text_with_boundaries(
|
||||
pdf_doc: pymupdf.Document,
|
||||
) -> tuple[str, list[dict]]:
|
||||
"""Extract full document text with page boundary tracking.
|
||||
|
||||
Uses pymupdf4llm.to_markdown() for consistency with indexing.
|
||||
|
||||
IMPORTANT: Must use write_images=True to match PyMuPDFProcessor behavior!
|
||||
Even though we don't need the images, we need the image references in the
|
||||
markdown text to maintain consistent character offsets with indexing.
|
||||
|
||||
Args:
|
||||
pdf_doc: Open PyMuPDF document
|
||||
|
||||
Returns:
|
||||
Tuple of (full_text, page_boundaries) where page_boundaries is a list of:
|
||||
{"page": 1, "start_offset": 0, "end_offset": 1234}
|
||||
"""
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
page_boundaries = []
|
||||
text_parts = []
|
||||
current_offset = 0
|
||||
|
||||
# Use temp directory for image output (images are discarded after extraction)
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
|
||||
|
||||
for page_idx in range(pdf_doc.page_count):
|
||||
page_md = pymupdf4llm.to_markdown(
|
||||
pdf_doc,
|
||||
pages=[page_idx],
|
||||
write_images=True, # Must match indexing! Otherwise offsets misalign
|
||||
image_path=temp_dir,
|
||||
page_chunks=False,
|
||||
)
|
||||
|
||||
page_boundaries.append(
|
||||
{
|
||||
"page": page_idx + 1, # 1-indexed
|
||||
"start_offset": current_offset,
|
||||
"end_offset": current_offset + len(page_md),
|
||||
}
|
||||
)
|
||||
|
||||
text_parts.append(page_md)
|
||||
current_offset += len(page_md)
|
||||
|
||||
full_text = "".join(text_parts)
|
||||
|
||||
# Clean up temp directory and extracted images
|
||||
import shutil
|
||||
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
|
||||
|
||||
return full_text, page_boundaries
|
||||
|
||||
@staticmethod
|
||||
def find_chunk_page(
|
||||
chunk_start_offset: int,
|
||||
chunk_end_offset: int,
|
||||
page_boundaries: list[dict],
|
||||
) -> Optional[dict]:
|
||||
"""Find which page contains the most of a given chunk.
|
||||
|
||||
Args:
|
||||
chunk_start_offset: Chunk start position in full document
|
||||
chunk_end_offset: Chunk end position in full document
|
||||
page_boundaries: Page boundary list from extract_pdf_text_with_boundaries()
|
||||
|
||||
Returns:
|
||||
Dict with keys: page_num, overlap_chars, page_relative_start, page_relative_end
|
||||
or None if chunk not found on any page
|
||||
"""
|
||||
chunk_pages = []
|
||||
|
||||
for boundary in page_boundaries:
|
||||
page_start = boundary["start_offset"]
|
||||
page_end = boundary["end_offset"]
|
||||
|
||||
# Check if chunk overlaps with this page
|
||||
if chunk_start_offset < page_end and chunk_end_offset > page_start:
|
||||
overlap_start = max(chunk_start_offset, page_start)
|
||||
overlap_end = min(chunk_end_offset, page_end)
|
||||
overlap_chars = overlap_end - overlap_start
|
||||
|
||||
chunk_pages.append(
|
||||
{
|
||||
"page_num": boundary["page"],
|
||||
"overlap_chars": overlap_chars,
|
||||
"page_relative_start": overlap_start - page_start,
|
||||
"page_relative_end": overlap_end - page_start,
|
||||
}
|
||||
)
|
||||
|
||||
if not chunk_pages:
|
||||
return None
|
||||
|
||||
# Return page with maximum overlap
|
||||
return max(chunk_pages, key=lambda p: p["overlap_chars"])
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk_by_word_positions(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
color: str = "yellow",
|
||||
search_region: tuple[float, float, float, float] | None = None,
|
||||
) -> int:
|
||||
"""Highlight chunk using word-position matching.
|
||||
|
||||
This method matches words from the chunk to their positions on the PDF page,
|
||||
avoiding text search mismatches between markdown-formatted text and raw PDF text.
|
||||
|
||||
Args:
|
||||
page: PyMuPDF page object
|
||||
chunk_text: Text to highlight (may contain markdown)
|
||||
color: Color name from COLORS dict
|
||||
search_region: Optional (x0, y0, x1, y1) bounding box to constrain search.
|
||||
If provided, only words within this region are considered.
|
||||
|
||||
Returns:
|
||||
Number of highlight rectangles added
|
||||
"""
|
||||
# Tokenize chunk into words (alphanumeric only, lowercase)
|
||||
chunk_words = re.findall(
|
||||
r"\w+", PDFHighlighter.strip_markdown(chunk_text).lower()
|
||||
)
|
||||
|
||||
if not chunk_words:
|
||||
logger.warning("No words found in chunk text")
|
||||
return 0
|
||||
|
||||
# Get all words from page with positions
|
||||
# Format: (x0, y0, x1, y1, "word", block_no, line_no, word_no)
|
||||
try:
|
||||
page_words = page.get_text("words")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract words from page: {e}")
|
||||
return 0
|
||||
|
||||
if not page_words:
|
||||
logger.warning("No words found on page")
|
||||
return 0
|
||||
|
||||
# Filter words by search region if provided
|
||||
if search_region:
|
||||
rx0, ry0, rx1, ry1 = search_region
|
||||
# Allow some tolerance (10 points) for words near region boundary
|
||||
tolerance = 10
|
||||
page_words = [
|
||||
w
|
||||
for w in page_words
|
||||
if (
|
||||
w[0] >= rx0 - tolerance
|
||||
and w[2] <= rx1 + tolerance
|
||||
and w[1] >= ry0 - tolerance
|
||||
and w[3] <= ry1 + tolerance
|
||||
)
|
||||
]
|
||||
logger.debug(
|
||||
f"Filtered to {len(page_words)} words in region "
|
||||
f"({rx0:.0f}, {ry0:.0f}, {rx1:.0f}, {ry1:.0f})"
|
||||
)
|
||||
|
||||
if not page_words:
|
||||
logger.warning("No words found in search region")
|
||||
return 0
|
||||
|
||||
# Find matching word sequence - use FIRST match, not longest
|
||||
# This ensures we highlight the actual chunk location, not similar text elsewhere
|
||||
matches = []
|
||||
|
||||
# Build a simple word-to-positions index for the first few chunk words
|
||||
# to find candidate starting positions
|
||||
first_chunk_word = chunk_words[0] if chunk_words else ""
|
||||
candidate_starts = []
|
||||
|
||||
for i, pw in enumerate(page_words):
|
||||
page_word = pw[4].lower()
|
||||
# Check if this could be the start of the chunk
|
||||
if (
|
||||
first_chunk_word == page_word
|
||||
or first_chunk_word in page_word
|
||||
or page_word in first_chunk_word
|
||||
):
|
||||
candidate_starts.append(i)
|
||||
|
||||
# Try each candidate start position and take the FIRST good match
|
||||
for start_pos in candidate_starts:
|
||||
current_matches = []
|
||||
chunk_idx = 0
|
||||
skip_count = 0
|
||||
max_skips = 3 # Allow some formatting differences
|
||||
|
||||
for page_idx in range(start_pos, len(page_words)):
|
||||
if chunk_idx >= len(chunk_words):
|
||||
break
|
||||
|
||||
page_word = page_words[page_idx][4].lower()
|
||||
chunk_word = chunk_words[chunk_idx]
|
||||
|
||||
# Check for match (allow partial matches for flexibility)
|
||||
if (
|
||||
chunk_word == page_word
|
||||
or chunk_word in page_word
|
||||
or page_word in chunk_word
|
||||
):
|
||||
current_matches.append(page_words[page_idx])
|
||||
chunk_idx += 1
|
||||
skip_count = 0
|
||||
elif skip_count < max_skips:
|
||||
# Allow skipping some words (formatting, punctuation)
|
||||
skip_count += 1
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
# Accept if we matched at least 50% of chunk words
|
||||
if len(current_matches) >= len(chunk_words) * 0.5:
|
||||
matches = current_matches
|
||||
logger.debug(
|
||||
f"Found match at position {start_pos}: "
|
||||
f"{len(matches)}/{len(chunk_words)} words"
|
||||
)
|
||||
break # Take FIRST match, not best/longest
|
||||
|
||||
if not matches:
|
||||
logger.debug(f"No word matches found (chunk has {len(chunk_words)} words)")
|
||||
return 0
|
||||
|
||||
logger.debug(
|
||||
f"Matched {len(matches)} words out of {len(chunk_words)} chunk words"
|
||||
)
|
||||
|
||||
# Build rectangles from matched words
|
||||
rects = [pymupdf.Rect(w[0], w[1], w[2], w[3]) for w in matches]
|
||||
|
||||
# Check if matches are contiguous (not scattered across the page)
|
||||
# Scattered matches indicate false positives from common words
|
||||
if len(rects) > 1:
|
||||
# Sort by vertical position then horizontal
|
||||
sorted_matches = sorted(matches, key=lambda w: (round(w[1]), w[0]))
|
||||
|
||||
# Check for large vertical gaps (more than ~2 lines apart)
|
||||
# A typical line height is 12-20 points
|
||||
max_line_gap = 50 # Points - allows for ~2-3 lines gap
|
||||
prev_y = sorted_matches[0][1]
|
||||
large_gaps = 0
|
||||
|
||||
for match in sorted_matches[1:]:
|
||||
y_gap = match[1] - prev_y
|
||||
if y_gap > max_line_gap:
|
||||
large_gaps += 1
|
||||
prev_y = match[1]
|
||||
|
||||
# If matches are scattered (many large gaps), reject this match
|
||||
# A chunk should be mostly contiguous text
|
||||
if large_gaps > len(matches) * 0.3: # More than 30% have gaps
|
||||
logger.debug(
|
||||
f"Rejecting scattered matches: {large_gaps} large gaps "
|
||||
f"out of {len(matches)} matches"
|
||||
)
|
||||
return 0
|
||||
|
||||
# Merge adjacent rectangles on the same line for cleaner highlighting
|
||||
merged_rects = []
|
||||
sorted_rects = sorted(rects, key=lambda r: (round(r.y0), r.x0))
|
||||
|
||||
current_rect = None
|
||||
for rect in sorted_rects:
|
||||
if current_rect is None:
|
||||
current_rect = rect
|
||||
elif abs(rect.y0 - current_rect.y0) < 5: # Same line (within 5 points)
|
||||
current_rect = current_rect | rect # Union
|
||||
else:
|
||||
merged_rects.append(current_rect)
|
||||
current_rect = rect
|
||||
|
||||
if current_rect:
|
||||
merged_rects.append(current_rect)
|
||||
|
||||
# Add highlights
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
for rect in merged_rects:
|
||||
highlight = page.add_highlight_annot(rect)
|
||||
highlight.set_colors({"stroke": rgb})
|
||||
highlight.set_info(
|
||||
content="Chunk from semantic search",
|
||||
title="PDF Highlighter (word-position)",
|
||||
)
|
||||
highlight.update()
|
||||
|
||||
return len(merged_rects)
|
||||
|
||||
@staticmethod
|
||||
def find_unique_phrase(
|
||||
text: str, min_len: int = 30, max_len: int = 80
|
||||
) -> str | None:
|
||||
"""Find a relatively unique phrase from text for location search.
|
||||
|
||||
Looks for phrases that are likely to be unique on the page:
|
||||
- Prefers phrases with numbers or special terms
|
||||
- Avoids very common words
|
||||
|
||||
Args:
|
||||
text: Source text to extract phrase from
|
||||
min_len: Minimum phrase length
|
||||
max_len: Maximum phrase length
|
||||
|
||||
Returns:
|
||||
A phrase likely to be unique, or None if not found
|
||||
"""
|
||||
clean_text = PDFHighlighter.strip_markdown(text).strip()
|
||||
if not clean_text:
|
||||
return None
|
||||
|
||||
# Try first sentence (often unique due to context)
|
||||
sentences = re.split(r"[.!?]\s+", clean_text)
|
||||
for sentence in sentences:
|
||||
sentence = sentence.strip()
|
||||
if min_len <= len(sentence) <= max_len:
|
||||
return sentence
|
||||
elif len(sentence) > max_len:
|
||||
return sentence[:max_len]
|
||||
|
||||
# Fallback: first N chars
|
||||
if len(clean_text) >= min_len:
|
||||
return clean_text[:max_len]
|
||||
|
||||
return clean_text if clean_text else None
|
||||
|
||||
@staticmethod
|
||||
def _find_chunk_bbox(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
page_relative_start: int,
|
||||
page_relative_end: int,
|
||||
page_text_length: int,
|
||||
) -> tuple[float, float, float, float] | None:
|
||||
"""Find bounding box for a chunk without modifying the page.
|
||||
|
||||
Returns (x0, y0, x1, y1) in page coordinates, or None if not found.
|
||||
"""
|
||||
page_rect = page.rect
|
||||
|
||||
# Strip markdown for searching
|
||||
search_text = PDFHighlighter.strip_markdown(chunk_text)
|
||||
|
||||
# Try to find chunk location using text search
|
||||
anchor_rect = None
|
||||
search_phrases = []
|
||||
|
||||
# Build search phrases from chunk text
|
||||
sentences = re.split(r"[.!?]\s+", search_text)
|
||||
for sentence in sentences[:3]:
|
||||
sentence = sentence.strip()
|
||||
if len(sentence) >= 20:
|
||||
search_phrases.append(sentence[:80])
|
||||
if len(sentence) >= 40:
|
||||
search_phrases.append(sentence[:40])
|
||||
|
||||
# Also try first N characters
|
||||
if len(search_text) >= 30:
|
||||
search_phrases.append(search_text[:60])
|
||||
search_phrases.append(search_text[:30])
|
||||
|
||||
for phrase in search_phrases:
|
||||
if not phrase:
|
||||
continue
|
||||
rects = page.search_for(phrase.strip())
|
||||
if rects:
|
||||
anchor_rect = rects[0]
|
||||
break
|
||||
|
||||
if not anchor_rect:
|
||||
return None
|
||||
|
||||
# Calculate chunk height based on character count
|
||||
chunk_chars = len(search_text)
|
||||
estimated_lines = max(1, chunk_chars / 60)
|
||||
estimated_height = estimated_lines * 14
|
||||
|
||||
# Build bounding box
|
||||
return (
|
||||
page_rect.x0 + 30, # Left margin
|
||||
anchor_rect.y0 - 5, # Start slightly above anchor
|
||||
page_rect.x1 - 30, # Right margin
|
||||
min(anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk_on_page(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
color: str = "yellow",
|
||||
page_relative_start: int | None = None,
|
||||
page_relative_end: int | None = None,
|
||||
page_text_length: int | None = None,
|
||||
) -> int:
|
||||
"""Add bounding box highlight to a PDF page for the given chunk text.
|
||||
|
||||
Uses text search to find the chunk's location on the page, then draws
|
||||
a bounding box around that region. Falls back to character offset estimation
|
||||
if text search fails.
|
||||
|
||||
Args:
|
||||
page: PyMuPDF page object
|
||||
chunk_text: Text to highlight (may contain markdown)
|
||||
color: Color name from COLORS dict
|
||||
page_relative_start: Character offset where chunk starts on page (optional)
|
||||
page_relative_end: Character offset where chunk ends on page (optional)
|
||||
page_text_length: Total character length of page text (optional)
|
||||
|
||||
Returns:
|
||||
Number of highlights added (1 for bounding box, 0 if failed)
|
||||
"""
|
||||
page_rect = page.rect
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
|
||||
# Strip markdown for searching
|
||||
search_text = PDFHighlighter.strip_markdown(chunk_text)
|
||||
|
||||
# Try to find chunk location using text search
|
||||
# Search for progressively shorter phrases until we find a match
|
||||
anchor_rect = None
|
||||
search_phrases = []
|
||||
|
||||
# Build search phrases from chunk text
|
||||
sentences = re.split(r"[.!?]\s+", search_text)
|
||||
for sentence in sentences[:3]: # Try first 3 sentences
|
||||
sentence = sentence.strip()
|
||||
if len(sentence) >= 20:
|
||||
search_phrases.append(sentence[:80])
|
||||
if len(sentence) >= 40:
|
||||
search_phrases.append(sentence[:40])
|
||||
|
||||
# Also try first N characters
|
||||
if len(search_text) >= 30:
|
||||
search_phrases.append(search_text[:60])
|
||||
search_phrases.append(search_text[:30])
|
||||
|
||||
for phrase in search_phrases:
|
||||
if not phrase:
|
||||
continue
|
||||
rects = page.search_for(phrase.strip())
|
||||
if rects:
|
||||
anchor_rect = rects[0] # Use first match
|
||||
logger.debug(f"Found chunk anchor using phrase: '{phrase[:30]}...'")
|
||||
break
|
||||
|
||||
if not anchor_rect:
|
||||
page_num = page.number + 1 if page.number is not None else "unknown"
|
||||
logger.warning(f"Could not find chunk text on page {page_num}")
|
||||
return 0
|
||||
|
||||
# Calculate chunk height based on character count
|
||||
# Estimate ~15 chars per line, ~12pt line height
|
||||
chunk_chars = len(search_text)
|
||||
estimated_lines = max(1, chunk_chars / 60) # ~60 chars per line typical
|
||||
estimated_height = estimated_lines * 14 # ~14pt per line
|
||||
|
||||
# Build bounding box starting from anchor
|
||||
chunk_rect = pymupdf.Rect(
|
||||
page_rect.x0 + 30, # Left margin
|
||||
anchor_rect.y0 - 5, # Start slightly above anchor
|
||||
page_rect.x1 - 30, # Right margin
|
||||
min(
|
||||
anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30
|
||||
), # Estimated bottom
|
||||
)
|
||||
|
||||
# Draw a visible rectangle around the chunk region
|
||||
shape = page.new_shape()
|
||||
shape.draw_rect(chunk_rect)
|
||||
shape.finish(
|
||||
color=rgb, # Border color
|
||||
fill=None, # No fill (transparent)
|
||||
width=2.5, # Border width
|
||||
dashes="[4 2]", # Dashed line
|
||||
)
|
||||
shape.commit()
|
||||
|
||||
# Add semi-transparent fill for visibility
|
||||
fill_shape = page.new_shape()
|
||||
fill_shape.draw_rect(chunk_rect)
|
||||
fill_shape.finish(
|
||||
color=None, # No border
|
||||
fill=[1, 1, 0.7], # Light yellow fill
|
||||
fill_opacity=0.15, # Very transparent
|
||||
)
|
||||
fill_shape.commit()
|
||||
|
||||
logger.debug(
|
||||
f"Added bounding box at y={chunk_rect.y0:.0f}-{chunk_rect.y1:.0f} "
|
||||
f"(estimated {estimated_lines:.1f} lines)"
|
||||
)
|
||||
|
||||
return 1
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk(
|
||||
pdf_bytes: bytes,
|
||||
chunk_start_offset: int,
|
||||
chunk_end_offset: int,
|
||||
stored_page_number: Optional[int] = None,
|
||||
color: str = "yellow",
|
||||
zoom: float = 2.0,
|
||||
) -> Optional[tuple[bytes, int, int]]:
|
||||
"""Generate PNG image of PDF page with highlighted chunk.
|
||||
|
||||
This is the main entry point for highlighting. It:
|
||||
1. Extracts document text with page boundaries
|
||||
2. Finds which page contains the chunk
|
||||
3. Extracts chunk text using character offsets
|
||||
4. Highlights the chunk on the page
|
||||
5. Renders page to PNG
|
||||
|
||||
Args:
|
||||
pdf_bytes: PDF file bytes
|
||||
chunk_start_offset: Chunk start position (document-level)
|
||||
chunk_end_offset: Chunk end position (document-level)
|
||||
stored_page_number: Page number from metadata (optional, for validation)
|
||||
color: Highlight color name
|
||||
zoom: Rendering zoom factor (2.0 = 144 DPI)
|
||||
|
||||
Returns:
|
||||
Tuple of (png_bytes, page_number, highlight_count) or None if failed
|
||||
"""
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
temp_pdf_path = None
|
||||
try:
|
||||
# Write PDF to temp file with consistent name "pdf.pdf"
|
||||
# This ensures image references match indexing (e.g., pdf-0001.png)
|
||||
# Different temp filenames would cause different markdown text lengths!
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
|
||||
temp_pdf_path = temp_dir / "pdf.pdf"
|
||||
temp_pdf_path.write_bytes(pdf_bytes)
|
||||
|
||||
# Open PDF from temp file
|
||||
doc = pymupdf.open(temp_pdf_path)
|
||||
|
||||
# Extract text with page boundaries
|
||||
full_text, page_boundaries = (
|
||||
PDFHighlighter.extract_pdf_text_with_boundaries(doc)
|
||||
)
|
||||
|
||||
# Find which page contains the chunk
|
||||
chunk_page_info = PDFHighlighter.find_chunk_page(
|
||||
chunk_start_offset, chunk_end_offset, page_boundaries
|
||||
)
|
||||
|
||||
if not chunk_page_info:
|
||||
logger.error("Chunk not found on any page")
|
||||
doc.close()
|
||||
return None
|
||||
|
||||
page_num = chunk_page_info["page_num"]
|
||||
|
||||
# Log if page differs from stored metadata
|
||||
if stored_page_number and stored_page_number != page_num:
|
||||
logger.info(
|
||||
f"Chunk primarily on page {page_num}, metadata says {stored_page_number}"
|
||||
)
|
||||
|
||||
# Extract page text
|
||||
page_boundary = page_boundaries[page_num - 1]
|
||||
page_start = page_boundary["start_offset"]
|
||||
page_end = page_boundary["end_offset"]
|
||||
page_text = full_text[page_start:page_end]
|
||||
|
||||
# Extract chunk text using page-relative offsets
|
||||
page_relative_start = chunk_page_info["page_relative_start"]
|
||||
page_relative_end = chunk_page_info["page_relative_end"]
|
||||
chunk_text = page_text[page_relative_start:page_relative_end]
|
||||
|
||||
# Calculate page text length for region estimation
|
||||
page_text_length = page_end - page_start
|
||||
|
||||
logger.debug(
|
||||
f"Extracted {len(chunk_text)} chars on page {page_num} "
|
||||
f"(offsets {page_relative_start}-{page_relative_end} of {page_text_length})"
|
||||
)
|
||||
|
||||
# Get page and add highlights
|
||||
page = doc[page_num - 1]
|
||||
highlight_count = PDFHighlighter.highlight_chunk_on_page(
|
||||
page,
|
||||
chunk_text,
|
||||
color,
|
||||
page_relative_start=page_relative_start,
|
||||
page_relative_end=page_relative_end,
|
||||
page_text_length=page_text_length,
|
||||
)
|
||||
|
||||
if highlight_count == 0:
|
||||
logger.warning("No highlights added")
|
||||
doc.close()
|
||||
return None
|
||||
|
||||
# Render page to PNG
|
||||
mat = pymupdf.Matrix(zoom, zoom)
|
||||
pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
png_bytes = pix.tobytes("png")
|
||||
|
||||
doc.close()
|
||||
|
||||
logger.info(
|
||||
f"Generated {len(png_bytes):,} byte image with {highlight_count} highlights"
|
||||
)
|
||||
|
||||
return (png_bytes, page_num, highlight_count)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error highlighting chunk: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
finally:
|
||||
# Clean up temp directory and PDF file
|
||||
if temp_pdf_path and temp_pdf_path.parent.exists():
|
||||
try:
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(temp_pdf_path.parent)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to delete temp directory {temp_pdf_path.parent}: {e}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunks_batch(
|
||||
pdf_bytes: bytes,
|
||||
chunks: list[tuple[int, int, int, int | None, str]],
|
||||
page_boundaries: list[dict],
|
||||
full_text: str,
|
||||
color: str = "yellow",
|
||||
zoom: float = 2.0,
|
||||
) -> dict[int, tuple[bytes, int, int]]:
|
||||
"""Generate highlighted images for multiple chunks.
|
||||
|
||||
Opens PDF once for rendering, uses pre-computed page boundaries from the
|
||||
document processor. This ensures consistent character offsets between
|
||||
chunking and highlighting.
|
||||
|
||||
Args:
|
||||
pdf_bytes: PDF file bytes
|
||||
chunks: List of (chunk_index, start_offset, end_offset, stored_page_number, chunk_text)
|
||||
The chunk_index is used as the key in the returned dict.
|
||||
chunk_text is the actual text content of the chunk.
|
||||
page_boundaries: Pre-computed page boundaries from document processor.
|
||||
Each entry: {"page": 1, "start_offset": 0, "end_offset": 1234}
|
||||
full_text: Full document text for extracting page-relative portions.
|
||||
color: Highlight color name
|
||||
zoom: Rendering zoom factor (2.0 = 144 DPI)
|
||||
|
||||
Returns:
|
||||
Dict mapping chunk_index to (png_bytes, page_number, highlight_count)
|
||||
Chunks that fail to highlight are omitted from the result.
|
||||
"""
|
||||
import shutil
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
results: dict[int, tuple[bytes, int, int]] = {}
|
||||
|
||||
if not chunks:
|
||||
return results
|
||||
|
||||
temp_pdf_path = None
|
||||
try:
|
||||
# Write PDF to temp file
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_batch_"))
|
||||
temp_pdf_path = temp_dir / "pdf.pdf"
|
||||
temp_pdf_path.write_bytes(pdf_bytes)
|
||||
|
||||
# Open PDF once (only for rendering, not text extraction)
|
||||
doc = pymupdf.open(temp_pdf_path)
|
||||
|
||||
logger.debug(
|
||||
f"Batch highlighting: {len(chunks)} chunks, "
|
||||
f"{len(page_boundaries)} pages"
|
||||
)
|
||||
|
||||
# Group chunks by their target page for efficient rendering
|
||||
# We'll render each page only once with all its highlights
|
||||
chunks_by_page: dict[int, list[tuple[int, dict, str]]] = defaultdict(list)
|
||||
|
||||
for chunk_tuple in chunks:
|
||||
# Unpack chunk tuple - chunk_text is now passed directly
|
||||
chunk_index, start_offset, end_offset, stored_page_num, chunk_text = (
|
||||
chunk_tuple
|
||||
)
|
||||
|
||||
# Find which page contains this chunk
|
||||
chunk_page_info = PDFHighlighter.find_chunk_page(
|
||||
start_offset, end_offset, page_boundaries
|
||||
)
|
||||
|
||||
if not chunk_page_info:
|
||||
logger.warning(f"Chunk {chunk_index}: not found on any page")
|
||||
continue
|
||||
|
||||
page_num = chunk_page_info["page_num"]
|
||||
|
||||
# Log if page differs from stored metadata
|
||||
if stored_page_num and stored_page_num != page_num:
|
||||
logger.debug(
|
||||
f"Chunk {chunk_index}: found on page {page_num}, "
|
||||
f"metadata says {stored_page_num}"
|
||||
)
|
||||
|
||||
# Extract page-relative portion of chunk text
|
||||
# This is critical for cross-page chunks where the start
|
||||
# of the chunk might be on a different page
|
||||
page_boundary = page_boundaries[page_num - 1]
|
||||
page_start = page_boundary["start_offset"]
|
||||
page_end = page_boundary["end_offset"]
|
||||
page_text_length = page_end - page_start
|
||||
|
||||
# Calculate what portion of the chunk appears on this page
|
||||
chunk_start_on_page = max(start_offset, page_start)
|
||||
chunk_end_on_page = min(end_offset, page_end)
|
||||
|
||||
# Extract just the text that appears on this page
|
||||
page_relative_text = full_text[chunk_start_on_page:chunk_end_on_page]
|
||||
|
||||
chunks_by_page[page_num].append(
|
||||
(chunk_index, chunk_page_info, page_relative_text, page_text_length)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Chunks distributed across {len(chunks_by_page)} unique pages"
|
||||
)
|
||||
|
||||
# OPTIMIZATION: Render each page ONCE, then draw highlights using PIL
|
||||
# This avoids expensive page.get_pixmap() calls per chunk
|
||||
from io import BytesIO
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# PIL color for bounding box (RGB tuple)
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
pil_color = tuple(int(c * 255) for c in rgb)
|
||||
fill_color = (255, 255, 178, 38) # Light yellow with alpha
|
||||
|
||||
for page_num, page_chunks in chunks_by_page.items():
|
||||
page = doc[page_num - 1]
|
||||
|
||||
# Render page ONCE to get base image (most expensive operation)
|
||||
mat = pymupdf.Matrix(zoom, zoom)
|
||||
base_pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
base_png = base_pix.tobytes("png")
|
||||
|
||||
# Convert to PIL Image for fast highlight drawing
|
||||
base_image = Image.open(BytesIO(base_png)).convert("RGBA")
|
||||
page_rect = page.rect
|
||||
|
||||
logger.debug(
|
||||
f"Page {page_num}: rendered once, processing {len(page_chunks)} chunks"
|
||||
)
|
||||
|
||||
for (
|
||||
chunk_index,
|
||||
chunk_page_info,
|
||||
chunk_text,
|
||||
page_text_length,
|
||||
) in page_chunks:
|
||||
try:
|
||||
# Find chunk bounding box using text search
|
||||
bbox = PDFHighlighter._find_chunk_bbox(
|
||||
page,
|
||||
chunk_text,
|
||||
chunk_page_info["page_relative_start"],
|
||||
chunk_page_info["page_relative_end"],
|
||||
page_text_length,
|
||||
)
|
||||
|
||||
if bbox is None:
|
||||
logger.warning(f"Chunk {chunk_index}: could not find bbox")
|
||||
continue
|
||||
|
||||
# Copy base image for this chunk
|
||||
chunk_image = base_image.copy()
|
||||
|
||||
# Scale bbox coordinates to pixmap coordinates
|
||||
scale_x = base_pix.width / page_rect.width
|
||||
scale_y = base_pix.height / page_rect.height
|
||||
pil_bbox = (
|
||||
int(bbox[0] * scale_x),
|
||||
int(bbox[1] * scale_y),
|
||||
int(bbox[2] * scale_x),
|
||||
int(bbox[3] * scale_y),
|
||||
)
|
||||
|
||||
# Create transparent overlay for fill (proper alpha blending)
|
||||
overlay = Image.new("RGBA", chunk_image.size, (0, 0, 0, 0))
|
||||
overlay_draw = ImageDraw.Draw(overlay)
|
||||
overlay_draw.rectangle(pil_bbox, fill=fill_color)
|
||||
|
||||
# Alpha composite the overlay onto the chunk image
|
||||
chunk_image = Image.alpha_composite(chunk_image, overlay)
|
||||
|
||||
# Draw border on top (solid, not transparent)
|
||||
border_draw = ImageDraw.Draw(chunk_image)
|
||||
border_draw.rectangle(pil_bbox, outline=pil_color, width=3)
|
||||
|
||||
# Convert back to PNG bytes
|
||||
output = BytesIO()
|
||||
chunk_image.convert("RGB").save(output, format="PNG")
|
||||
png_bytes = output.getvalue()
|
||||
|
||||
results[chunk_index] = (png_bytes, page_num, 1)
|
||||
|
||||
logger.debug(
|
||||
f"Chunk {chunk_index}: {len(png_bytes):,} bytes, "
|
||||
f"page {page_num}, bbox {pil_bbox}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Chunk {chunk_index}: error - {e}")
|
||||
continue
|
||||
|
||||
doc.close()
|
||||
|
||||
logger.info(
|
||||
f"Batch highlighted {len(results)}/{len(chunks)} chunks successfully"
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in batch highlighting: {e}", exc_info=True)
|
||||
return results
|
||||
|
||||
finally:
|
||||
# Clean up temp directory
|
||||
if temp_pdf_path and temp_pdf_path.parent.exists():
|
||||
try:
|
||||
shutil.rmtree(temp_pdf_path.parent)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp dir: {e}")
|
||||
@@ -9,6 +9,7 @@ from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_embedding_service
|
||||
from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -50,6 +51,9 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
Returns unverified results from Qdrant. Access verification should be
|
||||
performed separately at the final output stage using verify_search_results().
|
||||
|
||||
Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
|
||||
to show multiple chunks from the same document while avoiding duplicate chunks.
|
||||
|
||||
Args:
|
||||
query: Natural language search query
|
||||
user_id: User ID for filtering
|
||||
@@ -74,16 +78,19 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
# Generate embedding for query
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = query_embedding
|
||||
logger.debug(
|
||||
f"Generated embedding for query (dimension={len(query_embedding)})"
|
||||
)
|
||||
|
||||
# Build Qdrant filter
|
||||
filter_conditions = [
|
||||
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match=MatchValue(value=user_id),
|
||||
)
|
||||
),
|
||||
]
|
||||
|
||||
# Add doc_type filter if specified
|
||||
@@ -123,20 +130,41 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
top_scores = [p.score for p in search_response.points[:3]]
|
||||
logger.debug(f"Top 3 similarity scores: {top_scores}")
|
||||
|
||||
# Deduplicate by (doc_id, doc_type) - multiple chunks per document
|
||||
seen_docs = set()
|
||||
# Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
|
||||
# This allows multiple chunks from same doc, but removes duplicate chunks
|
||||
seen_chunks = set()
|
||||
results = []
|
||||
|
||||
for result in search_response.points:
|
||||
doc_id = int(result.payload["doc_id"])
|
||||
if result.payload is None:
|
||||
continue
|
||||
# doc_id can be int (notes) or str (files - file paths)
|
||||
doc_id = result.payload["doc_id"]
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
doc_key = (doc_id, doc_type)
|
||||
chunk_start = result.payload.get("chunk_start_offset")
|
||||
chunk_end = result.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, doc_type, chunk_start, chunk_end)
|
||||
|
||||
# Skip if we've already seen this document
|
||||
if doc_key in seen_docs:
|
||||
# Skip if we've already seen this exact chunk
|
||||
if chunk_key in seen_chunks:
|
||||
continue
|
||||
|
||||
seen_docs.add(doc_key)
|
||||
seen_chunks.add(chunk_key)
|
||||
|
||||
# Build metadata dict with common fields
|
||||
metadata = {
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
}
|
||||
|
||||
# Add file-specific metadata for PDF viewer
|
||||
if doc_type == "file" and (path := result.payload.get("file_path")):
|
||||
metadata["path"] = path
|
||||
|
||||
# Add deck_card-specific metadata for frontend URL construction
|
||||
if doc_type == "deck_card":
|
||||
if board_id := result.payload.get("board_id"):
|
||||
metadata["board_id"] = board_id
|
||||
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
@@ -146,12 +174,14 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score,
|
||||
metadata={
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
},
|
||||
metadata=metadata,
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
page_number=result.payload.get("page_number"),
|
||||
page_count=result.payload.get("page_count"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from .calendar import configure_calendar_tools
|
||||
from .contacts import configure_contacts_tools
|
||||
from .cookbook import configure_cookbook_tools
|
||||
from .deck import configure_deck_tools
|
||||
from .news import configure_news_tools
|
||||
from .notes import configure_notes_tools
|
||||
from .semantic import configure_semantic_tools
|
||||
from .sharing import configure_sharing_tools
|
||||
@@ -13,6 +14,7 @@ __all__ = [
|
||||
"configure_contacts_tools",
|
||||
"configure_cookbook_tools",
|
||||
"configure_deck_tools",
|
||||
"configure_news_tools",
|
||||
"configure_notes_tools",
|
||||
"configure_semantic_tools",
|
||||
"configure_sharing_tools",
|
||||
|
||||
@@ -3,6 +3,7 @@ import logging
|
||||
from typing import Optional
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -19,7 +20,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def configure_calendar_tools(mcp: FastMCP):
|
||||
# Calendar tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Calendars",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_calendars(ctx: Context) -> ListCalendarsResponse:
|
||||
@@ -30,7 +34,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
calendars = [Calendar(**cal_data) for cal_data in calendars_data]
|
||||
return ListCalendarsResponse(calendars=calendars, total_count=len(calendars))
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Calendar Event",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_event(
|
||||
@@ -107,7 +114,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_event(calendar_name, event_data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Calendar Events",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_events(
|
||||
@@ -210,7 +220,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return events
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Calendar Event",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_get_event(
|
||||
@@ -223,7 +236,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
event_data, etag = await client.calendar.get_event(calendar_name, event_uid)
|
||||
return event_data
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Calendar Event",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_update_event(
|
||||
@@ -297,7 +313,12 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
calendar_name, event_uid, event_data, etag
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Calendar Event",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_delete_event(
|
||||
@@ -309,7 +330,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.calendar.delete_event(calendar_name, event_uid)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Meeting",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_meeting(
|
||||
@@ -376,7 +400,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_event(calendar_name, event_data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Upcoming Events",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_get_upcoming_events(
|
||||
@@ -427,7 +454,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
all_events.sort(key=lambda x: x.get("start_datetime", ""))
|
||||
return all_events[:limit]
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Find Availability",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_find_availability(
|
||||
@@ -508,7 +538,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
constraints=constraints,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Bulk Calendar Operations",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_bulk_operations(
|
||||
@@ -758,7 +791,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
"results": results,
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Manage Calendar",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_manage_calendar(
|
||||
@@ -828,7 +864,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
# ============= Todo/Task Tools =============
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Todo Tasks",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("todo:read", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_todos(
|
||||
@@ -874,7 +913,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
todos=todos, calendar_name=calendar_name, total_count=len(todos)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Todo Task",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_todo(
|
||||
@@ -918,7 +960,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_todo(calendar_name, todo_data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Todo Task",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_update_todo(
|
||||
@@ -979,7 +1024,12 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.update_todo(calendar_name, todo_uid, todo_data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Todo Task",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_delete_todo(
|
||||
@@ -1000,7 +1050,10 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.calendar.delete_todo(calendar_name, todo_uid)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Search Todo Tasks",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("todo:read", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_search_todos(
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -11,7 +12,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def configure_contacts_tools(mcp: FastMCP):
|
||||
# Contacts tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Address Books",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("contacts:read")
|
||||
@instrument_tool
|
||||
async def nc_contacts_list_addressbooks(ctx: Context):
|
||||
@@ -19,7 +23,10 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.contacts.list_addressbooks()
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Contacts",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("contacts:read")
|
||||
@instrument_tool
|
||||
async def nc_contacts_list_contacts(ctx: Context, *, addressbook: str):
|
||||
@@ -27,7 +34,10 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.contacts.list_contacts(addressbook=addressbook)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Address Book",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("contacts:write")
|
||||
@instrument_tool
|
||||
async def nc_contacts_create_addressbook(
|
||||
@@ -44,7 +54,12 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
name=name, display_name=display_name
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Address Book",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("contacts:write")
|
||||
@instrument_tool
|
||||
async def nc_contacts_delete_addressbook(ctx: Context, *, name: str):
|
||||
@@ -52,7 +67,10 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.contacts.delete_addressbook(name=name)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Contact",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("contacts:write")
|
||||
@instrument_tool
|
||||
async def nc_contacts_create_contact(
|
||||
@@ -70,7 +88,12 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
addressbook=addressbook, uid=uid, contact_data=contact_data
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Contact",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("contacts:write")
|
||||
@instrument_tool
|
||||
async def nc_contacts_delete_contact(ctx: Context, *, addressbook: str, uid: str):
|
||||
@@ -78,7 +101,10 @@ def configure_contacts_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.contacts.delete_contact(addressbook=addressbook, uid=uid)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Contact",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("contacts:write")
|
||||
@instrument_tool
|
||||
async def nc_contacts_update_contact(
|
||||
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
from httpx import HTTPStatusError, RequestError
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.shared.exceptions import McpError
|
||||
from mcp.types import ErrorData
|
||||
from mcp.types import ErrorData, ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -71,7 +71,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Import Recipe from URL",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_import_recipe(url: str, ctx: Context) -> ImportRecipeResponse:
|
||||
@@ -129,7 +132,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Recipes",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_list_recipes(ctx: Context) -> ListRecipesResponse:
|
||||
@@ -155,7 +161,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Recipe",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_get_recipe(recipe_id: int, ctx: Context) -> Recipe:
|
||||
@@ -181,7 +190,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Recipe",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_create_recipe(
|
||||
@@ -261,7 +273,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Recipe",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_update_recipe(
|
||||
@@ -351,7 +366,12 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Recipe",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_delete_recipe(
|
||||
@@ -387,7 +407,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Search Recipes",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_search_recipes(
|
||||
@@ -424,7 +447,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Recipe Categories",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_list_categories(ctx: Context) -> ListCategoriesResponse:
|
||||
@@ -452,7 +478,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Recipes in Category",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_get_recipes_in_category(
|
||||
@@ -489,7 +518,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Recipe Keywords",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_list_keywords(ctx: Context) -> ListKeywordsResponse:
|
||||
@@ -515,7 +547,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Recipes with Keywords",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:read")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_get_recipes_with_keywords(
|
||||
@@ -550,7 +585,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Set Cookbook Configuration",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_set_config(
|
||||
@@ -594,7 +632,10 @@ def configure_cookbook_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Reindex Recipes",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("cookbook:write")
|
||||
@instrument_tool
|
||||
async def nc_cookbook_reindex(ctx: Context) -> ReindexResponse:
|
||||
|
||||
@@ -2,6 +2,7 @@ import logging
|
||||
from typing import Optional
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -117,7 +118,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
|
||||
# Read Tools (converted from resources)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Deck Boards",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_boards(ctx: Context) -> list[DeckBoard]:
|
||||
@@ -126,7 +130,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
boards = await client.deck.get_boards()
|
||||
return boards
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Deck Board",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_board(ctx: Context, board_id: int) -> DeckBoard:
|
||||
@@ -135,7 +142,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board = await client.deck.get_board(board_id)
|
||||
return board
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Deck Stacks",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_stacks(ctx: Context, board_id: int) -> list[DeckStack]:
|
||||
@@ -144,7 +154,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
stacks = await client.deck.get_stacks(board_id)
|
||||
return stacks
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Deck Stack",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_stack(ctx: Context, board_id: int, stack_id: int) -> DeckStack:
|
||||
@@ -153,7 +166,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
stack = await client.deck.get_stack(board_id, stack_id)
|
||||
return stack
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Deck Cards",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_cards(
|
||||
@@ -166,7 +182,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
return stack.cards
|
||||
return []
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Deck Card",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_card(
|
||||
@@ -177,7 +196,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
card = await client.deck.get_card(board_id, stack_id, card_id)
|
||||
return card
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Deck Labels",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_labels(ctx: Context, board_id: int) -> list[DeckLabel]:
|
||||
@@ -186,7 +208,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board = await client.deck.get_board(board_id)
|
||||
return board.labels
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Deck Label",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:read")
|
||||
@instrument_tool
|
||||
async def deck_get_label(ctx: Context, board_id: int, label_id: int) -> DeckLabel:
|
||||
@@ -197,7 +222,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
|
||||
# Create/Update/Delete Tools
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Deck Board",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_create_board(
|
||||
@@ -215,7 +243,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
|
||||
# Stack Tools
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Deck Stack",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_create_stack(
|
||||
@@ -232,7 +263,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
stack = await client.deck.create_stack(board_id, title, order)
|
||||
return CreateStackResponse(id=stack.id, title=stack.title, order=stack.order)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Deck Stack",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_update_stack(
|
||||
@@ -259,7 +293,12 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Deck Stack",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_delete_stack(
|
||||
@@ -281,7 +320,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# Card Tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_create_card(
|
||||
@@ -316,7 +358,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
stackId=card.stackId,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_update_card(
|
||||
@@ -370,7 +415,12 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Deck Card",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_delete_card(
|
||||
@@ -393,7 +443,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Archive Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_archive_card(
|
||||
@@ -416,7 +469,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Unarchive Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_unarchive_card(
|
||||
@@ -439,7 +495,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Reorder/Move Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_reorder_card(
|
||||
@@ -472,7 +531,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# Label Tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Deck Label",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_create_label(
|
||||
@@ -489,7 +551,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
label = await client.deck.create_label(board_id, title, color)
|
||||
return CreateLabelResponse(id=label.id, title=label.title, color=label.color)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Deck Label",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_update_label(
|
||||
@@ -516,7 +581,12 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Deck Label",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_delete_label(
|
||||
@@ -538,7 +608,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# Card-Label Assignment Tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Assign Label to Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_assign_label_to_card(
|
||||
@@ -562,7 +635,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Remove Label from Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_remove_label_from_card(
|
||||
@@ -587,7 +663,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# Card-User Assignment Tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Assign User to Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_assign_user_to_card(
|
||||
@@ -611,7 +690,10 @@ def configure_deck_tools(mcp: FastMCP):
|
||||
board_id=board_id,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Unassign User from Deck Card",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("deck:write")
|
||||
@instrument_tool
|
||||
async def deck_unassign_user_from_card(
|
||||
|
||||
@@ -0,0 +1,384 @@
|
||||
"""MCP tools for Nextcloud News app."""
|
||||
|
||||
import logging
|
||||
|
||||
from httpx import HTTPStatusError, RequestError
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.shared.exceptions import McpError
|
||||
from mcp.types import ErrorData, ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.client.news import NewsItemType
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
from nextcloud_mcp_server.models.news import (
|
||||
FeedHealthResponse,
|
||||
GetItemResponse,
|
||||
GetStatusResponse,
|
||||
ListFeedsResponse,
|
||||
ListFoldersResponse,
|
||||
ListItemsResponse,
|
||||
NewsFeed,
|
||||
NewsFolder,
|
||||
NewsItem,
|
||||
NewsItemSummary,
|
||||
)
|
||||
from nextcloud_mcp_server.observability.metrics import instrument_tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def configure_news_tools(mcp: FastMCP):
|
||||
"""Configure News app MCP tools."""
|
||||
|
||||
@mcp.tool(
|
||||
title="List News Folders",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_folders(ctx: Context) -> ListFoldersResponse:
|
||||
"""List all News folders (requires news:read scope)."""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
folders_data = await client.news.get_folders()
|
||||
folders = [NewsFolder(**f) for f in folders_data]
|
||||
return ListFoldersResponse(results=folders, total_count=len(folders))
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing folders: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list folders: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="List News Feeds",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_feeds(ctx: Context) -> ListFeedsResponse:
|
||||
"""List all News feeds with metadata (requires news:read scope).
|
||||
|
||||
Returns feeds with unread counts, error status, and overall starred count.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
data = await client.news.get_feeds()
|
||||
feeds = [NewsFeed(**f) for f in data.get("feeds", [])]
|
||||
return ListFeedsResponse(
|
||||
results=feeds,
|
||||
starred_count=data.get("starredCount", 0),
|
||||
newest_item_id=data.get("newestItemId"),
|
||||
total_count=len(feeds),
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing feeds: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list feeds: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="List News Items",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_items(
|
||||
ctx: Context,
|
||||
feed_id: int | None = None,
|
||||
folder_id: int | None = None,
|
||||
starred_only: bool = False,
|
||||
unread_only: bool = False,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> ListItemsResponse:
|
||||
"""List News items (articles) with optional filtering (requires news:read scope).
|
||||
|
||||
Args:
|
||||
feed_id: Filter by specific feed ID
|
||||
folder_id: Filter by specific folder ID
|
||||
starred_only: Return only starred items
|
||||
unread_only: Return only unread items
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with items, count, and pagination info
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
|
||||
# Determine item type filter
|
||||
type_ = NewsItemType.ALL
|
||||
id_ = 0
|
||||
if starred_only:
|
||||
type_ = NewsItemType.STARRED
|
||||
elif feed_id is not None:
|
||||
type_ = NewsItemType.FEED
|
||||
id_ = feed_id
|
||||
elif folder_id is not None:
|
||||
type_ = NewsItemType.FOLDER
|
||||
id_ = folder_id
|
||||
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=type_,
|
||||
id_=id_,
|
||||
get_read=not unread_only,
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
# Determine pagination info
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing items: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get News Item",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_item(item_id: int, ctx: Context) -> GetItemResponse:
|
||||
"""Get a specific News item by ID with full content (requires news:read scope).
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Returns:
|
||||
GetItemResponse with full item details including HTML body
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
item_data = await client.news.get_item(item_id)
|
||||
item = NewsItem(**item_data)
|
||||
return GetItemResponse(item=item)
|
||||
except ValueError as e:
|
||||
raise McpError(ErrorData(code=-1, message=str(e)))
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting item {item_id}: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 404:
|
||||
raise McpError(ErrorData(code=-1, message=f"Item {item_id} not found"))
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get item {item_id}: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get Starred News Items",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_starred_items(
|
||||
ctx: Context, limit: int = 50, offset: int = 0
|
||||
) -> ListItemsResponse:
|
||||
"""Get starred (favorited) News items (requires news:read scope).
|
||||
|
||||
Convenience method for retrieving user's starred articles.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with starred items
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=NewsItemType.STARRED,
|
||||
get_read=True, # Include read starred items
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting starred items: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get starred items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get Unread News Items",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_unread_items(
|
||||
ctx: Context, limit: int = 50, offset: int = 0
|
||||
) -> ListItemsResponse:
|
||||
"""Get unread News items (requires news:read scope).
|
||||
|
||||
Convenience method for retrieving unread articles across all feeds.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with unread items
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=NewsItemType.ALL,
|
||||
get_read=False, # Only unread items
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting unread items: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get unread items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get News Feed Health",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_feed_health(feed_id: int, ctx: Context) -> FeedHealthResponse:
|
||||
"""Get health status for a specific feed (requires news:read scope).
|
||||
|
||||
Returns error count and last error message if the feed has update issues.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID to check
|
||||
|
||||
Returns:
|
||||
FeedHealthResponse with error status
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
data = await client.news.get_feeds()
|
||||
for feed_data in data.get("feeds", []):
|
||||
if feed_data.get("id") == feed_id:
|
||||
feed = NewsFeed(**feed_data)
|
||||
return FeedHealthResponse(
|
||||
feed_id=feed.id,
|
||||
title=feed.title,
|
||||
url=feed.url,
|
||||
has_errors=feed.has_errors,
|
||||
error_count=feed.update_error_count,
|
||||
last_error=feed.last_update_error,
|
||||
)
|
||||
raise McpError(ErrorData(code=-1, message=f"Feed {feed_id} not found"))
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Network error getting feed health: {str(e)}",
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get feed health: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get News App Status",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_status(ctx: Context) -> GetStatusResponse:
|
||||
"""Get News app status and version (requires news:read scope).
|
||||
|
||||
Returns version information and any configuration warnings.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
status_data = await client.news.get_status()
|
||||
return GetStatusResponse(
|
||||
version=status_data.get("version", "unknown"),
|
||||
warnings=status_data.get("warnings", {}),
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error getting status: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get status: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
@@ -3,7 +3,7 @@ import logging
|
||||
from httpx import HTTPStatusError, RequestError
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.shared.exceptions import McpError
|
||||
from mcp.types import ErrorData
|
||||
from mcp.types import ErrorData, ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -85,7 +85,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Multiple calls create multiple notes
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_create_note(
|
||||
@@ -132,7 +138,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Requires etag which changes = not idempotent
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_update_note(
|
||||
@@ -198,7 +210,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Append to Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Each call adds content = not idempotent
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_append_content(
|
||||
@@ -249,7 +267,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Search Notes",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Search doesn't modify data
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:read")
|
||||
@instrument_tool
|
||||
async def nc_notes_search_notes(query: str, ctx: Context) -> SearchNotesResponse:
|
||||
@@ -296,7 +320,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Note",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Read operation only
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:read")
|
||||
@instrument_tool
|
||||
async def nc_notes_get_note(note_id: int, ctx: Context) -> Note:
|
||||
@@ -326,7 +356,13 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Note Attachment",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Read operation only
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:read")
|
||||
@instrument_tool
|
||||
async def nc_notes_get_attachment(
|
||||
@@ -373,7 +409,14 @@ def configure_notes_tools(mcp: FastMCP):
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Note",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Permanently deletes data
|
||||
idempotentHint=True, # Deleting deleted note = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_delete_note(note_id: int, ctx: Context) -> DeleteNoteResponse:
|
||||
|
||||
@@ -15,6 +15,7 @@ import httpx
|
||||
from mcp.server.auth.middleware.auth_context import get_access_token
|
||||
from mcp.server.auth.provider import AccessToken
|
||||
from mcp.server.fastmcp import Context
|
||||
from mcp.types import ToolAnnotations
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
@@ -417,11 +418,12 @@ async def revoke_nextcloud_access(
|
||||
storage = RefreshTokenStorage.from_env()
|
||||
await storage.initialize()
|
||||
|
||||
encryption_key = os.getenv("TOKEN_ENCRYPTION_KEY")
|
||||
if not encryption_key:
|
||||
# Get OAuth client credentials from storage
|
||||
client_creds = await storage.get_oauth_client()
|
||||
if not client_creds:
|
||||
return RevocationResult(
|
||||
success=False,
|
||||
message="Token encryption key not configured.",
|
||||
message="OAuth client credentials not found in storage.",
|
||||
)
|
||||
|
||||
broker = TokenBrokerService(
|
||||
@@ -431,7 +433,8 @@ async def revoke_nextcloud_access(
|
||||
f"{os.getenv('NEXTCLOUD_HOST')}/.well-known/openid-configuration",
|
||||
),
|
||||
nextcloud_host=os.getenv("NEXTCLOUD_HOST"), # type: ignore
|
||||
encryption_key=encryption_key,
|
||||
client_id=client_creds["client_id"],
|
||||
client_secret=client_creds["client_secret"],
|
||||
)
|
||||
|
||||
# Revoke access
|
||||
@@ -684,11 +687,16 @@ def register_oauth_tools(mcp):
|
||||
|
||||
@mcp.tool(
|
||||
name="provision_nextcloud_access",
|
||||
title="Grant Server Access to Nextcloud",
|
||||
description=(
|
||||
"Provision offline access to Nextcloud resources. "
|
||||
"This is required before using Nextcloud tools. "
|
||||
"You'll need to complete an OAuth authorization in your browser."
|
||||
),
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Creates new OAuth session each time
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("openid")
|
||||
async def tool_provision_access(
|
||||
@@ -699,7 +707,13 @@ def register_oauth_tools(mcp):
|
||||
|
||||
@mcp.tool(
|
||||
name="revoke_nextcloud_access",
|
||||
title="Revoke Server Access to Nextcloud",
|
||||
description="Revoke offline access to Nextcloud resources.",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Removes stored access tokens
|
||||
idempotentHint=True, # Revoking revoked access = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("openid")
|
||||
async def tool_revoke_access(
|
||||
@@ -709,7 +723,12 @@ def register_oauth_tools(mcp):
|
||||
|
||||
@mcp.tool(
|
||||
name="check_provisioning_status",
|
||||
title="Check Provisioning Status",
|
||||
description="Check whether Nextcloud access is provisioned.",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Only checks status, doesn't modify
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("openid")
|
||||
async def tool_check_status(
|
||||
@@ -719,10 +738,15 @@ def register_oauth_tools(mcp):
|
||||
|
||||
@mcp.tool(
|
||||
name="check_logged_in",
|
||||
title="Check Server Login Status",
|
||||
description=(
|
||||
"Check if you are logged in to Nextcloud. "
|
||||
"If not logged in, this tool will prompt you to complete the login flow."
|
||||
),
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Checking status doesn't modify state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("openid")
|
||||
async def tool_check_logged_in(ctx: Context, user_id: Optional[str] = None) -> str:
|
||||
|
||||
@@ -12,6 +12,7 @@ from mcp.types import (
|
||||
ModelPreferences,
|
||||
SamplingMessage,
|
||||
TextContent,
|
||||
ToolAnnotations,
|
||||
)
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
@@ -26,6 +27,7 @@ from nextcloud_mcp_server.observability.metrics import (
|
||||
instrument_tool,
|
||||
)
|
||||
from nextcloud_mcp_server.search.bm25_hybrid import BM25HybridSearchAlgorithm
|
||||
from nextcloud_mcp_server.search.context import get_chunk_with_context
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -33,7 +35,13 @@ logger = logging.getLogger(__name__)
|
||||
def configure_semantic_tools(mcp: FastMCP):
|
||||
"""Configure semantic search tools for MCP server."""
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Semantic Search",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Search doesn't modify data
|
||||
openWorldHint=True, # Queries external Nextcloud service
|
||||
),
|
||||
)
|
||||
@require_scopes("semantic:read")
|
||||
@instrument_tool
|
||||
async def nc_semantic_search(
|
||||
@@ -43,6 +51,8 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
doc_types: list[str] | None = None,
|
||||
score_threshold: float = 0.0,
|
||||
fusion: str = "rrf",
|
||||
include_context: bool = False,
|
||||
context_chars: int = 300,
|
||||
) -> SemanticSearchResponse:
|
||||
"""
|
||||
Search Nextcloud content using BM25 hybrid search with cross-app support.
|
||||
@@ -55,17 +65,19 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
database for optimal relevance. This provides the best of both semantic
|
||||
understanding and keyword precision.
|
||||
|
||||
Requires VECTOR_SYNC_ENABLED=true. Currently only "note" documents are
|
||||
fully supported for indexing.
|
||||
Requires VECTOR_SYNC_ENABLED=true. Supports indexing of notes, files,
|
||||
news items, and deck cards.
|
||||
|
||||
Args:
|
||||
query: Natural language or keyword search query
|
||||
limit: Maximum number of results to return (default: 10)
|
||||
doc_types: Document types to search (e.g., ["note", "file"]). None = search all indexed types (default)
|
||||
doc_types: Document types to search (e.g., ["note", "file", "deck_card", "news_item"]). None = search all indexed types (default)
|
||||
score_threshold: Minimum fusion score (0-1, default: 0.0)
|
||||
fusion: Fusion algorithm: "rrf" (Reciprocal Rank Fusion, default) or "dbsf" (Distribution-Based Score Fusion)
|
||||
RRF: Good general-purpose fusion using reciprocal ranks
|
||||
DBSF: Uses distribution-based normalization, may better balance different score ranges
|
||||
include_context: Whether to expand results with surrounding context (default: False)
|
||||
context_chars: Number of characters to include before/after matched chunk (default: 300)
|
||||
|
||||
Returns:
|
||||
SemanticSearchResponse with matching documents ranked by fusion scores
|
||||
@@ -128,18 +140,16 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
# Sort combined results by score
|
||||
all_results.sort(key=lambda r: r.score, reverse=True)
|
||||
|
||||
# Deduplicate results (hybrid search may return same doc from dense + sparse)
|
||||
# Qdrant already filters by user_id for multi-tenant isolation
|
||||
# Sampling tool will verify access when fetching full content
|
||||
seen = set()
|
||||
unique_results = []
|
||||
for result in all_results:
|
||||
key = (result.id, result.doc_type)
|
||||
if key not in seen:
|
||||
seen.add(key)
|
||||
unique_results.append(result)
|
||||
|
||||
search_results = unique_results[:limit] # Final limit after deduplication
|
||||
# Note: BM25HybridSearchAlgorithm already deduplicates at chunk level
|
||||
# (doc_id, doc_type, chunk_start, chunk_end), which allows multiple
|
||||
# chunks from the same document while preventing duplicate chunks.
|
||||
# No additional deduplication needed here - multiple chunks per document
|
||||
# are valuable for RAG contexts.
|
||||
# Qdrant already filters by user_id for multi-tenant isolation.
|
||||
# Sampling tool will verify access when fetching full content.
|
||||
search_results = all_results[
|
||||
:limit
|
||||
] # Final limit after chunk-level dedup in algorithm
|
||||
|
||||
# Convert SearchResult objects to SemanticSearchResult for response
|
||||
results = []
|
||||
@@ -160,9 +170,99 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
else 1,
|
||||
chunk_start_offset=r.chunk_start_offset,
|
||||
chunk_end_offset=r.chunk_end_offset,
|
||||
page_number=r.page_number,
|
||||
)
|
||||
)
|
||||
|
||||
# Expand results with surrounding context if requested
|
||||
if include_context and results:
|
||||
logger.info(
|
||||
f"Expanding {len(results)} results with context "
|
||||
f"(context_chars={context_chars})"
|
||||
)
|
||||
|
||||
# Fetch context for all results in parallel
|
||||
# Limit concurrent requests to prevent connection pool exhaustion
|
||||
max_concurrent = 20
|
||||
semaphore = anyio.Semaphore(max_concurrent)
|
||||
expanded_results = [None] * len(results)
|
||||
|
||||
async def fetch_context(index: int, result: SemanticSearchResult):
|
||||
"""Fetch context for a single result (parallel with semaphore)."""
|
||||
async with semaphore:
|
||||
# Only expand if we have valid chunk offsets
|
||||
if (
|
||||
result.chunk_start_offset is None
|
||||
or result.chunk_end_offset is None
|
||||
):
|
||||
# Keep result as-is without context expansion
|
||||
expanded_results[index] = result
|
||||
return
|
||||
|
||||
try:
|
||||
chunk_context = await get_chunk_with_context(
|
||||
nc_client=client,
|
||||
user_id=username,
|
||||
doc_id=result.id,
|
||||
doc_type=result.doc_type,
|
||||
chunk_start=result.chunk_start_offset,
|
||||
chunk_end=result.chunk_end_offset,
|
||||
page_number=result.page_number,
|
||||
chunk_index=result.chunk_index,
|
||||
total_chunks=result.total_chunks,
|
||||
context_chars=context_chars,
|
||||
)
|
||||
|
||||
if chunk_context:
|
||||
# Create new result with context fields populated
|
||||
expanded_results[index] = SemanticSearchResult(
|
||||
id=result.id,
|
||||
doc_type=result.doc_type,
|
||||
title=result.title,
|
||||
category=result.category,
|
||||
excerpt=result.excerpt,
|
||||
score=result.score,
|
||||
chunk_index=result.chunk_index,
|
||||
total_chunks=result.total_chunks,
|
||||
chunk_start_offset=result.chunk_start_offset,
|
||||
chunk_end_offset=result.chunk_end_offset,
|
||||
page_number=result.page_number,
|
||||
# Context expansion fields
|
||||
has_context_expansion=True,
|
||||
marked_text=chunk_context.marked_text,
|
||||
before_context=chunk_context.before_context,
|
||||
after_context=chunk_context.after_context,
|
||||
has_before_truncation=chunk_context.has_before_truncation,
|
||||
has_after_truncation=chunk_context.has_after_truncation,
|
||||
)
|
||||
logger.debug(
|
||||
f"Expanded context for {result.doc_type} {result.id}"
|
||||
)
|
||||
else:
|
||||
# Context expansion failed, keep original result
|
||||
expanded_results[index] = result
|
||||
logger.debug(
|
||||
f"Failed to expand context for {result.doc_type} {result.id}, "
|
||||
"keeping original result"
|
||||
)
|
||||
except Exception as e:
|
||||
# Context expansion failed, keep original result
|
||||
expanded_results[index] = result
|
||||
logger.warning(
|
||||
f"Error expanding context for {result.doc_type} {result.id}: {e}"
|
||||
)
|
||||
|
||||
# Run all context fetches in parallel using anyio task group
|
||||
async with anyio.create_task_group() as tg:
|
||||
for idx, result in enumerate(results):
|
||||
tg.start_soon(fetch_context, idx, result)
|
||||
|
||||
# Replace results with expanded versions
|
||||
results = [r for r in expanded_results if r is not None]
|
||||
logger.info(
|
||||
f"Context expansion completed: {len(results)} results with context"
|
||||
)
|
||||
|
||||
logger.info(f"Returning {len(results)} results from BM25 hybrid search")
|
||||
|
||||
return SemanticSearchResponse(
|
||||
@@ -192,7 +292,13 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
logger.error(f"Search error: {e}", exc_info=True)
|
||||
raise McpError(ErrorData(code=-1, message=f"Search failed: {str(e)}"))
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Search with AI-Generated Answer",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Search doesn't modify data
|
||||
openWorldHint=False, # Searches only indexed Nextcloud data
|
||||
),
|
||||
)
|
||||
@require_scopes("semantic:read")
|
||||
@instrument_tool
|
||||
async def nc_semantic_search_answer(
|
||||
@@ -202,6 +308,8 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
score_threshold: float = 0.7,
|
||||
max_answer_tokens: int = 500,
|
||||
fusion: str = "rrf",
|
||||
include_context: bool = False,
|
||||
context_chars: int = 300,
|
||||
) -> SamplingSearchResponse:
|
||||
"""
|
||||
Semantic search with LLM-generated answer using MCP sampling.
|
||||
@@ -227,6 +335,8 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
score_threshold: Minimum similarity score 0-1 (default: 0.7)
|
||||
max_answer_tokens: Maximum tokens for generated answer (default: 500)
|
||||
fusion: Fusion algorithm: "rrf" (Reciprocal Rank Fusion, default) or "dbsf" (Distribution-Based Score Fusion)
|
||||
include_context: Whether to expand results with surrounding context (default: False)
|
||||
context_chars: Number of characters to include before/after matched chunk (default: 300)
|
||||
|
||||
Returns:
|
||||
SamplingSearchResponse containing:
|
||||
@@ -238,27 +348,6 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
Note: Requires MCP client to support sampling. If sampling is unavailable,
|
||||
the tool gracefully degrades to returning documents with an explanation.
|
||||
The client may prompt the user to approve the sampling request.
|
||||
|
||||
Examples:
|
||||
>>> # Query about objectives across multiple apps
|
||||
>>> result = await nc_semantic_search_answer(
|
||||
... query="What are my Q1 2025 project goals?",
|
||||
... ctx=ctx
|
||||
... )
|
||||
>>> print(result.generated_answer)
|
||||
"Based on Document 1 (note: Project Kickoff), Document 2 (calendar event:
|
||||
Q1 Planning Meeting), and Document 3 (deck card: Implement semantic search),
|
||||
your main goals are: 1) Improve semantic search accuracy by 20%,
|
||||
2) Deploy new embedding model, 3) Reduce indexing latency..."
|
||||
|
||||
>>> # Query about appointments
|
||||
>>> result = await nc_semantic_search_answer(
|
||||
... query="When is my next dentist appointment?",
|
||||
... ctx=ctx,
|
||||
... limit=10
|
||||
... )
|
||||
>>> len(result.sources) # Calendar events and related notes
|
||||
3
|
||||
"""
|
||||
# 1. Retrieve relevant documents via existing semantic search
|
||||
search_response = await nc_semantic_search(
|
||||
@@ -267,6 +356,8 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
limit=limit,
|
||||
score_threshold=score_threshold,
|
||||
fusion=fusion,
|
||||
include_context=include_context,
|
||||
context_chars=context_chars,
|
||||
)
|
||||
|
||||
# 2. Handle no results case - don't waste a sampling call
|
||||
@@ -421,9 +512,11 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# 6. Request LLM completion via MCP sampling with timeout
|
||||
# Note: 5 minute timeout to accommodate slower local LLMs (e.g., Ollama)
|
||||
sampling_timeout_seconds = 300
|
||||
|
||||
try:
|
||||
with anyio.fail_after(30):
|
||||
with anyio.fail_after(sampling_timeout_seconds):
|
||||
sampling_result = await ctx.session.create_message(
|
||||
messages=[
|
||||
SamplingMessage(
|
||||
@@ -470,14 +563,14 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
f"Sampling request timed out after 30 seconds for query: '{query}', "
|
||||
f"Sampling request timed out after {sampling_timeout_seconds} seconds for query: '{query}', "
|
||||
f"returning search results only"
|
||||
)
|
||||
return SamplingSearchResponse(
|
||||
query=query,
|
||||
generated_answer=(
|
||||
f"[Sampling request timed out]\n\n"
|
||||
f"The answer generation took too long (>30s). "
|
||||
f"The answer generation took too long (>{sampling_timeout_seconds}s). "
|
||||
f"Found {len(accessible_results)} relevant documents. "
|
||||
f"Please review the sources below or try a simpler query."
|
||||
),
|
||||
@@ -543,7 +636,13 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
success=True,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Check Indexing Status",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Only checks status
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("semantic:read")
|
||||
@instrument_tool
|
||||
async def nc_get_vector_sync_status(ctx: Context) -> VectorSyncStatusResponse:
|
||||
@@ -597,15 +696,22 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
# Get Qdrant client and query indexed count
|
||||
indexed_count = 0
|
||||
try:
|
||||
from qdrant_client.models import Filter
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.placeholder import (
|
||||
get_placeholder_filter,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
settings = get_settings()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
# Count documents in collection
|
||||
# Count documents in collection, excluding placeholders
|
||||
# Placeholders are zero-vector points used to track processing state
|
||||
count_result = await qdrant_client.count(
|
||||
collection_name=settings.get_collection_name()
|
||||
collection_name=settings.get_collection_name(),
|
||||
count_filter=Filter(must=[get_placeholder_filter()]),
|
||||
)
|
||||
indexed_count = count_result.count
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import json
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -16,7 +17,10 @@ def configure_sharing_tools(mcp: FastMCP):
|
||||
mcp: FastMCP server instance
|
||||
"""
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Share",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("sharing:write")
|
||||
@instrument_tool
|
||||
async def nc_share_create(
|
||||
@@ -56,7 +60,12 @@ def configure_sharing_tools(mcp: FastMCP):
|
||||
)
|
||||
return json.dumps(share_data, indent=2)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Share",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("sharing:write")
|
||||
@instrument_tool
|
||||
async def nc_share_delete(share_id: int, ctx: Context) -> str:
|
||||
@@ -76,7 +85,10 @@ def configure_sharing_tools(mcp: FastMCP):
|
||||
{"success": True, "message": f"Share {share_id} deleted"}, indent=2
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Share Details",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("sharing:write")
|
||||
@instrument_tool
|
||||
async def nc_share_get(share_id: int, ctx: Context) -> str:
|
||||
@@ -95,7 +107,10 @@ def configure_sharing_tools(mcp: FastMCP):
|
||||
share_data = await client.sharing.get_share(share_id)
|
||||
return json.dumps(share_data, indent=2)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Shares",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("sharing:write")
|
||||
@instrument_tool
|
||||
async def nc_share_list(
|
||||
@@ -117,7 +132,10 @@ def configure_sharing_tools(mcp: FastMCP):
|
||||
)
|
||||
return json.dumps(shares, indent=2)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Share",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("sharing:write")
|
||||
@instrument_tool
|
||||
async def nc_share_update(share_id: int, permissions: int, ctx: Context) -> str:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -11,7 +12,10 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def configure_tables_tools(mcp: FastMCP):
|
||||
# Tables tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Tables",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("tables:read")
|
||||
@instrument_tool
|
||||
async def nc_tables_list_tables(ctx: Context):
|
||||
@@ -19,7 +23,10 @@ def configure_tables_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.tables.list_tables()
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Get Table Schema",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("tables:read")
|
||||
@instrument_tool
|
||||
async def nc_tables_get_schema(table_id: int, ctx: Context):
|
||||
@@ -27,7 +34,10 @@ def configure_tables_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.tables.get_table_schema(table_id)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Read Table Rows",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("tables:read")
|
||||
@instrument_tool
|
||||
async def nc_tables_read_table(
|
||||
@@ -40,7 +50,10 @@ def configure_tables_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.tables.get_table_rows(table_id, limit, offset)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Insert Table Row",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("tables:write")
|
||||
@instrument_tool
|
||||
async def nc_tables_insert_row(table_id: int, data: dict, ctx: Context):
|
||||
@@ -51,7 +64,10 @@ def configure_tables_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.tables.create_row(table_id, data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Update Table Row",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@require_scopes("tables:write")
|
||||
@instrument_tool
|
||||
async def nc_tables_update_row(row_id: int, data: dict, ctx: Context):
|
||||
@@ -62,7 +78,12 @@ def configure_tables_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.tables.update_row(row_id, data)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete Table Row",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@require_scopes("tables:write")
|
||||
@instrument_tool
|
||||
async def nc_tables_delete_row(row_id: int, ctx: Context):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -16,7 +17,13 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def configure_webdav_tools(mcp: FastMCP):
|
||||
# WebDAV file system tools
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Files and Directories",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_list_directory(
|
||||
@@ -50,7 +57,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
total_size=total_size,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Read File",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_read_file(path: str, ctx: Context):
|
||||
@@ -64,20 +77,6 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
- Text files are decoded to UTF-8
|
||||
- Documents (PDF, DOCX, etc.) are parsed and text is extracted
|
||||
- Other binary files are base64 encoded
|
||||
|
||||
Examples:
|
||||
# Read a text file
|
||||
result = await nc_webdav_read_file("Documents/readme.txt")
|
||||
logger.info(result['content']) # Decoded text content
|
||||
|
||||
# Read a PDF document (automatically parsed)
|
||||
result = await nc_webdav_read_file("Documents/report.pdf")
|
||||
logger.info(result['content']) # Extracted text from PDF
|
||||
logger.info(result['parsing_metadata']) # Document parsing info
|
||||
|
||||
# Read a binary file
|
||||
result = await nc_webdav_read_file("Images/photo.jpg")
|
||||
logger.info(result['encoding']) # 'base64'
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
content, content_type = await client.webdav.read_file(path)
|
||||
@@ -131,7 +130,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
"encoding": "base64",
|
||||
}
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Write File",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=True, # HTTP PUT without version control is idempotent
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:write")
|
||||
@instrument_tool
|
||||
async def nc_webdav_write_file(
|
||||
@@ -160,7 +165,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
|
||||
return await client.webdav.write_file(path, content_bytes, content_type)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Create Directory",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=True, # Creating existing dir returns 405 = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:write")
|
||||
@instrument_tool
|
||||
async def nc_webdav_create_directory(path: str, ctx: Context):
|
||||
@@ -175,7 +186,14 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.webdav.create_directory(path)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Delete File or Directory",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Permanently deletes data
|
||||
idempotentHint=True, # Deleting deleted resource = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:write")
|
||||
@instrument_tool
|
||||
async def nc_webdav_delete_resource(path: str, ctx: Context):
|
||||
@@ -190,7 +208,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.webdav.delete_resource(path)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Move or Rename File",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Moving changes source and dest
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:write")
|
||||
@instrument_tool
|
||||
async def nc_webdav_move_resource(
|
||||
@@ -211,7 +235,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
source_path, destination_path, overwrite
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Copy File or Directory",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Creates new resource each time
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:write")
|
||||
@instrument_tool
|
||||
async def nc_webdav_copy_resource(
|
||||
@@ -232,7 +262,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
source_path, destination_path, overwrite
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Search Files",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_search_files(
|
||||
@@ -349,7 +385,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
filters_applied=filters if filters else None,
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Find Files by Name",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_find_by_name(
|
||||
@@ -377,7 +419,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
filters_applied={"name_pattern": pattern},
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="Find Files by Type",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_find_by_type(
|
||||
@@ -405,7 +453,13 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
filters_applied={"mime_type": mime_type},
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@mcp.tool(
|
||||
title="List Favorite Files",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
@require_scopes("files:read")
|
||||
@instrument_tool
|
||||
async def nc_webdav_list_favorites(
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
"""Smithery-specific entrypoint for stateless deployment.
|
||||
|
||||
ADR-016: This entrypoint is used when deploying on Smithery's hosting platform.
|
||||
It configures the server for stateless operation with per-session authentication.
|
||||
|
||||
Features disabled in Smithery mode:
|
||||
- Vector sync / semantic search (no persistent storage)
|
||||
- Admin UI at /app (no webhooks, no vector viz)
|
||||
- OAuth provisioning tools (no token storage)
|
||||
|
||||
Features enabled:
|
||||
- Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
|
||||
- Per-session app password authentication via Smithery configSchema
|
||||
- Health check endpoints (/health/live, /health/ready)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import uvicorn
|
||||
|
||||
from nextcloud_mcp_server.config import setup_logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main():
|
||||
"""Start the MCP server in Smithery stateless mode."""
|
||||
# Setup logging first
|
||||
setup_logging()
|
||||
|
||||
# Force stateless mode environment variables
|
||||
os.environ["SMITHERY_DEPLOYMENT"] = "true"
|
||||
os.environ["VECTOR_SYNC_ENABLED"] = "false"
|
||||
|
||||
logger.info("Starting Nextcloud MCP Server in Smithery stateless mode")
|
||||
|
||||
# Import app after setting environment variables
|
||||
from nextcloud_mcp_server.app import get_app
|
||||
|
||||
# Create the app with streamable-http transport (required for Smithery)
|
||||
app = get_app(transport="streamable-http")
|
||||
|
||||
# Smithery sets PORT environment variable
|
||||
port = int(os.environ.get("PORT", 8081))
|
||||
|
||||
logger.info(f"Listening on port {port}")
|
||||
|
||||
uvicorn.run(
|
||||
app,
|
||||
host="0.0.0.0",
|
||||
port=port,
|
||||
log_level="info",
|
||||
# Disable access log for cleaner output
|
||||
access_log=False,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -15,6 +15,8 @@ class ChunkWithPosition:
|
||||
text: str
|
||||
start_offset: int # Character position where chunk starts
|
||||
end_offset: int # Character position where chunk ends (exclusive)
|
||||
page_number: int | None = None # Page number for PDF chunks (optional)
|
||||
metadata: dict | None = None # Additional processor-specific metadata (optional)
|
||||
|
||||
|
||||
class DocumentChunker:
|
||||
@@ -50,7 +52,7 @@ class DocumentChunker:
|
||||
strip_whitespace=True,
|
||||
)
|
||||
|
||||
def chunk_text(self, content: str) -> list[ChunkWithPosition]:
|
||||
async def chunk_text(self, content: str) -> list[ChunkWithPosition]:
|
||||
"""
|
||||
Split text into overlapping chunks with position tracking.
|
||||
|
||||
@@ -66,12 +68,17 @@ class DocumentChunker:
|
||||
Returns:
|
||||
List of chunks with their character positions in the original content
|
||||
"""
|
||||
import anyio
|
||||
|
||||
# Handle empty content - return single empty chunk for backward compatibility
|
||||
if not content:
|
||||
return [ChunkWithPosition(text="", start_offset=0, end_offset=0)]
|
||||
|
||||
# Use LangChain to create documents with position tracking
|
||||
docs = self.splitter.create_documents([content])
|
||||
# Run CPU-bound text splitting in thread pool to avoid blocking event loop
|
||||
docs = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
self.splitter.create_documents,
|
||||
[content],
|
||||
)
|
||||
|
||||
# Convert LangChain Documents to ChunkWithPosition objects
|
||||
chunks = [
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
"""HTML to Markdown conversion utilities for vector sync."""
|
||||
|
||||
import logging
|
||||
|
||||
from markdownify import markdownify as md
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def html_to_markdown(html_content: str | None) -> str:
|
||||
"""Convert HTML content to Markdown, preserving semantic structure.
|
||||
|
||||
This function converts HTML (typically from RSS/Atom feed items) to Markdown
|
||||
for better text embedding. Markdown preserves:
|
||||
- Heading hierarchy (important for document structure)
|
||||
- Lists (bullet and numbered)
|
||||
- Links (as [text](url))
|
||||
- Bold/italic emphasis
|
||||
- Paragraphs and line breaks
|
||||
|
||||
Args:
|
||||
html_content: HTML string to convert (may be None or empty)
|
||||
|
||||
Returns:
|
||||
Markdown string, or empty string if input is None/empty
|
||||
|
||||
Example:
|
||||
>>> html_to_markdown("<h1>Title</h1><p>Content with <b>bold</b>.</p>")
|
||||
'# Title\\n\\nContent with **bold**.\\n\\n'
|
||||
"""
|
||||
if not html_content:
|
||||
return ""
|
||||
|
||||
try:
|
||||
markdown = md(
|
||||
html_content,
|
||||
heading_style="ATX", # Use # style headings
|
||||
strip=["script", "style", "iframe", "noscript"], # Remove unsafe elements
|
||||
bullets="-", # Use - for unordered lists
|
||||
code_language="", # Don't add language hints to code blocks
|
||||
)
|
||||
return markdown.strip()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to convert HTML to Markdown: {e}")
|
||||
# Fallback: strip all HTML tags as a last resort
|
||||
import re
|
||||
|
||||
text = re.sub(r"<[^>]+>", " ", html_content)
|
||||
return " ".join(text.split()) # Normalize whitespace
|
||||
@@ -0,0 +1,352 @@
|
||||
"""OAuth mode vector sync orchestration.
|
||||
|
||||
Manages multi-user background vector sync when running in OAuth mode
|
||||
with ENABLE_OFFLINE_ACCESS=true:
|
||||
- User Manager: Monitors RefreshTokenStorage for user changes
|
||||
- Per-User Scanners: One scanner task per provisioned user
|
||||
- Shared Processor Pool: Processes documents from all users
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import anyio
|
||||
from anyio.abc import TaskGroup, TaskStatus
|
||||
from anyio.streams.memory import (
|
||||
MemoryObjectReceiveStream,
|
||||
MemoryObjectSendStream,
|
||||
)
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.scanner import DocumentTask, scan_user_documents
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
|
||||
from nextcloud_mcp_server.auth.token_broker import TokenBrokerService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Scopes required for vector sync operations
|
||||
VECTOR_SYNC_SCOPES = [
|
||||
"notes:read",
|
||||
"files:read",
|
||||
"deck:read",
|
||||
# "news:read", # News app may not be installed
|
||||
]
|
||||
|
||||
|
||||
class NotProvisionedError(Exception):
|
||||
"""User has not provisioned offline access or has revoked it."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class UserSyncState:
|
||||
"""State for a single user's scanner task."""
|
||||
|
||||
user_id: str
|
||||
cancel_scope: anyio.CancelScope
|
||||
started_at: float = field(default_factory=time.time)
|
||||
|
||||
|
||||
async def get_user_client(
|
||||
user_id: str,
|
||||
token_broker: "TokenBrokerService",
|
||||
nextcloud_host: str,
|
||||
) -> NextcloudClient:
|
||||
"""Get an authenticated NextcloudClient for a user.
|
||||
|
||||
Args:
|
||||
user_id: User identifier
|
||||
token_broker: Token broker for obtaining access tokens
|
||||
nextcloud_host: Nextcloud base URL
|
||||
|
||||
Returns:
|
||||
Authenticated NextcloudClient
|
||||
|
||||
Raises:
|
||||
NotProvisionedError: If user has not provisioned offline access
|
||||
"""
|
||||
token = await token_broker.get_background_token(user_id, VECTOR_SYNC_SCOPES)
|
||||
if not token:
|
||||
raise NotProvisionedError(f"User {user_id} has not provisioned offline access")
|
||||
|
||||
return NextcloudClient.from_token(
|
||||
base_url=nextcloud_host,
|
||||
token=token,
|
||||
username=user_id,
|
||||
)
|
||||
|
||||
|
||||
async def user_scanner_task(
|
||||
user_id: str,
|
||||
send_stream: MemoryObjectSendStream[DocumentTask],
|
||||
shutdown_event: anyio.Event,
|
||||
wake_event: anyio.Event,
|
||||
token_broker: "TokenBrokerService",
|
||||
nextcloud_host: str,
|
||||
*,
|
||||
task_status: TaskStatus = anyio.TASK_STATUS_IGNORED,
|
||||
) -> None:
|
||||
"""Scanner task for a single user in OAuth mode.
|
||||
|
||||
Gets a fresh token at the start of each scan cycle.
|
||||
|
||||
Args:
|
||||
user_id: User to scan
|
||||
send_stream: Stream to send changed documents to processors
|
||||
shutdown_event: Event signaling shutdown
|
||||
wake_event: Event to trigger immediate scan
|
||||
token_broker: Token broker for obtaining access tokens
|
||||
nextcloud_host: Nextcloud base URL
|
||||
task_status: Status object for signaling task readiness
|
||||
"""
|
||||
logger.info(f"[OAuth] Scanner started for user: {user_id}")
|
||||
settings = get_settings()
|
||||
|
||||
task_status.started()
|
||||
|
||||
while not shutdown_event.is_set():
|
||||
nc_client = None
|
||||
try:
|
||||
# Get fresh token for this scan cycle
|
||||
nc_client = await get_user_client(user_id, token_broker, nextcloud_host)
|
||||
|
||||
# Scan user's documents
|
||||
await scan_user_documents(
|
||||
user_id=user_id,
|
||||
send_stream=send_stream,
|
||||
nc_client=nc_client,
|
||||
)
|
||||
|
||||
except NotProvisionedError:
|
||||
logger.warning(
|
||||
f"[OAuth] User {user_id} no longer provisioned, stopping scanner"
|
||||
)
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[OAuth] Scanner error for {user_id}: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
if nc_client:
|
||||
await nc_client.close()
|
||||
|
||||
# Sleep until next interval or wake event
|
||||
try:
|
||||
with anyio.move_on_after(settings.vector_sync_scan_interval):
|
||||
await wake_event.wait()
|
||||
except anyio.get_cancelled_exc_class():
|
||||
break
|
||||
|
||||
logger.info(f"[OAuth] Scanner stopped for user: {user_id}")
|
||||
|
||||
|
||||
async def oauth_processor_task(
|
||||
worker_id: int,
|
||||
receive_stream: MemoryObjectReceiveStream[DocumentTask],
|
||||
shutdown_event: anyio.Event,
|
||||
token_broker: "TokenBrokerService",
|
||||
nextcloud_host: str,
|
||||
*,
|
||||
task_status: TaskStatus = anyio.TASK_STATUS_IGNORED,
|
||||
) -> None:
|
||||
"""Processor task for OAuth mode.
|
||||
|
||||
Handles documents from any user by fetching tokens on-demand.
|
||||
|
||||
Args:
|
||||
worker_id: Worker identifier for logging
|
||||
receive_stream: Stream to receive documents from
|
||||
shutdown_event: Event signaling shutdown
|
||||
token_broker: Token broker for obtaining access tokens
|
||||
nextcloud_host: Nextcloud base URL
|
||||
task_status: Status object for signaling task readiness
|
||||
"""
|
||||
from nextcloud_mcp_server.vector.processor import process_document
|
||||
|
||||
logger.info(f"[OAuth] Processor {worker_id} started")
|
||||
task_status.started()
|
||||
|
||||
while not shutdown_event.is_set():
|
||||
doc_task = None
|
||||
nc_client = None
|
||||
try:
|
||||
# Get document with timeout
|
||||
with anyio.fail_after(1.0):
|
||||
doc_task = await receive_stream.receive()
|
||||
|
||||
# Get token for THIS document's user
|
||||
nc_client = await get_user_client(
|
||||
doc_task.user_id, token_broker, nextcloud_host
|
||||
)
|
||||
|
||||
# Process the document
|
||||
await process_document(doc_task, nc_client)
|
||||
|
||||
except TimeoutError:
|
||||
continue
|
||||
|
||||
except anyio.EndOfStream:
|
||||
logger.info(f"[OAuth] Processor {worker_id}: Stream closed, exiting")
|
||||
break
|
||||
|
||||
except NotProvisionedError:
|
||||
if doc_task:
|
||||
logger.warning(
|
||||
f"[OAuth] User {doc_task.user_id} not provisioned, "
|
||||
f"skipping {doc_task.doc_type}_{doc_task.doc_id}"
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
if doc_task:
|
||||
logger.error(
|
||||
f"[OAuth] Processor {worker_id} error processing "
|
||||
f"{doc_task.doc_type}_{doc_task.doc_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
else:
|
||||
logger.error(f"[OAuth] Processor {worker_id} error: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
if nc_client:
|
||||
await nc_client.close()
|
||||
|
||||
logger.info(f"[OAuth] Processor {worker_id} stopped")
|
||||
|
||||
|
||||
async def _run_user_scanner_with_scope(
|
||||
user_id: str,
|
||||
cancel_scope: anyio.CancelScope,
|
||||
send_stream: MemoryObjectSendStream[DocumentTask],
|
||||
shutdown_event: anyio.Event,
|
||||
wake_event: anyio.Event,
|
||||
token_broker: "TokenBrokerService",
|
||||
nextcloud_host: str,
|
||||
user_states: dict[str, UserSyncState],
|
||||
) -> None:
|
||||
"""Wrapper to run scanner with cancellation scope.
|
||||
|
||||
Cleans up user state on exit.
|
||||
"""
|
||||
cloned_stream = send_stream.clone()
|
||||
try:
|
||||
with cancel_scope:
|
||||
await user_scanner_task(
|
||||
user_id=user_id,
|
||||
send_stream=cloned_stream,
|
||||
shutdown_event=shutdown_event,
|
||||
wake_event=wake_event,
|
||||
token_broker=token_broker,
|
||||
nextcloud_host=nextcloud_host,
|
||||
)
|
||||
finally:
|
||||
# Clean up on exit
|
||||
if user_id in user_states:
|
||||
del user_states[user_id]
|
||||
await cloned_stream.aclose()
|
||||
|
||||
|
||||
async def user_manager_task(
|
||||
send_stream: MemoryObjectSendStream[DocumentTask],
|
||||
shutdown_event: anyio.Event,
|
||||
wake_event: anyio.Event,
|
||||
token_broker: "TokenBrokerService",
|
||||
refresh_token_storage: "RefreshTokenStorage",
|
||||
nextcloud_host: str,
|
||||
user_states: dict[str, UserSyncState],
|
||||
tg: TaskGroup,
|
||||
*,
|
||||
task_status: TaskStatus = anyio.TASK_STATUS_IGNORED,
|
||||
) -> None:
|
||||
"""Supervisor task that manages per-user scanners.
|
||||
|
||||
Periodically polls RefreshTokenStorage to detect:
|
||||
- New users who have provisioned offline access -> start scanner
|
||||
- Users who have revoked access -> cancel their scanner
|
||||
|
||||
Args:
|
||||
send_stream: Stream to send documents to processors
|
||||
shutdown_event: Event signaling shutdown
|
||||
wake_event: Event to wake scanners for immediate scan
|
||||
token_broker: Token broker for obtaining access tokens
|
||||
refresh_token_storage: Storage for refresh tokens
|
||||
nextcloud_host: Nextcloud base URL
|
||||
user_states: Shared dict tracking active user scanners
|
||||
tg: Task group for spawning scanner tasks
|
||||
task_status: Status object for signaling task readiness
|
||||
"""
|
||||
settings = get_settings()
|
||||
poll_interval = settings.vector_sync_user_poll_interval
|
||||
|
||||
logger.info(f"[OAuth] User manager started (poll interval: {poll_interval}s)")
|
||||
task_status.started()
|
||||
|
||||
while not shutdown_event.is_set():
|
||||
try:
|
||||
# Get current provisioned users
|
||||
provisioned_users = set(await refresh_token_storage.get_all_user_ids())
|
||||
active_users = set(user_states.keys())
|
||||
|
||||
# Start scanners for new users
|
||||
new_users = provisioned_users - active_users
|
||||
for user_id in new_users:
|
||||
logger.info(
|
||||
f"[OAuth] Starting scanner for newly provisioned user: {user_id}"
|
||||
)
|
||||
cancel_scope = anyio.CancelScope()
|
||||
user_states[user_id] = UserSyncState(
|
||||
user_id=user_id,
|
||||
cancel_scope=cancel_scope,
|
||||
)
|
||||
|
||||
# Start scanner in task group
|
||||
tg.start_soon(
|
||||
_run_user_scanner_with_scope,
|
||||
user_id,
|
||||
cancel_scope,
|
||||
send_stream,
|
||||
shutdown_event,
|
||||
wake_event,
|
||||
token_broker,
|
||||
nextcloud_host,
|
||||
user_states,
|
||||
)
|
||||
|
||||
# Cancel scanners for revoked users
|
||||
revoked_users = active_users - provisioned_users
|
||||
for user_id in revoked_users:
|
||||
logger.info(f"[OAuth] Stopping scanner for revoked user: {user_id}")
|
||||
state = user_states.get(user_id)
|
||||
if state:
|
||||
state.cancel_scope.cancel()
|
||||
# Note: state will be removed by _run_user_scanner_with_scope on exit
|
||||
|
||||
if new_users:
|
||||
logger.info(f"[OAuth] Started {len(new_users)} new scanner(s)")
|
||||
if revoked_users:
|
||||
logger.info(f"[OAuth] Stopped {len(revoked_users)} scanner(s)")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[OAuth] User manager error: {e}", exc_info=True)
|
||||
|
||||
# Sleep until next poll
|
||||
try:
|
||||
with anyio.move_on_after(poll_interval):
|
||||
await shutdown_event.wait()
|
||||
except anyio.get_cancelled_exc_class():
|
||||
break
|
||||
|
||||
# Cancel all remaining scanners on shutdown
|
||||
logger.info(
|
||||
f"[OAuth] User manager shutting down, cancelling {len(user_states)} scanner(s)"
|
||||
)
|
||||
for state in list(user_states.values()):
|
||||
state.cancel_scope.cancel()
|
||||
|
||||
logger.info("[OAuth] User manager stopped")
|
||||
@@ -0,0 +1,306 @@
|
||||
"""Placeholder point management for Qdrant state tracking.
|
||||
|
||||
Placeholders are zero-vector points stored in Qdrant to track document processing
|
||||
state. They prevent duplicate work by marking documents as "in-flight" during the
|
||||
gap between scanner queuing and processor completion.
|
||||
|
||||
Architecture:
|
||||
- Scanner writes placeholders when queuing documents for processing
|
||||
- Processor deletes placeholders and writes real vectors after processing
|
||||
- All user-facing queries filter out placeholders (is_placeholder: False)
|
||||
|
||||
Placeholders contain:
|
||||
- Zero vectors (dimension from embedding service)
|
||||
- is_placeholder: True flag (for filtering)
|
||||
- status: "pending", "processing", "completed", "failed"
|
||||
- modified_at, etag from source document
|
||||
- queued_at timestamp
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue, PointStruct
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_embedding_service
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _generate_placeholder_id(doc_type: str, doc_id: str | int) -> str:
|
||||
"""Generate deterministic UUID for placeholder point.
|
||||
|
||||
Args:
|
||||
doc_type: Document type (note, file, etc.)
|
||||
doc_id: Document ID
|
||||
|
||||
Returns:
|
||||
UUID string for point ID
|
||||
"""
|
||||
point_name = f"{doc_type}:{doc_id}:placeholder"
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_DNS, point_name))
|
||||
|
||||
|
||||
async def write_placeholder_point(
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
user_id: str,
|
||||
modified_at: int,
|
||||
etag: str = "",
|
||||
file_path: str | None = None,
|
||||
) -> None:
|
||||
"""Write a placeholder point to Qdrant to mark document as queued.
|
||||
|
||||
This should be called by the scanner BEFORE queuing a document for processing.
|
||||
The placeholder prevents duplicate work if the scanner runs again before
|
||||
processing completes.
|
||||
|
||||
Args:
|
||||
doc_id: Document ID (int for notes/files)
|
||||
doc_type: Document type (note, file, etc.)
|
||||
user_id: User ID who owns the document
|
||||
modified_at: Document modification timestamp
|
||||
etag: Document ETag (if available)
|
||||
file_path: File path (for files only)
|
||||
|
||||
Raises:
|
||||
Exception: If Qdrant write fails
|
||||
"""
|
||||
try:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
embedding_service = get_embedding_service()
|
||||
|
||||
# Get dimension dynamically (never hardcode)
|
||||
dimension = embedding_service.get_dimension()
|
||||
|
||||
# Create zero vectors
|
||||
zero_dense = [0.0] * dimension
|
||||
|
||||
# Create empty sparse vector for placeholders
|
||||
# Use models.SparseVector with empty indices/values
|
||||
from qdrant_client import models
|
||||
|
||||
empty_sparse = models.SparseVector(indices=[], values=[])
|
||||
|
||||
# Generate deterministic point ID
|
||||
point_id = _generate_placeholder_id(doc_type, doc_id)
|
||||
|
||||
# Build payload
|
||||
payload = {
|
||||
"user_id": user_id,
|
||||
"doc_id": doc_id,
|
||||
"doc_type": doc_type,
|
||||
"is_placeholder": True,
|
||||
"status": "pending",
|
||||
"modified_at": modified_at,
|
||||
"etag": etag,
|
||||
"queued_at": int(time.time()),
|
||||
}
|
||||
|
||||
# Add file_path for files
|
||||
if doc_type == "file" and file_path:
|
||||
payload["file_path"] = file_path
|
||||
|
||||
# Create placeholder point
|
||||
point = PointStruct(
|
||||
id=point_id,
|
||||
vector={
|
||||
"dense": zero_dense,
|
||||
"sparse": empty_sparse, # Empty sparse vector for placeholders
|
||||
},
|
||||
payload=payload,
|
||||
)
|
||||
|
||||
# Upsert to Qdrant
|
||||
await qdrant_client.upsert(
|
||||
collection_name=settings.get_collection_name(),
|
||||
points=[point],
|
||||
wait=True,
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Wrote placeholder for {doc_type}_{doc_id} (user={user_id}, "
|
||||
f"modified_at={modified_at})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to write placeholder for {doc_type}_{doc_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
async def query_document_metadata(
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
user_id: str,
|
||||
) -> dict | None:
|
||||
"""Query Qdrant for existing document entry (placeholder or real).
|
||||
|
||||
Returns the payload of the first matching point, which could be:
|
||||
- A placeholder (is_placeholder: True)
|
||||
- A real indexed document (is_placeholder: False or missing)
|
||||
- None if document not in Qdrant
|
||||
|
||||
Args:
|
||||
doc_id: Document ID
|
||||
doc_type: Document type
|
||||
user_id: User ID
|
||||
|
||||
Returns:
|
||||
Payload dict if found, None otherwise
|
||||
"""
|
||||
try:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for any entry matching doc_id, doc_type, user_id
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=True,
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
return dict(point.payload)
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error querying document metadata for {doc_type}_{doc_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
async def delete_placeholder_point(
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
user_id: str,
|
||||
) -> None:
|
||||
"""Delete a placeholder point from Qdrant.
|
||||
|
||||
This should be called by the processor BEFORE writing real vectors.
|
||||
We delete the placeholder to avoid duplicates, then write the real chunks.
|
||||
|
||||
Args:
|
||||
doc_id: Document ID
|
||||
doc_type: Document type
|
||||
user_id: User ID
|
||||
|
||||
Raises:
|
||||
Exception: If Qdrant delete fails
|
||||
"""
|
||||
try:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Delete by filter (in case there are multiple chunks from old indexing)
|
||||
await qdrant_client.delete(
|
||||
collection_name=settings.get_collection_name(),
|
||||
points_selector=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(key="is_placeholder", match=MatchValue(value=True)),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug(f"Deleted placeholder for {doc_type}_{doc_id} (user={user_id})")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to delete placeholder for {doc_type}_{doc_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
async def update_placeholder_status(
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
user_id: str,
|
||||
status: str,
|
||||
) -> None:
|
||||
"""Update the status field of a placeholder point.
|
||||
|
||||
Status values:
|
||||
- "pending": Queued for processing
|
||||
- "processing": Currently being processed
|
||||
- "completed": Processing completed successfully
|
||||
- "failed": Processing failed
|
||||
|
||||
Args:
|
||||
doc_id: Document ID
|
||||
doc_type: Document type
|
||||
user_id: User ID
|
||||
status: New status value
|
||||
|
||||
Raises:
|
||||
Exception: If Qdrant update fails
|
||||
"""
|
||||
try:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Update payload using set_payload
|
||||
await qdrant_client.set_payload(
|
||||
collection_name=settings.get_collection_name(),
|
||||
payload={"status": status},
|
||||
points=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(key="is_placeholder", match=MatchValue(value=True)),
|
||||
]
|
||||
),
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Updated placeholder status for {doc_type}_{doc_id} to '{status}' "
|
||||
f"(user={user_id})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to update placeholder status for {doc_type}_{doc_id}: {e}"
|
||||
)
|
||||
# Don't raise - status updates are non-critical
|
||||
|
||||
|
||||
def get_placeholder_filter() -> FieldCondition:
|
||||
"""Get a filter condition to exclude placeholders from queries.
|
||||
|
||||
Add this to all user-facing search/visualization queries to ensure
|
||||
placeholders are never returned to users.
|
||||
|
||||
Returns:
|
||||
FieldCondition that filters out is_placeholder: True
|
||||
|
||||
Example:
|
||||
Filter(
|
||||
must=[
|
||||
get_placeholder_filter(), # Exclude placeholders
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
]
|
||||
)
|
||||
"""
|
||||
return FieldCondition(
|
||||
key="is_placeholder",
|
||||
match=MatchValue(value=False),
|
||||
)
|
||||
@@ -6,6 +6,7 @@ Processes documents from stream: fetches content, generates embeddings, stores i
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, cast
|
||||
|
||||
import anyio
|
||||
from anyio.abc import TaskStatus
|
||||
@@ -23,12 +24,50 @@ from nextcloud_mcp_server.observability.metrics import (
|
||||
)
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.vector.document_chunker import DocumentChunker
|
||||
from nextcloud_mcp_server.vector.placeholder import delete_placeholder_point
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
from nextcloud_mcp_server.vector.scanner import DocumentTask
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def assign_page_numbers(chunks, page_boundaries):
|
||||
"""Assign page numbers to chunks based on page boundaries.
|
||||
|
||||
Each chunk gets the page number where most of its content appears.
|
||||
For chunks spanning multiple pages, assigns the page containing the
|
||||
majority of the chunk's characters.
|
||||
|
||||
Args:
|
||||
chunks: List of ChunkWithPosition objects
|
||||
page_boundaries: List of dicts with {page, start_offset, end_offset}
|
||||
|
||||
Returns:
|
||||
None (modifies chunks in place)
|
||||
"""
|
||||
if not page_boundaries:
|
||||
return
|
||||
|
||||
for chunk in chunks:
|
||||
# Find which page(s) this chunk overlaps with
|
||||
max_overlap = 0
|
||||
assigned_page = None
|
||||
|
||||
for boundary in page_boundaries:
|
||||
# Calculate overlap between chunk and page
|
||||
overlap_start = max(chunk.start_offset, boundary["start_offset"])
|
||||
overlap_end = min(chunk.end_offset, boundary["end_offset"])
|
||||
overlap = max(0, overlap_end - overlap_start)
|
||||
|
||||
# Assign to page with maximum overlap
|
||||
if overlap > max_overlap:
|
||||
max_overlap = overlap
|
||||
assigned_page = boundary["page"]
|
||||
|
||||
if assigned_page is not None:
|
||||
chunk.page_number = assigned_page
|
||||
|
||||
|
||||
async def processor_task(
|
||||
worker_id: int,
|
||||
receive_stream: MemoryObjectReceiveStream[DocumentTask],
|
||||
@@ -218,31 +257,406 @@ async def _index_document(
|
||||
settings = get_settings()
|
||||
|
||||
# Fetch document content
|
||||
if doc_task.doc_type == "note":
|
||||
document = await nc_client.notes.get_note(int(doc_task.doc_id))
|
||||
content = f"{document['title']}\n\n{document['content']}"
|
||||
title = document["title"]
|
||||
etag = document.get("etag", "")
|
||||
else:
|
||||
raise ValueError(f"Unsupported doc_type: {doc_task.doc_type}")
|
||||
with trace_operation(
|
||||
"vector_sync.fetch_content",
|
||||
attributes={
|
||||
"vector_sync.doc_type": doc_task.doc_type,
|
||||
"vector_sync.doc_id": doc_task.doc_id,
|
||||
},
|
||||
):
|
||||
if doc_task.doc_type == "note":
|
||||
document = await nc_client.notes.get_note(int(doc_task.doc_id))
|
||||
content = f"{document['title']}\n\n{document['content']}"
|
||||
title = document["title"]
|
||||
etag = document.get("etag", "")
|
||||
file_metadata = {} # No file-specific metadata for notes
|
||||
file_path = None # Notes don't have file paths
|
||||
content_bytes = None # Notes don't have binary content
|
||||
content_type = None
|
||||
elif doc_task.doc_type == "news_item":
|
||||
from nextcloud_mcp_server.vector.html_processor import html_to_markdown
|
||||
|
||||
item = await nc_client.news.get_item(int(doc_task.doc_id))
|
||||
# Convert HTML body to Markdown for better embedding
|
||||
body_markdown = html_to_markdown(item.get("body", ""))
|
||||
# Build content: title + URL + body
|
||||
item_title = item.get("title", "")
|
||||
item_url = item.get("url", "")
|
||||
feed_title = item.get("feedTitle", "")
|
||||
|
||||
# Structure content for embedding
|
||||
content_parts = [item_title]
|
||||
if feed_title:
|
||||
content_parts.append(f"Source: {feed_title}")
|
||||
if item_url:
|
||||
content_parts.append(f"URL: {item_url}")
|
||||
content_parts.append("") # Blank line
|
||||
content_parts.append(body_markdown)
|
||||
content = "\n".join(content_parts)
|
||||
|
||||
title = item_title
|
||||
etag = item.get("guidHash", "")
|
||||
# Store news-specific metadata for later use in payload
|
||||
file_metadata = {
|
||||
"feed_id": item.get("feedId"),
|
||||
"feed_title": feed_title,
|
||||
"author": item.get("author"),
|
||||
"pub_date": item.get("pubDate"),
|
||||
"starred": item.get("starred", False),
|
||||
"unread": item.get("unread", True),
|
||||
"url": item_url,
|
||||
"guid_hash": item.get("guidHash"),
|
||||
"enclosure_link": item.get("enclosureLink"),
|
||||
"enclosure_mime": item.get("enclosureMime"),
|
||||
}
|
||||
file_path = None
|
||||
content_bytes = None
|
||||
content_type = None
|
||||
elif doc_task.doc_type == "deck_card":
|
||||
# Fetch card from Deck API
|
||||
# Use metadata from scanner if available (O(1) lookup)
|
||||
# Otherwise fall back to iteration (legacy data)
|
||||
card = None
|
||||
board = None
|
||||
stack = None
|
||||
|
||||
if (
|
||||
doc_task.metadata
|
||||
and "board_id" in doc_task.metadata
|
||||
and "stack_id" in doc_task.metadata
|
||||
):
|
||||
# Fast path: Direct lookup with known board_id/stack_id
|
||||
board_id = doc_task.metadata["board_id"]
|
||||
stack_id = doc_task.metadata["stack_id"]
|
||||
try:
|
||||
card = await nc_client.deck.get_card(
|
||||
board_id=int(board_id),
|
||||
stack_id=int(stack_id),
|
||||
card_id=int(doc_task.doc_id),
|
||||
)
|
||||
# Fetch board and stack info for metadata
|
||||
boards = await nc_client.deck.get_boards()
|
||||
for b in boards:
|
||||
if b.id == int(board_id):
|
||||
board = b
|
||||
stacks = await nc_client.deck.get_stacks(b.id)
|
||||
for s in stacks:
|
||||
if s.id == int(stack_id):
|
||||
stack = s
|
||||
break
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to fetch card with metadata (board_id={board_id}, stack_id={stack_id}, card_id={doc_task.doc_id}): {e}, falling back to iteration"
|
||||
)
|
||||
|
||||
# Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed)
|
||||
if card is None:
|
||||
boards = await nc_client.deck.get_boards()
|
||||
card_found = False
|
||||
|
||||
for b in boards:
|
||||
if card_found:
|
||||
break
|
||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||
if b.deletedAt > 0:
|
||||
continue
|
||||
stacks = await nc_client.deck.get_stacks(b.id)
|
||||
for s in stacks:
|
||||
if card_found:
|
||||
break
|
||||
if s.cards:
|
||||
for c in s.cards:
|
||||
if c.id == int(doc_task.doc_id):
|
||||
card = c
|
||||
board = b
|
||||
stack = s
|
||||
card_found = True
|
||||
break
|
||||
|
||||
if not card_found:
|
||||
raise ValueError(
|
||||
f"Deck card {doc_task.doc_id} not found in any board/stack"
|
||||
)
|
||||
|
||||
# Type narrowing: card, board, stack are all set if we reach here
|
||||
assert card is not None
|
||||
assert board is not None
|
||||
assert stack is not None
|
||||
|
||||
# Build content from card title and description
|
||||
content_parts = [card.title]
|
||||
if card.description:
|
||||
content_parts.append(card.description)
|
||||
content = "\n\n".join(content_parts)
|
||||
title = card.title
|
||||
|
||||
# Store deck-specific metadata
|
||||
file_metadata = {
|
||||
"board_id": board.id,
|
||||
"board_title": board.title,
|
||||
"stack_id": stack.id,
|
||||
"stack_title": stack.title,
|
||||
"card_type": card.type,
|
||||
"duedate": (card.duedate.isoformat() if card.duedate else None),
|
||||
"archived": card.archived,
|
||||
"owner": (
|
||||
card.owner.uid if hasattr(card.owner, "uid") else str(card.owner)
|
||||
),
|
||||
}
|
||||
etag = card.etag or ""
|
||||
file_path = None
|
||||
content_bytes = None
|
||||
content_type = None
|
||||
elif doc_task.doc_type == "file":
|
||||
# For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
|
||||
if not doc_task.file_path:
|
||||
raise ValueError(
|
||||
f"File path required for file indexing but not provided (file_id={doc_task.doc_id})"
|
||||
)
|
||||
file_path = doc_task.file_path
|
||||
|
||||
# Read file content via WebDAV
|
||||
content_bytes, content_type = await nc_client.webdav.read_file(file_path)
|
||||
else:
|
||||
raise ValueError(f"Unsupported doc_type: {doc_task.doc_type}")
|
||||
|
||||
# Process file content (text extraction)
|
||||
if doc_task.doc_type == "file":
|
||||
# Type narrowing: content_bytes and content_type are set for files
|
||||
assert content_bytes is not None
|
||||
assert content_type is not None
|
||||
assert file_path is not None
|
||||
|
||||
with trace_operation(
|
||||
"vector_sync.document_process",
|
||||
attributes={
|
||||
"vector_sync.content_type": content_type,
|
||||
"vector_sync.file_size": len(content_bytes),
|
||||
},
|
||||
):
|
||||
# Use document processor registry to extract text
|
||||
from nextcloud_mcp_server.document_processors import get_registry
|
||||
|
||||
registry = get_registry()
|
||||
|
||||
try:
|
||||
result = await registry.process(
|
||||
content=content_bytes,
|
||||
content_type=content_type,
|
||||
filename=file_path,
|
||||
)
|
||||
content = result.text
|
||||
file_metadata = result.metadata
|
||||
title = file_metadata.get("title") or file_path.split("/")[-1]
|
||||
etag = "" # WebDAV read_file doesn't return etag
|
||||
|
||||
# Diagnostic: Log page boundary information if available
|
||||
if "page_boundaries" in file_metadata:
|
||||
page_boundaries = file_metadata["page_boundaries"]
|
||||
logger.info(
|
||||
f"Page boundaries for {file_path}: "
|
||||
f"{len(page_boundaries)} pages, text length: {len(content)}"
|
||||
)
|
||||
# Log first 3 page boundaries for debugging
|
||||
for boundary in page_boundaries[:3]:
|
||||
logger.debug(
|
||||
f" Page {boundary['page']}: "
|
||||
f"offsets [{boundary['start_offset']}:{boundary['end_offset']}]"
|
||||
)
|
||||
# Verify last boundary matches text length
|
||||
if page_boundaries:
|
||||
last_boundary = page_boundaries[-1]
|
||||
if last_boundary["end_offset"] != len(content):
|
||||
logger.warning(
|
||||
f"Text length mismatch: content={len(content)}, "
|
||||
f"last_boundary_end={last_boundary['end_offset']}"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"No page_boundaries in metadata for {file_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process file {file_path}: {e}")
|
||||
raise
|
||||
|
||||
# Tokenize and chunk (using configured chunk size and overlap)
|
||||
chunker = DocumentChunker(
|
||||
chunk_size=settings.document_chunk_size,
|
||||
overlap=settings.document_chunk_overlap,
|
||||
)
|
||||
chunks = chunker.chunk_text(content)
|
||||
with trace_operation(
|
||||
"vector_sync.chunk_text",
|
||||
attributes={
|
||||
"vector_sync.input_chars": len(content),
|
||||
"vector_sync.chunk_size": settings.document_chunk_size,
|
||||
"vector_sync.overlap": settings.document_chunk_overlap,
|
||||
},
|
||||
):
|
||||
chunker = DocumentChunker(
|
||||
chunk_size=settings.document_chunk_size,
|
||||
overlap=settings.document_chunk_overlap,
|
||||
)
|
||||
chunks = await chunker.chunk_text(content)
|
||||
|
||||
# Assign page numbers to chunks if page boundaries are available (PDFs)
|
||||
page_boundaries = file_metadata.get("page_boundaries")
|
||||
if doc_task.doc_type == "file" and page_boundaries is not None:
|
||||
# Type narrowing: page_boundaries is guaranteed to be list[dict] here
|
||||
page_boundaries_list = cast(list[dict[str, Any]], page_boundaries)
|
||||
with trace_operation(
|
||||
"vector_sync.assign_page_numbers",
|
||||
attributes={
|
||||
"vector_sync.chunk_count": len(chunks),
|
||||
"vector_sync.page_count": len(page_boundaries_list),
|
||||
},
|
||||
):
|
||||
assign_page_numbers(chunks, page_boundaries_list)
|
||||
|
||||
# Diagnostic: Verify page number assignment
|
||||
assigned_count = sum(1 for c in chunks if c.page_number is not None)
|
||||
logger.info(
|
||||
f"Assigned page numbers to {assigned_count}/{len(chunks)} chunks "
|
||||
f"for {file_path}"
|
||||
)
|
||||
|
||||
# Log first 3 chunks to see their page assignments
|
||||
for i, chunk in enumerate(chunks[:3]):
|
||||
logger.debug(
|
||||
f" Chunk {i}: page={chunk.page_number}, "
|
||||
f"offsets=[{chunk.start_offset}:{chunk.end_offset}]"
|
||||
)
|
||||
|
||||
# Warning if NO page numbers were assigned
|
||||
if assigned_count == 0:
|
||||
logger.warning(
|
||||
f"NO page numbers assigned! "
|
||||
f"Text length: {len(content)}, "
|
||||
f"Chunks: {len(chunks)}, "
|
||||
f"Chunk offset range: [{chunks[0].start_offset}:{chunks[-1].end_offset}], "
|
||||
f"Page boundaries: {len(page_boundaries_list)} pages, "
|
||||
f"First boundary: {page_boundaries_list[0] if page_boundaries_list else 'None'}"
|
||||
)
|
||||
|
||||
# Extract chunk texts for embedding
|
||||
chunk_texts = [chunk.text for chunk in chunks]
|
||||
|
||||
# Generate dense embeddings (I/O bound - external API call)
|
||||
embedding_service = get_embedding_service()
|
||||
dense_embeddings = await embedding_service.embed_batch(chunk_texts)
|
||||
# Initialize results containers
|
||||
dense_embeddings: list = []
|
||||
sparse_embeddings: list = []
|
||||
chunk_images: dict[int, dict] = {}
|
||||
|
||||
# Generate sparse embeddings (BM25 for keyword matching)
|
||||
bm25_service = get_bm25_service()
|
||||
sparse_embeddings = bm25_service.encode_batch(chunk_texts)
|
||||
# Determine if we need PDF highlighting
|
||||
is_pdf = doc_task.doc_type == "file" and content_type == "application/pdf"
|
||||
|
||||
# Define async tasks for parallel execution
|
||||
async def generate_dense_embeddings():
|
||||
"""Generate dense embeddings (I/O bound - external API call)."""
|
||||
nonlocal dense_embeddings
|
||||
with trace_operation(
|
||||
"vector_sync.embed_dense",
|
||||
attributes={
|
||||
"vector_sync.chunk_count": len(chunk_texts),
|
||||
"vector_sync.total_chars": sum(len(t) for t in chunk_texts),
|
||||
},
|
||||
):
|
||||
embedding_service = get_embedding_service()
|
||||
dense_embeddings = await embedding_service.embed_batch(chunk_texts)
|
||||
|
||||
async def generate_sparse_embeddings():
|
||||
"""Generate sparse embeddings (BM25 for keyword matching)."""
|
||||
nonlocal sparse_embeddings
|
||||
with trace_operation(
|
||||
"vector_sync.embed_sparse",
|
||||
attributes={
|
||||
"vector_sync.chunk_count": len(chunk_texts),
|
||||
},
|
||||
):
|
||||
bm25_service = get_bm25_service()
|
||||
sparse_embeddings = await bm25_service.encode_batch(chunk_texts)
|
||||
|
||||
async def generate_highlights():
|
||||
"""Generate highlighted page images for PDF chunks (CPU-bound)."""
|
||||
nonlocal chunk_images
|
||||
if not is_pdf:
|
||||
return
|
||||
|
||||
# Type narrowing: content_bytes is set for PDF files
|
||||
assert content_bytes is not None
|
||||
|
||||
with trace_operation(
|
||||
"vector_sync.generate_highlights",
|
||||
attributes={
|
||||
"vector_sync.chunk_count": len(chunks),
|
||||
"vector_sync.pdf_size": len(content_bytes),
|
||||
},
|
||||
):
|
||||
import base64
|
||||
|
||||
from nextcloud_mcp_server.search.pdf_highlighter import PDFHighlighter
|
||||
|
||||
# Build chunk data for batch processing
|
||||
# Format: (chunk_index, start_offset, end_offset, page_number, chunk_text)
|
||||
chunk_data: list[tuple[int, int, int, int | None, str]] = [
|
||||
(i, chunk.start_offset, chunk.end_offset, chunk.page_number, chunk.text)
|
||||
for i, chunk in enumerate(chunks)
|
||||
if chunk.page_number is not None
|
||||
]
|
||||
|
||||
# Get pre-computed page boundaries from document processor
|
||||
page_boundaries = file_metadata.get("page_boundaries")
|
||||
if not page_boundaries:
|
||||
logger.warning("No page boundaries available, skipping highlighting")
|
||||
return
|
||||
|
||||
# Type narrowing: page_boundaries is guaranteed to be list[dict] here
|
||||
page_boundaries_list = cast(list[dict[str, Any]], page_boundaries)
|
||||
|
||||
logger.info(
|
||||
f"Batch generating highlighted page images for {len(chunk_data)} PDF chunks"
|
||||
)
|
||||
|
||||
# Run CPU-bound highlighting in thread pool
|
||||
# Pass pre-computed page boundaries and full text to avoid re-processing the PDF
|
||||
batch_results = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: PDFHighlighter.highlight_chunks_batch(
|
||||
pdf_bytes=content_bytes,
|
||||
chunks=chunk_data,
|
||||
page_boundaries=page_boundaries_list,
|
||||
full_text=content,
|
||||
color="yellow",
|
||||
zoom=2.0,
|
||||
)
|
||||
)
|
||||
|
||||
# Convert results to storage format
|
||||
for chunk_index, (
|
||||
png_bytes,
|
||||
actual_page_num,
|
||||
highlight_count,
|
||||
) in batch_results.items():
|
||||
image_base64 = base64.b64encode(png_bytes).decode("utf-8")
|
||||
chunk_images[chunk_index] = {
|
||||
"image": image_base64,
|
||||
"page": actual_page_num,
|
||||
"highlights": highlight_count,
|
||||
"size": len(png_bytes),
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Generated {len(chunk_images)}/{len(chunks)} highlighted page images "
|
||||
f"(avg {sum(img['size'] for img in chunk_images.values()) // max(len(chunk_images), 1):,} bytes)"
|
||||
)
|
||||
|
||||
# Run all embedding/highlighting operations in parallel
|
||||
# - Dense embeddings: I/O bound (API call)
|
||||
# - Sparse embeddings: CPU bound (local BM25)
|
||||
# - Highlighting: CPU bound (PyMuPDF rendering, runs in thread pool)
|
||||
with trace_operation(
|
||||
"vector_sync.parallel_processing",
|
||||
attributes={
|
||||
"vector_sync.is_pdf": is_pdf,
|
||||
"vector_sync.chunk_count": len(chunks),
|
||||
},
|
||||
):
|
||||
async with anyio.create_task_group() as tg:
|
||||
tg.start_soon(generate_dense_embeddings)
|
||||
tg.start_soon(generate_sparse_embeddings)
|
||||
tg.start_soon(generate_highlights)
|
||||
|
||||
# Prepare Qdrant points
|
||||
indexed_at = int(time.time())
|
||||
@@ -267,8 +681,9 @@ async def _index_document(
|
||||
"user_id": doc_task.user_id,
|
||||
"doc_id": doc_task.doc_id,
|
||||
"doc_type": doc_task.doc_type,
|
||||
"is_placeholder": False, # Real indexed document (not placeholder)
|
||||
"title": title,
|
||||
"excerpt": chunk.text[:200],
|
||||
"excerpt": chunk.text, # Full chunk text (up to chunk_size, default 2048 chars)
|
||||
"indexed_at": indexed_at,
|
||||
"modified_at": doc_task.modified_at,
|
||||
"etag": etag,
|
||||
@@ -277,16 +692,105 @@ async def _index_document(
|
||||
"chunk_start_offset": chunk.start_offset,
|
||||
"chunk_end_offset": chunk.end_offset,
|
||||
"metadata_version": 2, # v2 includes position metadata
|
||||
# File-specific metadata (PDF, etc.)
|
||||
**(
|
||||
{
|
||||
"file_path": file_path, # Store file path for retrieval
|
||||
"mime_type": content_type, # From WebDAV response
|
||||
"file_size": file_metadata.get("file_size"),
|
||||
"page_number": chunk.page_number,
|
||||
"page_count": file_metadata.get("page_count"),
|
||||
"author": file_metadata.get("author"),
|
||||
"creation_date": file_metadata.get("creation_date"),
|
||||
"has_images": file_metadata.get("has_images", False),
|
||||
"image_count": file_metadata.get("image_count", 0),
|
||||
}
|
||||
if doc_task.doc_type == "file"
|
||||
else {}
|
||||
),
|
||||
# News item-specific metadata
|
||||
**(
|
||||
{
|
||||
"feed_id": file_metadata.get("feed_id"),
|
||||
"feed_title": file_metadata.get("feed_title"),
|
||||
"author": file_metadata.get("author"),
|
||||
"pub_date": file_metadata.get("pub_date"),
|
||||
"starred": file_metadata.get("starred"),
|
||||
"unread": file_metadata.get("unread"),
|
||||
"url": file_metadata.get("url"),
|
||||
"guid_hash": file_metadata.get("guid_hash"),
|
||||
"enclosure_link": file_metadata.get("enclosure_link"),
|
||||
"enclosure_mime": file_metadata.get("enclosure_mime"),
|
||||
}
|
||||
if doc_task.doc_type == "news_item"
|
||||
else {}
|
||||
),
|
||||
# Deck card-specific metadata
|
||||
**(
|
||||
{
|
||||
"board_id": file_metadata.get("board_id"),
|
||||
"board_title": file_metadata.get("board_title"),
|
||||
"stack_id": file_metadata.get("stack_id"),
|
||||
"stack_title": file_metadata.get("stack_title"),
|
||||
"card_type": file_metadata.get("card_type"),
|
||||
"duedate": file_metadata.get("duedate"),
|
||||
"owner": file_metadata.get("owner"),
|
||||
}
|
||||
if doc_task.doc_type == "deck_card"
|
||||
else {}
|
||||
),
|
||||
# Highlighted page image (PDF only)
|
||||
**(
|
||||
{
|
||||
"highlighted_page_image": chunk_images[i]["image"],
|
||||
"highlighted_page_number": chunk_images[i]["page"],
|
||||
"highlight_count": chunk_images[i]["highlights"],
|
||||
}
|
||||
if i in chunk_images
|
||||
else {}
|
||||
),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Upsert to Qdrant
|
||||
await qdrant_client.upsert(
|
||||
collection_name=settings.get_collection_name(),
|
||||
points=points,
|
||||
wait=True,
|
||||
)
|
||||
# Delete placeholder before writing real vectors
|
||||
# This prevents duplicates and cleans up the placeholder state
|
||||
try:
|
||||
await delete_placeholder_point(
|
||||
doc_id=doc_task.doc_id,
|
||||
doc_type=doc_task.doc_type,
|
||||
user_id=doc_task.user_id,
|
||||
)
|
||||
except Exception as e:
|
||||
# Log but don't fail indexing if placeholder deletion fails
|
||||
logger.warning(
|
||||
f"Failed to delete placeholder for {doc_task.doc_type}_{doc_task.doc_id}: {e}"
|
||||
)
|
||||
|
||||
# Upsert to Qdrant in batches to avoid timeout with large payloads
|
||||
# Each batch is limited to avoid WriteTimeout when sending large image payloads
|
||||
BATCH_SIZE = 10 # ~2MB per batch with images
|
||||
with trace_operation(
|
||||
"vector_sync.qdrant_upsert",
|
||||
attributes={
|
||||
"vector_sync.point_count": len(points),
|
||||
"vector_sync.collection": settings.get_collection_name(),
|
||||
"vector_sync.images_count": len(chunk_images),
|
||||
"vector_sync.batch_size": BATCH_SIZE,
|
||||
},
|
||||
):
|
||||
for batch_start in range(0, len(points), BATCH_SIZE):
|
||||
batch_end = min(batch_start + BATCH_SIZE, len(points))
|
||||
batch = points[batch_start:batch_end]
|
||||
await qdrant_client.upsert(
|
||||
collection_name=settings.get_collection_name(),
|
||||
points=batch,
|
||||
wait=True,
|
||||
)
|
||||
if batch_end < len(points):
|
||||
logger.debug(
|
||||
f"Upserted batch {batch_start // BATCH_SIZE + 1}/{(len(points) + BATCH_SIZE - 1) // BATCH_SIZE}"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Indexed {doc_task.doc_type}_{doc_task.doc_id} for {doc_task.user_id} "
|
||||
|
||||
@@ -89,31 +89,35 @@ async def get_qdrant_client() -> AsyncQdrantClient:
|
||||
if isinstance(vectors, dict):
|
||||
actual_dimension = vectors["dense"].size
|
||||
else:
|
||||
# Type narrowing: vectors must be VectorParams if not dict
|
||||
assert isinstance(vectors, VectorParams)
|
||||
actual_dimension = vectors.size
|
||||
|
||||
# Validate dimension matches
|
||||
if actual_dimension != expected_dimension:
|
||||
embedding_model = settings.get_embedding_model_name()
|
||||
raise ValueError(
|
||||
f"Dimension mismatch for collection '{collection_name}':\n"
|
||||
f" Expected: {expected_dimension} (from embedding model '{settings.ollama_embedding_model}')\n"
|
||||
f" Expected: {expected_dimension} (from embedding model '{embedding_model}')\n"
|
||||
f" Found: {actual_dimension}\n"
|
||||
f"This usually means you changed the embedding model.\n"
|
||||
f"Solutions:\n"
|
||||
f" 1. Delete the old collection: Collection will be recreated with new dimensions\n"
|
||||
f" 2. Set QDRANT_COLLECTION to use a different collection name\n"
|
||||
f" 3. Revert OLLAMA_EMBEDDING_MODEL to the original model"
|
||||
f" 3. Revert to the original embedding model"
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Using existing Qdrant collection: {collection_name} "
|
||||
f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
|
||||
f"(dimension={actual_dimension}, model={settings.get_embedding_model_name()})"
|
||||
)
|
||||
|
||||
else:
|
||||
# Collection doesn't exist - create it
|
||||
embedding_model = settings.get_embedding_model_name()
|
||||
logger.info(
|
||||
f"Collection '{collection_name}' not found, creating with "
|
||||
f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
|
||||
f"dimension={expected_dimension}, model={embedding_model}..."
|
||||
)
|
||||
await _qdrant_client.create_collection(
|
||||
collection_name=collection_name,
|
||||
@@ -134,7 +138,7 @@ async def get_qdrant_client() -> AsyncQdrantClient:
|
||||
logger.info(
|
||||
f"Created Qdrant collection: {collection_name}\n"
|
||||
f" Dense vector dimension: {expected_dimension}\n"
|
||||
f" Dense embedding model: {settings.ollama_embedding_model}\n"
|
||||
f" Dense embedding model: {embedding_model}\n"
|
||||
f" Sparse vectors: BM25 (for hybrid search)\n"
|
||||
f" Distance: COSINE\n"
|
||||
f"Background sync will index all documents with dense + sparse vectors."
|
||||
|
||||
@@ -4,6 +4,7 @@ Periodically scans enabled users' content and queues changed documents for proce
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -16,6 +17,10 @@ from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.observability.metrics import record_vector_sync_scan
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.vector.placeholder import (
|
||||
query_document_metadata,
|
||||
write_placeholder_point,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -26,10 +31,14 @@ class DocumentTask:
|
||||
"""Document task for processing queue."""
|
||||
|
||||
user_id: str
|
||||
doc_id: str
|
||||
doc_id: int | str # int for files/notes, str for legacy
|
||||
doc_type: str # "note", "file", "calendar"
|
||||
operation: str # "index" or "delete"
|
||||
modified_at: int
|
||||
file_path: str | None = None # File path for files (when doc_id is file_id)
|
||||
metadata: dict[str, int | str] | None = (
|
||||
None # Additional metadata (e.g., board_id/stack_id for deck_card)
|
||||
)
|
||||
|
||||
|
||||
# Track documents potentially deleted (grace period before actual deletion)
|
||||
@@ -73,9 +82,11 @@ async def get_last_indexed_timestamp(user_id: str) -> int | None:
|
||||
|
||||
if scroll_result[0]:
|
||||
timestamps = [
|
||||
point.payload.get("indexed_at", 0) for point in scroll_result[0]
|
||||
point.payload.get("indexed_at", 0)
|
||||
for point in scroll_result[0]
|
||||
if point.payload is not None
|
||||
]
|
||||
max_timestamp = max(timestamps)
|
||||
max_timestamp = max(timestamps) if timestamps else 0
|
||||
logger.info(
|
||||
f"Max indexed_at: {max_timestamp}, timestamps sample: {timestamps[:3]}"
|
||||
)
|
||||
@@ -182,8 +193,9 @@ async def scan_user_documents(
|
||||
f"[SCAN-{scan_id}] Using pruneBefore={prune_before} to optimize data transfer"
|
||||
)
|
||||
|
||||
# Get indexed state from Qdrant first (for incremental sync)
|
||||
indexed_docs = {}
|
||||
# For deletion tracking, get all doc_ids in Qdrant (for incremental sync)
|
||||
# Note: We no longer bulk-query indexed_at, instead check per-document
|
||||
indexed_doc_ids = set()
|
||||
if not initial_sync:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
@@ -194,17 +206,18 @@ async def scan_user_documents(
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="note")),
|
||||
]
|
||||
),
|
||||
with_payload=["doc_id", "indexed_at"],
|
||||
with_payload=["doc_id"],
|
||||
with_vectors=False,
|
||||
limit=10000,
|
||||
)
|
||||
|
||||
indexed_docs = {
|
||||
point.payload["doc_id"]: point.payload["indexed_at"]
|
||||
for point in scroll_result[0]
|
||||
indexed_doc_ids = {
|
||||
point.payload["doc_id"]
|
||||
for point in (scroll_result[0] or [])
|
||||
if point.payload is not None
|
||||
}
|
||||
|
||||
logger.debug(f"Found {len(indexed_docs)} indexed documents in Qdrant")
|
||||
logger.debug(f"Found {len(indexed_doc_ids)} indexed documents in Qdrant")
|
||||
|
||||
# Stream notes from Nextcloud and process immediately
|
||||
note_count = 0
|
||||
@@ -218,7 +231,14 @@ async def scan_user_documents(
|
||||
modified_at = note.get("modified", 0)
|
||||
|
||||
if initial_sync:
|
||||
# Send everything on first sync
|
||||
# Send everything on first sync - write placeholder first
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="note",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
etag=note.get("etag", ""),
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
@@ -230,9 +250,7 @@ async def scan_user_documents(
|
||||
)
|
||||
queued += 1
|
||||
else:
|
||||
# Incremental sync: compare with indexed state
|
||||
indexed_at = indexed_docs.get(doc_id)
|
||||
|
||||
# Incremental sync: check if document exists and compare modified_at
|
||||
# If document reappeared, remove from potentially_deleted
|
||||
doc_key = (user_id, doc_id)
|
||||
if doc_key in _potentially_deleted:
|
||||
@@ -241,8 +259,48 @@ async def scan_user_documents(
|
||||
)
|
||||
del _potentially_deleted[doc_key]
|
||||
|
||||
# Query Qdrant for existing entry (placeholder or real)
|
||||
existing_metadata = await query_document_metadata(
|
||||
doc_id=doc_id, doc_type="note", user_id=user_id
|
||||
)
|
||||
|
||||
# Send if never indexed or modified since last index
|
||||
if indexed_at is None or modified_at > indexed_at:
|
||||
# Compare against stored modified_at (not indexed_at!)
|
||||
needs_indexing = False
|
||||
if existing_metadata is None:
|
||||
# Never seen before
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("modified_at", 0) < modified_at:
|
||||
# Document modified since last indexing
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for note {doc_id} "
|
||||
f"(age={placeholder_age:.1f}s), requeuing"
|
||||
)
|
||||
needs_indexing = True
|
||||
else:
|
||||
logger.debug(
|
||||
f"Skipping note {doc_id} with recent placeholder "
|
||||
f"(age={placeholder_age:.1f}s < {stale_threshold:.1f}s)"
|
||||
)
|
||||
|
||||
if needs_indexing:
|
||||
# Write placeholder before queuing
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="note",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
etag=note.get("etag", ""),
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
@@ -270,7 +328,7 @@ async def scan_user_documents(
|
||||
) # Allow 1.5 scan intervals
|
||||
current_time = time.time()
|
||||
|
||||
for doc_id in indexed_docs:
|
||||
for doc_id in indexed_doc_ids:
|
||||
if doc_id not in nextcloud_doc_ids:
|
||||
doc_key = (user_id, doc_id)
|
||||
|
||||
@@ -309,7 +367,607 @@ async def scan_user_documents(
|
||||
)
|
||||
_potentially_deleted[doc_key] = current_time
|
||||
|
||||
# Scan tagged PDF files (after notes)
|
||||
# Get indexed file IDs from Qdrant (for deletion tracking)
|
||||
indexed_file_ids = set()
|
||||
if not initial_sync:
|
||||
file_scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="file")),
|
||||
]
|
||||
),
|
||||
limit=10000, # Reasonable limit for file count
|
||||
with_payload=["doc_id"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
indexed_file_ids = {
|
||||
point.payload["doc_id"]
|
||||
for point in (file_scroll_result[0] or [])
|
||||
if point.payload is not None
|
||||
}
|
||||
|
||||
logger.debug(f"Found {len(indexed_file_ids)} indexed files in Qdrant")
|
||||
|
||||
# Scan for tagged PDF files
|
||||
file_count = 0
|
||||
file_queued = 0
|
||||
nextcloud_file_ids = set()
|
||||
|
||||
try:
|
||||
# Find files with vector-index tag using OCS Tags API
|
||||
settings = get_settings()
|
||||
tag_name = os.getenv("VECTOR_SYNC_PDF_TAG", "vector-index")
|
||||
# Use NextcloudClient.find_files_by_tag() which uses proper OCS API
|
||||
# and filters by PDF MIME type
|
||||
tagged_files = await nc_client.find_files_by_tag(
|
||||
tag_name, mime_type_filter="application/pdf"
|
||||
)
|
||||
|
||||
for file_info in tagged_files:
|
||||
# Files are already filtered by MIME type in find_files_by_tag()
|
||||
file_count += 1
|
||||
file_id = file_info["id"] # Use numeric file ID, not path
|
||||
file_path = file_info["path"] # Keep path for logging
|
||||
nextcloud_file_ids.add(file_id)
|
||||
|
||||
# Use last_modified timestamp if available, otherwise use current time
|
||||
modified_at = file_info.get("last_modified_timestamp", int(time.time()))
|
||||
if isinstance(file_info.get("last_modified"), str):
|
||||
# Parse RFC 2822 date format if needed
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
try:
|
||||
dt = parsedate_to_datetime(file_info["last_modified"])
|
||||
modified_at = int(dt.timestamp())
|
||||
except (ValueError, KeyError):
|
||||
pass
|
||||
|
||||
if initial_sync:
|
||||
# Send everything on first sync - write placeholder first
|
||||
await write_placeholder_point(
|
||||
doc_id=file_id,
|
||||
doc_type="file",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
file_path=file_path,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=file_id, # Use numeric file ID
|
||||
doc_type="file",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
file_path=file_path, # Pass file path for content retrieval
|
||||
)
|
||||
)
|
||||
file_queued += 1
|
||||
else:
|
||||
# Incremental sync: check if file exists and compare modified_at
|
||||
# If file reappeared, remove from potentially_deleted
|
||||
file_key = (user_id, file_id)
|
||||
if file_key in _potentially_deleted:
|
||||
logger.debug(
|
||||
f"File {file_path} (ID: {file_id}) reappeared, removing from deletion grace period"
|
||||
)
|
||||
del _potentially_deleted[file_key]
|
||||
|
||||
# Query Qdrant for existing entry (placeholder or real)
|
||||
existing_metadata = await query_document_metadata(
|
||||
doc_id=file_id, doc_type="file", user_id=user_id
|
||||
)
|
||||
|
||||
# Send if never indexed or modified since last index
|
||||
# Compare against stored modified_at (not indexed_at!)
|
||||
needs_indexing = False
|
||||
if existing_metadata is None:
|
||||
# Never seen before
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("modified_at", 0) < modified_at:
|
||||
# File modified since last indexing
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for file {file_path} (ID: {file_id}) "
|
||||
f"(age={placeholder_age:.1f}s), requeuing"
|
||||
)
|
||||
needs_indexing = True
|
||||
else:
|
||||
logger.debug(
|
||||
f"Skipping file {file_path} (ID: {file_id}) with recent placeholder "
|
||||
f"(age={placeholder_age:.1f}s < {stale_threshold:.1f}s)"
|
||||
)
|
||||
|
||||
if needs_indexing:
|
||||
# Write placeholder before queuing
|
||||
await write_placeholder_point(
|
||||
doc_id=file_id,
|
||||
doc_type="file",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
file_path=file_path,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=file_id, # Use numeric file ID
|
||||
doc_type="file",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
file_path=file_path, # Pass file path for content retrieval
|
||||
)
|
||||
)
|
||||
file_queued += 1
|
||||
|
||||
logger.info(
|
||||
f"[SCAN-{scan_id}] Found {file_count} tagged PDFs for {user_id}"
|
||||
)
|
||||
record_vector_sync_scan(file_count)
|
||||
|
||||
# Check for deleted files (not initial sync)
|
||||
if not initial_sync:
|
||||
for file_id in indexed_file_ids:
|
||||
if file_id not in nextcloud_file_ids:
|
||||
file_key = (user_id, file_id)
|
||||
|
||||
if file_key in _potentially_deleted:
|
||||
# Check if grace period elapsed
|
||||
first_missing_time = _potentially_deleted[file_key]
|
||||
time_missing = current_time - first_missing_time
|
||||
|
||||
if time_missing >= grace_period:
|
||||
# Grace period elapsed, send for deletion
|
||||
logger.info(
|
||||
f"File ID {file_id} missing for {time_missing:.1f}s "
|
||||
f"(>{grace_period:.1f}s grace period), sending deletion"
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=file_id, # Use numeric file ID
|
||||
doc_type="file",
|
||||
operation="delete",
|
||||
modified_at=0,
|
||||
)
|
||||
)
|
||||
file_queued += 1
|
||||
del _potentially_deleted[file_key]
|
||||
else:
|
||||
# First time missing, add to grace period tracking
|
||||
logger.debug(
|
||||
f"File ID {file_id} missing for first time, starting grace period"
|
||||
)
|
||||
_potentially_deleted[file_key] = current_time
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to scan tagged files for {user_id}: {e}")
|
||||
|
||||
queued += file_queued
|
||||
|
||||
# Scan News items (starred + unread)
|
||||
news_queued = 0
|
||||
try:
|
||||
news_queued = await scan_news_items(
|
||||
user_id=user_id,
|
||||
send_stream=send_stream,
|
||||
nc_client=nc_client,
|
||||
initial_sync=initial_sync,
|
||||
scan_id=scan_id,
|
||||
)
|
||||
queued += news_queued
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to scan news items for {user_id}: {e}")
|
||||
|
||||
# Scan Deck cards
|
||||
deck_queued = 0
|
||||
try:
|
||||
deck_queued = await scan_deck_cards(
|
||||
user_id=user_id,
|
||||
send_stream=send_stream,
|
||||
nc_client=nc_client,
|
||||
initial_sync=initial_sync,
|
||||
scan_id=scan_id,
|
||||
)
|
||||
queued += deck_queued
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to scan deck cards for {user_id}: {e}")
|
||||
|
||||
if queued > 0:
|
||||
logger.info(f"Sent {queued} documents for incremental sync: {user_id}")
|
||||
logger.info(
|
||||
f"Sent {queued} documents ({file_queued} files, {news_queued} news items, {deck_queued} deck cards) for incremental sync: {user_id}"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"No changes detected for {user_id}")
|
||||
|
||||
|
||||
async def scan_news_items(
|
||||
user_id: str,
|
||||
send_stream: MemoryObjectSendStream[DocumentTask],
|
||||
nc_client: NextcloudClient,
|
||||
initial_sync: bool,
|
||||
scan_id: int,
|
||||
) -> int:
|
||||
"""
|
||||
Scan user's News items and queue changed items for indexing.
|
||||
|
||||
Indexes all items from the user's feeds. The News app's auto-purge
|
||||
feature (default: 200 items per feed) naturally limits the total
|
||||
number of items, making explicit filtering unnecessary.
|
||||
|
||||
Args:
|
||||
user_id: User to scan
|
||||
send_stream: Stream to send changed documents to processors
|
||||
nc_client: Authenticated Nextcloud client
|
||||
initial_sync: If True, send all documents (first-time sync)
|
||||
scan_id: Scan identifier for logging
|
||||
|
||||
Returns:
|
||||
Number of items queued for processing
|
||||
"""
|
||||
from nextcloud_mcp_server.client.news import NewsItemType
|
||||
|
||||
settings = get_settings()
|
||||
queued = 0
|
||||
|
||||
# Get indexed news item IDs from Qdrant (for deletion tracking)
|
||||
indexed_item_ids: set[str] = set()
|
||||
if not initial_sync:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="news_item")),
|
||||
]
|
||||
),
|
||||
with_payload=["doc_id"],
|
||||
with_vectors=False,
|
||||
limit=10000,
|
||||
)
|
||||
indexed_item_ids = {
|
||||
point.payload["doc_id"]
|
||||
for point in (scroll_result[0] or [])
|
||||
if point.payload is not None
|
||||
}
|
||||
logger.debug(f"Found {len(indexed_item_ids)} indexed news items in Qdrant")
|
||||
|
||||
# Fetch all items (News app caps at ~200 per feed via auto-purge)
|
||||
all_items = await nc_client.news.get_items(
|
||||
batch_size=-1,
|
||||
type_=NewsItemType.ALL,
|
||||
get_read=True,
|
||||
)
|
||||
logger.debug(f"[SCAN-{scan_id}] Found {len(all_items)} news items")
|
||||
|
||||
item_count = len(all_items)
|
||||
nextcloud_item_ids: set[str] = set()
|
||||
|
||||
for item in all_items:
|
||||
doc_id = str(item["id"])
|
||||
nextcloud_item_ids.add(doc_id)
|
||||
|
||||
# Use lastModified timestamp (microseconds in News API)
|
||||
modified_at = item.get("lastModified", 0)
|
||||
# Convert to seconds if needed (News API uses microseconds)
|
||||
if modified_at > 10000000000: # > year 2286 in seconds
|
||||
modified_at = modified_at // 1000000
|
||||
|
||||
if initial_sync:
|
||||
# Send everything on first sync - write placeholder first
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="news_item",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="news_item",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
else:
|
||||
# Incremental sync: check if item exists and compare modified_at
|
||||
doc_key = (user_id, doc_id)
|
||||
if doc_key in _potentially_deleted:
|
||||
logger.debug(
|
||||
f"News item {doc_id} reappeared, removing from deletion grace period"
|
||||
)
|
||||
del _potentially_deleted[doc_key]
|
||||
|
||||
# Query Qdrant for existing entry
|
||||
existing_metadata = await query_document_metadata(
|
||||
doc_id=doc_id, doc_type="news_item", user_id=user_id
|
||||
)
|
||||
|
||||
needs_indexing = False
|
||||
if existing_metadata is None:
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("modified_at", 0) < modified_at:
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = settings.vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for news item {doc_id} "
|
||||
f"(age={placeholder_age:.1f}s), requeuing"
|
||||
)
|
||||
needs_indexing = True
|
||||
|
||||
if needs_indexing:
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="news_item",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="news_item",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
|
||||
logger.info(
|
||||
f"[SCAN-{scan_id}] Found {item_count} news items (starred+unread) for {user_id}"
|
||||
)
|
||||
record_vector_sync_scan(item_count)
|
||||
|
||||
# Check for deleted items (not initial sync)
|
||||
# Items become "deleted" when they are no longer starred AND become read
|
||||
if not initial_sync:
|
||||
grace_period = settings.vector_sync_scan_interval * 1.5
|
||||
current_time = time.time()
|
||||
|
||||
for doc_id in indexed_item_ids:
|
||||
if doc_id not in nextcloud_item_ids:
|
||||
doc_key = (user_id, doc_id)
|
||||
|
||||
if doc_key in _potentially_deleted:
|
||||
first_missing_time = _potentially_deleted[doc_key]
|
||||
time_missing = current_time - first_missing_time
|
||||
|
||||
if time_missing >= grace_period:
|
||||
logger.info(
|
||||
f"News item {doc_id} missing for {time_missing:.1f}s "
|
||||
f"(>{grace_period:.1f}s grace period), sending deletion"
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="news_item",
|
||||
operation="delete",
|
||||
modified_at=0,
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
del _potentially_deleted[doc_key]
|
||||
else:
|
||||
logger.debug(
|
||||
f"News item {doc_id} missing for first time, starting grace period"
|
||||
)
|
||||
_potentially_deleted[doc_key] = current_time
|
||||
|
||||
return queued
|
||||
|
||||
|
||||
async def scan_deck_cards(
|
||||
user_id: str,
|
||||
send_stream: MemoryObjectSendStream[DocumentTask],
|
||||
nc_client: NextcloudClient,
|
||||
initial_sync: bool,
|
||||
scan_id: int,
|
||||
) -> int:
|
||||
"""
|
||||
Scan user's Deck cards and queue changed cards for indexing.
|
||||
|
||||
Indexes cards from all non-archived boards and stacks.
|
||||
|
||||
Args:
|
||||
user_id: User to scan
|
||||
send_stream: Stream to send changed documents to processors
|
||||
nc_client: Authenticated Nextcloud client
|
||||
initial_sync: If True, send all documents (first-time sync)
|
||||
scan_id: Scan identifier for logging
|
||||
|
||||
Returns:
|
||||
Number of cards queued for processing
|
||||
"""
|
||||
settings = get_settings()
|
||||
queued = 0
|
||||
|
||||
# Get indexed deck card IDs from Qdrant (for deletion tracking)
|
||||
indexed_card_ids: set[str] = set()
|
||||
if not initial_sync:
|
||||
qdrant_client = await get_qdrant_client()
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="deck_card")),
|
||||
]
|
||||
),
|
||||
with_payload=["doc_id"],
|
||||
with_vectors=False,
|
||||
limit=10000,
|
||||
)
|
||||
indexed_card_ids = {
|
||||
point.payload["doc_id"]
|
||||
for point in (scroll_result[0] or [])
|
||||
if point.payload is not None
|
||||
}
|
||||
logger.debug(f"Found {len(indexed_card_ids)} indexed deck cards in Qdrant")
|
||||
|
||||
# Fetch all boards
|
||||
boards = await nc_client.deck.get_boards()
|
||||
logger.debug(f"[SCAN-{scan_id}] Found {len(boards)} deck boards")
|
||||
|
||||
card_count = 0
|
||||
nextcloud_card_ids: set[str] = set()
|
||||
|
||||
# Iterate through boards
|
||||
for board in boards:
|
||||
# Skip archived boards
|
||||
if board.archived:
|
||||
continue
|
||||
|
||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||
if board.deletedAt > 0:
|
||||
logger.debug(f"[SCAN-{scan_id}] Skipping deleted board {board.id}")
|
||||
continue
|
||||
|
||||
# Get stacks for this board
|
||||
stacks = await nc_client.deck.get_stacks(board.id)
|
||||
|
||||
# Iterate through stacks
|
||||
for stack in stacks:
|
||||
# Skip if stack has no cards
|
||||
if not stack.cards:
|
||||
continue
|
||||
|
||||
# Iterate through cards in stack
|
||||
for card in stack.cards:
|
||||
# Skip archived cards
|
||||
if card.archived:
|
||||
continue
|
||||
|
||||
card_count += 1
|
||||
doc_id = str(card.id)
|
||||
nextcloud_card_ids.add(doc_id)
|
||||
|
||||
# Use lastModified timestamp if available
|
||||
modified_at = card.lastModified or 0
|
||||
|
||||
if initial_sync:
|
||||
# Send everything on first sync - write placeholder first
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="deck_card",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="deck_card",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
metadata={"board_id": board.id, "stack_id": stack.id},
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
else:
|
||||
# Incremental sync: check if card exists and compare modified_at
|
||||
doc_key = (user_id, doc_id)
|
||||
if doc_key in _potentially_deleted:
|
||||
logger.debug(
|
||||
f"Deck card {doc_id} reappeared, removing from deletion grace period"
|
||||
)
|
||||
del _potentially_deleted[doc_key]
|
||||
|
||||
# Query Qdrant for existing entry
|
||||
existing_metadata = await query_document_metadata(
|
||||
doc_id=doc_id, doc_type="deck_card", user_id=user_id
|
||||
)
|
||||
|
||||
needs_indexing = False
|
||||
if existing_metadata is None:
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("modified_at", 0) < modified_at:
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = settings.vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for deck card {doc_id} "
|
||||
f"(age={placeholder_age:.1f}s), requeuing"
|
||||
)
|
||||
needs_indexing = True
|
||||
|
||||
if needs_indexing:
|
||||
await write_placeholder_point(
|
||||
doc_id=doc_id,
|
||||
doc_type="deck_card",
|
||||
user_id=user_id,
|
||||
modified_at=modified_at,
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="deck_card",
|
||||
operation="index",
|
||||
modified_at=modified_at,
|
||||
metadata={"board_id": board.id, "stack_id": stack.id},
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
|
||||
logger.info(
|
||||
f"[SCAN-{scan_id}] Found {card_count} deck cards (non-archived) for {user_id}"
|
||||
)
|
||||
record_vector_sync_scan(card_count)
|
||||
|
||||
# Check for deleted cards (not initial sync)
|
||||
if not initial_sync:
|
||||
grace_period = settings.vector_sync_scan_interval * 1.5
|
||||
current_time = time.time()
|
||||
|
||||
for doc_id in indexed_card_ids:
|
||||
if doc_id not in nextcloud_card_ids:
|
||||
doc_key = (user_id, doc_id)
|
||||
|
||||
if doc_key in _potentially_deleted:
|
||||
first_missing_time = _potentially_deleted[doc_key]
|
||||
time_missing = current_time - first_missing_time
|
||||
|
||||
if time_missing >= grace_period:
|
||||
logger.info(
|
||||
f"Deck card {doc_id} missing for {time_missing:.1f}s "
|
||||
f"(>{grace_period:.1f}s grace period), sending deletion"
|
||||
)
|
||||
await send_stream.send(
|
||||
DocumentTask(
|
||||
user_id=user_id,
|
||||
doc_id=doc_id,
|
||||
doc_type="deck_card",
|
||||
operation="delete",
|
||||
modified_at=0,
|
||||
)
|
||||
)
|
||||
queued += 1
|
||||
del _potentially_deleted[doc_key]
|
||||
else:
|
||||
logger.debug(
|
||||
f"Deck card {doc_id} missing for first time, starting grace period"
|
||||
)
|
||||
_potentially_deleted[doc_key] = current_time
|
||||
|
||||
return queued
|
||||
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Shared visualization utilities for PCA coordinate computation.
|
||||
|
||||
Extracts the PCA coordinate computation logic used by both:
|
||||
- viz_routes.py (session-based auth)
|
||||
- management.py (OAuth bearer token auth)
|
||||
|
||||
Both endpoints need to compute 3D PCA coordinates for search results,
|
||||
so this module provides the shared implementation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import anyio.to_thread
|
||||
import numpy as np
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.pca import PCA
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def compute_pca_coordinates(
|
||||
search_results: list[Any],
|
||||
query_embedding: np.ndarray | list[float],
|
||||
) -> dict[str, Any]:
|
||||
"""Compute PCA 3D coordinates for search results visualization.
|
||||
|
||||
This is the shared implementation used by both viz_routes.py and
|
||||
the management API. It retrieves vectors from Qdrant and applies
|
||||
PCA dimensionality reduction.
|
||||
|
||||
Args:
|
||||
search_results: List of SearchResult objects with point_id
|
||||
query_embedding: The query embedding vector
|
||||
|
||||
Returns:
|
||||
Dict with:
|
||||
- coordinates_3d: List of [x, y, z] for each result
|
||||
- query_coords: [x, y, z] for the query point
|
||||
- pca_variance: Dict with pc1, pc2, pc3 explained variance ratios
|
||||
"""
|
||||
settings = get_settings()
|
||||
|
||||
# Collect point IDs from search results for batch retrieval
|
||||
point_ids = [r.point_id for r in search_results if r.point_id]
|
||||
|
||||
if len(point_ids) < 2:
|
||||
return {"coordinates_3d": [], "query_coords": []}
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
# Batch retrieve vectors from Qdrant
|
||||
points_response = await qdrant_client.retrieve(
|
||||
collection_name=settings.get_collection_name(),
|
||||
ids=point_ids,
|
||||
with_vectors=["dense"],
|
||||
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
|
||||
)
|
||||
|
||||
# Build chunk_vectors_map from batch response
|
||||
chunk_vectors_map: dict[tuple[Any, Any, Any], Any] = {}
|
||||
for point in points_response:
|
||||
if point.vector is not None:
|
||||
# Extract dense vector (handle both named and unnamed vectors)
|
||||
if isinstance(point.vector, dict):
|
||||
vector = point.vector.get("dense")
|
||||
else:
|
||||
vector = point.vector
|
||||
|
||||
if vector is not None and point.payload:
|
||||
doc_id = point.payload.get("doc_id")
|
||||
chunk_start = point.payload.get("chunk_start_offset")
|
||||
chunk_end = point.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, chunk_start, chunk_end)
|
||||
chunk_vectors_map[chunk_key] = vector
|
||||
|
||||
if len(chunk_vectors_map) < 2:
|
||||
return {"coordinates_3d": [], "query_coords": []}
|
||||
|
||||
# Detect embedding dimension
|
||||
embedding_dim = None
|
||||
for vector in chunk_vectors_map.values():
|
||||
if vector is not None:
|
||||
embedding_dim = len(vector)
|
||||
break
|
||||
|
||||
if embedding_dim is None:
|
||||
return {"coordinates_3d": [], "query_coords": []}
|
||||
|
||||
logger.info(f"Detected embedding dimension: {embedding_dim}")
|
||||
|
||||
# Build chunk vectors array in search_results order (1:1 mapping)
|
||||
chunk_vectors = []
|
||||
for result in search_results:
|
||||
chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
|
||||
if chunk_key in chunk_vectors_map:
|
||||
chunk_vectors.append(chunk_vectors_map[chunk_key])
|
||||
else:
|
||||
# Chunk not found in vectors (shouldn't happen)
|
||||
logger.warning(
|
||||
f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
|
||||
)
|
||||
chunk_vectors.append(np.zeros(embedding_dim))
|
||||
|
||||
chunk_vectors = np.array(chunk_vectors)
|
||||
|
||||
# Ensure query_embedding is a numpy array
|
||||
if not isinstance(query_embedding, np.ndarray):
|
||||
query_embedding = np.array(query_embedding)
|
||||
|
||||
# Combine query vector with chunk vectors for PCA
|
||||
# Query will be the last point in the array
|
||||
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
|
||||
|
||||
# Normalize vectors to unit length (L2 normalization)
|
||||
# This is critical because Qdrant uses COSINE distance, which only measures
|
||||
# vector direction (angle), not magnitude. PCA uses Euclidean distance which
|
||||
# considers both direction and magnitude. By normalizing to unit length,
|
||||
# Euclidean distances in PCA space will match cosine distances.
|
||||
norms = np.linalg.norm(all_vectors, axis=1, keepdims=True)
|
||||
|
||||
# Check for zero-norm vectors (can happen with empty/corrupted embeddings)
|
||||
zero_norm_mask = norms[:, 0] < 1e-10
|
||||
if zero_norm_mask.any():
|
||||
zero_indices = np.where(zero_norm_mask)[0]
|
||||
logger.warning(
|
||||
f"Found {zero_norm_mask.sum()} zero-norm vectors at indices "
|
||||
f"{zero_indices.tolist()}. Replacing with small epsilon to avoid "
|
||||
"division by zero."
|
||||
)
|
||||
# Replace zero norms with small epsilon to avoid NaN
|
||||
norms[zero_norm_mask] = 1e-10
|
||||
|
||||
all_vectors_normalized = all_vectors / norms
|
||||
logger.info(
|
||||
f"Normalized vectors: query_norm={norms[-1][0]:.3f}, "
|
||||
f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]"
|
||||
)
|
||||
|
||||
# Apply PCA dimensionality reduction (768-dim → 3D)
|
||||
# Run in thread pool to avoid blocking the event loop (CPU-bound)
|
||||
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
|
||||
pca = PCA(n_components=3)
|
||||
coords = pca.fit_transform(vectors)
|
||||
return coords, pca
|
||||
|
||||
coords_3d, pca = await anyio.to_thread.run_sync(
|
||||
lambda: _compute_pca(all_vectors_normalized)
|
||||
)
|
||||
|
||||
# After fit, these attributes are guaranteed to be set
|
||||
assert pca.explained_variance_ratio_ is not None
|
||||
|
||||
# Check for NaN values in PCA output (numerical instability)
|
||||
nan_mask = np.isnan(coords_3d)
|
||||
if nan_mask.any():
|
||||
nan_rows = np.where(nan_mask.any(axis=1))[0]
|
||||
logger.error(
|
||||
f"Found NaN values in PCA output at {len(nan_rows)} points: "
|
||||
f"{nan_rows.tolist()[:10]}. Replacing NaN with 0.0 to prevent "
|
||||
"JSON serialization error."
|
||||
)
|
||||
# Replace NaN with 0 to allow JSON serialization
|
||||
coords_3d = np.nan_to_num(coords_3d, nan=0.0)
|
||||
|
||||
# Split query coords from chunk coords
|
||||
# Round to 2 decimal places for cleaner display
|
||||
query_coords_3d = [round(float(x), 2) for x in coords_3d[-1]] # Last point is query
|
||||
chunk_coords_3d = coords_3d[:-1] # All but last are chunks
|
||||
|
||||
logger.info(
|
||||
f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
|
||||
f"PC2={pca.explained_variance_ratio_[1]:.3f}, "
|
||||
f"PC3={pca.explained_variance_ratio_[2]:.3f}"
|
||||
)
|
||||
|
||||
# Coordinates already match search_results order (1:1 mapping)
|
||||
result_coords = [[round(float(x), 2) for x in coord] for coord in chunk_coords_3d]
|
||||
|
||||
return {
|
||||
"coordinates_3d": result_coords,
|
||||
"query_coords": query_coords_3d,
|
||||
"pca_variance": {
|
||||
"pc1": float(pca.explained_variance_ratio_[0]),
|
||||
"pc2": float(pca.explained_variance_ratio_[1]),
|
||||
"pc3": float(pca.explained_variance_ratio_[2]),
|
||||
},
|
||||
}
|
||||
+23
-6
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.44.0"
|
||||
version = "0.56.1"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
@@ -10,7 +10,7 @@ license = {text = "AGPL-3.0-only"}
|
||||
requires-python = ">=3.11"
|
||||
keywords = ["nextcloud", "mcp", "model-context-protocol", "llm", "ai", "claude", "webdav", "caldav", "carddav"]
|
||||
dependencies = [
|
||||
"mcp[cli] (>=1.21,<1.22)",
|
||||
"mcp[cli] (>=1.23,<1.24)",
|
||||
"httpx (>=0.28.1,<0.29.0)",
|
||||
"pillow (>=10.3.0,<12.0.0)", # Compatible with fastembed
|
||||
"icalendar (>=6.0.0,<7.0.0)",
|
||||
@@ -20,6 +20,7 @@ dependencies = [
|
||||
"caldav",
|
||||
"pyjwt[crypto]>=2.8.0",
|
||||
"aiosqlite>=0.20.0", # Async SQLite for refresh token storage
|
||||
"alembic>=1.14.0", # Database migrations
|
||||
"authlib>=1.6.5",
|
||||
"qdrant-client>=1.7.0",
|
||||
"fastembed>=0.7.3", # BM25 sparse vector embeddings for hybrid search
|
||||
@@ -36,6 +37,11 @@ dependencies = [
|
||||
"python-json-logger>=3.2.0", # Structured JSON logging
|
||||
"jinja2>=3.1.6",
|
||||
"langchain-text-splitters>=1.0.0",
|
||||
"markdownify>=0.14.1", # HTML to Markdown conversion for News items
|
||||
"pymupdf>=1.26.6",
|
||||
"pymupdf4llm>=0.2.2",
|
||||
"pymupdf-layout>=1.26.6",
|
||||
"openai>=2.8.1",
|
||||
]
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
@@ -83,19 +89,29 @@ version_scheme = "pep440"
|
||||
version_provider = "uv"
|
||||
update_changelog_on_bump = true
|
||||
major_version_zero = true
|
||||
|
||||
# MCP server version files + Helm appVersion
|
||||
version_files = [
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:appVersion",
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:version"
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:^appVersion:",
|
||||
]
|
||||
|
||||
# Ignore tags from other components
|
||||
ignored_tag_formats = [
|
||||
"nextcloud-mcp-server-*"
|
||||
"nextcloud-mcp-server-*", # Helm chart tags
|
||||
"astrolabe-v*", # Astrolabe tags
|
||||
]
|
||||
|
||||
# Filter commits by scope (all scopes except helm and astrolabe)
|
||||
[tool.commitizen.customize]
|
||||
changelog_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)(?!\\((?:helm|astrolabe)\\))(\\([^)]+\\))?(!)?:"
|
||||
schema_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)(?!\\((?:helm|astrolabe)\\))(\\([^)]+\\))?(!)?:\\s.+"
|
||||
|
||||
[tool.ruff.lint]
|
||||
extend-select = ["I"]
|
||||
|
||||
[tool.uv.sources]
|
||||
caldav = { git = "https://github.com/cbcoutinho/caldav", branch = "feature/httpx" }
|
||||
qdrant-client = { git = "https://github.com/cbcoutinho/qdrant-client", branch = "fix/fusion-score-threshold" }
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build>=0.9.4,<0.10.0"]
|
||||
@@ -122,7 +138,8 @@ dev = [
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
nextcloud-mcp-server = "nextcloud_mcp_server.cli:run"
|
||||
nextcloud-mcp-server = "nextcloud_mcp_server.cli:cli"
|
||||
smithery-main = "nextcloud_mcp_server.smithery_main:main"
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "testpypi"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user