Compare commits

...

27 Commits

Author SHA1 Message Date
github-actions[bot] 959cb8b21a bump: version 0.46.1 → 0.46.2 2025-11-22 21:02:53 +00:00
Chris Coutinho f8a2410a0a Merge pull request #344 from cbcoutinho/fix/smithery-json-response
fix(smithery): Enable JSON response format for scanner compatibility
2025-11-22 22:02:24 +01:00
Chris Coutinho 03b984d5a7 fix(smithery): Enable JSON response format for scanner compatibility
The Smithery scanner was reporting "0 tools" despite the server returning
valid tool definitions. Root cause: the server was returning SSE-formatted
responses (event: message\ndata: {...}) which the scanner couldn't parse.

Changes:
- Add json_response=True to FastMCP for Smithery stateless mode
- Clean up verbose docstring examples in semantic.py and webdav.py

The MCP spec allows both SSE and plain JSON responses for HTTP transport.
Setting json_response=True returns Content-Type: application/json with
plain JSON-RPC instead of text/event-stream with SSE format.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 22:01:18 +01:00
github-actions[bot] 57db18c6a3 bump: version 0.46.0 → 0.46.1 2025-11-22 18:54:11 +00:00
Chris Coutinho ea79e94842 Merge pull request #343 from cbcoutinho/fix/vector-viz-search
perf: Optimize vector viz search performance
2025-11-22 19:53:40 +01:00
Chris Coutinho b0612cfa0f perf: Optimize vector viz search performance
- Replace sequential Qdrant scroll calls with batch retrieve
  (50 HTTP requests → 1 request, ~50x faster vector fetch)

- Add point_id to SearchResult to enable batch retrieval by Qdrant point ID

- Reuse query embedding from search algorithm in viz_routes
  (eliminates redundant embedding call, saves ~30ms)

- Make BM25 encode() async with thread pool to avoid blocking event loop
  (~4.4s was blocking, now properly async)

- Run PCA computation in thread pool to avoid blocking event loop
  (~1.2s was blocking, now properly async)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 19:47:43 +01:00
github-actions[bot] 4e61d73da5 bump: version 0.45.0 → 0.46.0 2025-11-22 18:40:24 +00:00
Chris Coutinho 3b41776110 Merge pull request #342 from cbcoutinho/feature/smithery
feat: Add Smithery stateless deployment support (ADR-016)
2025-11-22 19:39:53 +01:00
github-actions[bot] f9da19d1a1 bump: version 0.44.1 → 0.45.0 2025-11-22 16:14:35 +00:00
Chris Coutinho d2b6a26fe4 Merge pull request #341 from cbcoutinho/fix/async-await-and-pdf-metadata
fix: Async/await patterns, PDF metadata, and vector visualization improvements
2025-11-22 17:14:06 +01:00
Chris Coutinho 34fd17ba55 fix: Use alpha_composite for proper RGBA highlight blending
Drawing directly with ImageDraw on RGBA mode doesn't blend alpha
properly. Use Image.alpha_composite() with a transparent overlay
to achieve correct semi-transparent highlight fills.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 17:04:29 +01:00
Chris Coutinho 8baa07db84 fix: Remove pymupdf.layout.activate() to fix page_chunks behavior
pymupdf.layout.activate() causes pymupdf4llm.to_markdown() to ignore the
page_chunks=True option, returning a single string instead of list[dict].
This broke per-page chunking needed for semantic search indexing.

See: https://github.com/pymupdf/pymupdf4llm/issues/323

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 16:58:35 +01:00
Chris Coutinho 8c79993280 Merge pull request #334 from cbcoutinho/renovate/docker.io-library-redis-alpine
chore(deps): update docker.io/library/redis:alpine docker digest to 6cbef35
2025-11-21 14:24:54 +01:00
Chris Coutinho 8a0672a6be Merge pull request #339 from cbcoutinho/renovate/astral-sh-setup-uv-7.x
chore(deps): update astral-sh/setup-uv action to v7.1.4
2025-11-21 14:24:42 +01:00
Chris Coutinho 395f798ee2 Merge pull request #340 from cbcoutinho/renovate/ollama-1.x
chore(deps): update helm release ollama to v1.35.0
2025-11-21 14:24:26 +01:00
renovate-bot-cbcoutinho[bot] debff75221 chore(deps): update helm release ollama to v1.35.0 2025-11-21 11:09:18 +00:00
renovate-bot-cbcoutinho[bot] 4bf0a6c22e chore(deps): update astral-sh/setup-uv action to v7.1.4 2025-11-21 11:08:53 +00:00
Chris Coutinho fb025821cb Merge pull request #335 from cbcoutinho/renovate/ghcr.io-astral-sh-uv-0.x
chore(deps): update ghcr.io/astral-sh/uv docker tag to v0.9.11
2025-11-21 09:45:31 +01:00
Chris Coutinho ff880fd4c9 Merge pull request #338 from cbcoutinho/renovate/docker.io-library-nextcloud-32.x
chore(deps): update docker.io/library/nextcloud docker tag to v32.0.2
2025-11-21 09:34:20 +01:00
renovate-bot-cbcoutinho[bot] 03495d901d chore(deps): update docker.io/library/nextcloud docker tag to v32.0.2 2025-11-21 05:14:28 +00:00
github-actions[bot] 798958f20a bump: version 0.44.0 → 0.44.1 2025-11-21 00:39:23 +00:00
Chris Coutinho 699295c5be Merge pull request #336 from cbcoutinho/renovate/mcp-1.x
fix(deps): update dependency mcp to >=1.22,<1.23
2025-11-21 01:38:50 +01:00
renovate-bot-cbcoutinho[bot] d4fc1de80d fix(deps): update dependency mcp to >=1.22,<1.23 2025-11-20 23:11:11 +00:00
renovate-bot-cbcoutinho[bot] 0902b5653f chore(deps): update ghcr.io/astral-sh/uv docker tag to v0.9.11 2025-11-20 23:10:47 +00:00
renovate-bot-cbcoutinho[bot] 0b6a02075c chore(deps): update docker.io/library/redis:alpine docker digest to 6cbef35 2025-11-20 23:10:43 +00:00
Chris Coutinho 7880a8de30 Merge pull request #333 from cbcoutinho/renovate/actions-checkout-6.x
chore(deps): update actions/checkout action to v6
2025-11-20 20:17:21 +01:00
renovate-bot-cbcoutinho[bot] 2abedd6b4b chore(deps): update actions/checkout action to v6 2025-11-20 17:12:30 +00:00
22 changed files with 192 additions and 131 deletions
+1 -1
View File
@@ -15,7 +15,7 @@ jobs:
packages: write
steps:
- name: Check out
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
with:
fetch-depth: 0
token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
+1 -1
View File
@@ -12,7 +12,7 @@ jobs:
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
- name: Docker meta
id: meta
+1 -1
View File
@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
with:
fetch-depth: 0
+2 -2
View File
@@ -18,9 +18,9 @@ jobs:
contents: read
steps:
- name: Checkout
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
- name: Install uv
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
- name: Install Python 3.11
run: uv python install 3.11
- name: Build
+4 -4
View File
@@ -9,9 +9,9 @@ jobs:
linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
- name: Install the latest version of uv
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
- name: Check format
run: |
uv run --frozen ruff format --diff
@@ -27,7 +27,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
with:
submodules: 'true'
@@ -56,7 +56,7 @@ jobs:
up-flags: "--build"
- name: Install the latest version of uv
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
- name: Install Playwright dependencies
run: |
+64
View File
@@ -1,3 +1,67 @@
## v0.46.2 (2025-11-22)
### Fix
- **smithery**: Enable JSON response format for scanner compatibility
## v0.46.1 (2025-11-22)
### Perf
- Optimize vector viz search performance
## v0.46.0 (2025-11-22)
### Feat
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
### Fix
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
## v0.45.0 (2025-11-22)
### Feat
- Add context expansion to semantic search with chunk overlap removal
- Use Ollama native batch API in embed_batch()
- Implement Qdrant placeholder state management
- Switch files to use numeric IDs with file_path resolution
- Implement per-chunk vector visualization with context expansion
### Fix
- Use alpha_composite for proper RGBA highlight blending
- Remove pymupdf.layout.activate() to fix page_chunks behavior
- Centralize PDF processing and generate separate images per chunk
- Set is_placeholder=False in processor to fix search filtering
- Increase placeholder staleness threshold to 5x scan interval
- Add placeholder staleness check to prevent duplicate processing
- Use empty SparseVector instead of None for placeholders
- Return empty array instead of null for query_coords when no results
- Align PDF text extraction between indexing and context expansion
- Update models and viz to use int-only doc_id
- Reconstruct full content for notes to match indexed offsets
- Add async/await, PDF metadata, and type safety fixes
### Refactor
- Simplify PDF text extraction with single to_markdown call
### Perf
- Optimize PDF processing with parallel extraction and single-render highlights
## v0.44.1 (2025-11-21)
### Fix
- **deps**: update dependency mcp to >=1.22,<1.23
## v0.44.0 (2025-11-19)
### Feat
+2 -1
View File
@@ -1,6 +1,6 @@
FROM docker.io/library/python:3.12-slim-trixie@sha256:2e683fc3e18a248aa23b8022f2a3474b072b04fb851efe9b49f6b516a8944939
COPY --from=ghcr.io/astral-sh/uv:0.9.10@sha256:29bd45092ea8902c0bbb7f0a338f0494a382b1f4b18355df5be270ade679ff1d /uv /uvx /bin/
COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/
# Install dependencies
# 1. git (required for caldav dependency from git)
@@ -18,6 +18,7 @@ RUN uv sync --locked --no-dev --no-editable --no-cache
ENV PYTHONUNBUFFERED=1
ENV VIRTUAL_ENV=/app/.venv
ENV PATH=/app/.vnev/bin:$PATH
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
+3 -3
View File
@@ -4,6 +4,6 @@ dependencies:
version: 1.16.0
- name: ollama
repository: https://otwld.github.io/ollama-helm
version: 1.34.0
digest: sha256:9dfb8d6e3d5488f669d4c37f3a766213b598ff3de2aead2c734789736c7835b4
generated: "2025-11-17T17:08:48.055530019Z"
version: 1.35.0
digest: sha256:da8db198b12ce0252df220fabb297cfe69186edb8e67952c52e05de778189b92
generated: "2025-11-21T11:09:07.997781541Z"
+3 -3
View File
@@ -2,8 +2,8 @@ apiVersion: v2
name: nextcloud-mcp-server
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
type: application
version: 0.44.0
appVersion: "0.44.0"
version: 0.46.2
appVersion: "0.46.2"
keywords:
- nextcloud
- mcp
@@ -31,6 +31,6 @@ dependencies:
repository: https://qdrant.github.io/qdrant-helm
condition: qdrant.networkMode.deploySubchart
- name: ollama
version: "1.34.0"
version: "1.35.0"
repository: https://otwld.github.io/ollama-helm
condition: ollama.enabled
+2 -2
View File
@@ -17,11 +17,11 @@ services:
# Note: Redis is an external service. You can find more information about the configuration here:
# https://hub.docker.com/_/redis
redis:
image: docker.io/library/redis:alpine@sha256:5013e94192ef18a5d8368179c7522e5300f9265cc339cadac76c7b93303a2752
image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
restart: always
app:
image: docker.io/library/nextcloud:32.0.1@sha256:d572839eeb693026d72a0c6aa48076df0bb8930797ea321e604936ef7189d06e
image: docker.io/library/nextcloud:32.0.2@sha256:ac08482d73ffd85d94069ba291bbd5fb39a70ff21502030a2e3e2d89a7246a48
restart: always
ports:
- 0.0.0.0:8080:80
+5 -1
View File
@@ -1072,7 +1072,11 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
# ADR-016: Use Smithery lifespan for stateless mode, BasicAuth otherwise
if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
logger.info("Configuring MCP server for Smithery stateless mode")
mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_smithery)
# json_response=True returns plain JSON-RPC instead of SSE format,
# required for Smithery scanner compatibility
mcp = FastMCP(
"Nextcloud MCP", lifespan=app_lifespan_smithery, json_response=True
)
else:
logger.info("Configuring MCP server for BasicAuth mode")
mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_basic)
+44 -57
View File
@@ -218,71 +218,41 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
}
)
# Fetch vectors for specific matching chunks from Qdrant
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
vector_fetch_start = time.perf_counter()
qdrant_client = await get_qdrant_client()
# Build filters for each specific chunk
from qdrant_client.models import FieldCondition, Filter, MatchValue
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
# Fetch vectors in batches by filtering on chunk-specific fields
for result in search_results:
chunk_start = result.chunk_start_offset
chunk_end = result.chunk_end_offset
# Collect point IDs from search results for batch retrieval
# point_id is the Qdrant internal ID returned by search algorithms
point_ids = [r.point_id for r in search_results if r.point_id]
# Build filter for this specific chunk
must_conditions = [
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
FieldCondition(
key="doc_id",
match=MatchValue(value=result.id),
),
FieldCondition(
key="user_id",
match=MatchValue(value=username),
),
]
# Add chunk position filters if available
if chunk_start is not None:
must_conditions.append(
FieldCondition(
key="chunk_start_offset",
match=MatchValue(value=chunk_start),
)
)
if chunk_end is not None:
must_conditions.append(
FieldCondition(
key="chunk_end_offset",
match=MatchValue(value=chunk_end),
)
)
# Fetch this specific chunk vector
points_response = await qdrant_client.scroll(
if point_ids:
# Single batch retrieve call instead of N sequential scroll calls
# This is ~50x faster for 50 results (1 HTTP request vs 50)
points_response = await qdrant_client.retrieve(
collection_name=settings.get_collection_name(),
scroll_filter=Filter(must=must_conditions),
limit=1, # Only need the first match
ids=point_ids,
with_vectors=["dense"],
with_payload=False,
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
)
points = points_response[0]
if points:
# Extract dense vector
point = points[0]
# Build chunk_vectors_map from batch response
for point in points_response:
if point.vector is not None:
# If named vectors (dict), extract "dense"
# Extract dense vector (handle both named and unnamed vectors)
if isinstance(point.vector, dict):
vector = point.vector.get("dense")
else:
vector = point.vector
chunk_key = (result.id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
if vector is not None and point.payload:
doc_id = point.payload.get("doc_id")
chunk_start = point.payload.get("chunk_start_offset")
chunk_end = point.payload.get("chunk_end_offset")
chunk_key = (doc_id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -341,16 +311,23 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
chunk_vectors = np.array(chunk_vectors)
# Generate query embedding for visualization
# Reuse query embedding from search algorithm (avoids redundant embedding call)
query_embed_start = time.perf_counter()
from nextcloud_mcp_server.embedding.service import get_embedding_service
if search_algo.query_embedding is not None:
query_embedding = search_algo.query_embedding
logger.info(
f"Reusing query embedding from search algorithm "
f"(dimension={len(query_embedding)})"
)
else:
# Fallback: generate embedding if not available from search
from nextcloud_mcp_server.embedding.service import get_embedding_service
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
query_embed_duration = time.perf_counter() - query_embed_start
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
# Combine query vector with chunk vectors for PCA
# Query will be the last point in the array
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -380,9 +357,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
)
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
# Run in thread pool to avoid blocking the event loop (CPU-bound)
pca_start = time.perf_counter()
pca = PCA(n_components=3)
coords_3d = pca.fit_transform(all_vectors_normalized)
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
pca = PCA(n_components=3)
coords = pca.fit_transform(vectors)
return coords, pca
import anyio
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
lambda: _compute_pca(all_vectors_normalized)
)
pca_duration = time.perf_counter() - pca_start
# After fit, these attributes are guaranteed to be set
@@ -6,15 +6,15 @@ import tempfile
from collections.abc import Awaitable, Callable
from typing import Any, Optional
# NOTE: Do NOT call pymupdf.layout.activate() here!
# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
# causing it to return a string instead of a list[dict].
# See: https://github.com/pymupdf/pymupdf4llm/issues/323
import pymupdf
import pymupdf.layout
import pymupdf4llm
from .base import DocumentProcessor, ProcessingResult, ProcessorError
# Activate layout analysis for better text extraction
pymupdf.layout.activate()
import pymupdf4llm # noqa
logger = logging.getLogger(__name__)
@@ -37,7 +37,9 @@ class BM25SparseEmbeddingProvider:
def encode(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text.
Generate BM25 sparse embedding for a single text (synchronous).
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
Args:
text: Input text to encode
@@ -53,6 +55,23 @@ class BM25SparseEmbeddingProvider:
"values": sparse_embedding.values.tolist(),
}
async def encode_async(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (async).
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
Args:
text: Input text to encode
Returns:
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
"""
import anyio
# Run CPU-bound BM25 encoding in thread pool
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
"""
Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,6 +140,7 @@ class SearchResult:
page_number: Page number for PDF documents (None for other doc types)
chunk_index: Zero-based index of this chunk in the document
total_chunks: Total number of chunks in the document
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
"""
id: int
@@ -153,6 +154,7 @@ class SearchResult:
page_number: int | None = None
chunk_index: int = 0
total_chunks: int = 1
point_id: str | None = None
def __post_init__(self):
"""Validate score is non-negative.
@@ -172,8 +174,15 @@ class SearchAlgorithm(ABC):
All search algorithms must implement the search() method with consistent
interface, allowing them to be used interchangeably.
Attributes:
query_embedding: The query embedding generated during the last search.
Available after search() completes for algorithms that use embeddings.
Can be reused by callers to avoid redundant embedding generation.
"""
query_embedding: list[float] | None = None
@abstractmethod
async def search(
self,
+4 -1
View File
@@ -101,11 +101,13 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
# Generate dense embedding for semantic search
embedding_service = get_embedding_service()
dense_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = dense_embedding
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
# Generate sparse embedding for BM25 keyword search
bm25_service = get_bm25_service()
sparse_embedding = bm25_service.encode(query)
sparse_embedding = await bm25_service.encode_async(query)
logger.debug(
f"Generated sparse embedding "
f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -218,6 +220,7 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
+12 -6
View File
@@ -845,9 +845,8 @@ class PDFHighlighter:
logger.warning(f"Chunk {chunk_index}: could not find bbox")
continue
# Copy base image and draw highlight using PIL (fast!)
# Copy base image for this chunk
chunk_image = base_image.copy()
draw = ImageDraw.Draw(chunk_image, "RGBA")
# Scale bbox coordinates to pixmap coordinates
scale_x = base_pix.width / page_rect.width
@@ -859,10 +858,17 @@ class PDFHighlighter:
int(bbox[3] * scale_y),
)
# Draw semi-transparent fill
draw.rectangle(pil_bbox, fill=fill_color)
# Draw dashed border (PIL doesn't support dashes, use solid)
draw.rectangle(pil_bbox, outline=pil_color, width=3)
# Create transparent overlay for fill (proper alpha blending)
overlay = Image.new("RGBA", chunk_image.size, (0, 0, 0, 0))
overlay_draw = ImageDraw.Draw(overlay)
overlay_draw.rectangle(pil_bbox, fill=fill_color)
# Alpha composite the overlay onto the chunk image
chunk_image = Image.alpha_composite(chunk_image, overlay)
# Draw border on top (solid, not transparent)
border_draw = ImageDraw.Draw(chunk_image)
border_draw.rectangle(pil_bbox, outline=pil_color, width=3)
# Convert back to PNG bytes
output = BytesIO()
+3
View File
@@ -78,6 +78,8 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
# Generate embedding for query
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = query_embedding
logger.debug(
f"Generated embedding for query (dimension={len(query_embedding)})"
)
@@ -164,6 +166,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
-21
View File
@@ -335,27 +335,6 @@ def configure_semantic_tools(mcp: FastMCP):
Note: Requires MCP client to support sampling. If sampling is unavailable,
the tool gracefully degrades to returning documents with an explanation.
The client may prompt the user to approve the sampling request.
Examples:
>>> # Query about objectives across multiple apps
>>> result = await nc_semantic_search_answer(
... query="What are my Q1 2025 project goals?",
... ctx=ctx
... )
>>> print(result.generated_answer)
"Based on Document 1 (note: Project Kickoff), Document 2 (calendar event:
Q1 Planning Meeting), and Document 3 (deck card: Implement semantic search),
your main goals are: 1) Improve semantic search accuracy by 20%,
2) Deploy new embedding model, 3) Reduce indexing latency..."
>>> # Query about appointments
>>> result = await nc_semantic_search_answer(
... query="When is my next dentist appointment?",
... ctx=ctx,
... limit=10
... )
>>> len(result.sources) # Calendar events and related notes
3
"""
# 1. Retrieve relevant documents via existing semantic search
search_response = await nc_semantic_search(
-14
View File
@@ -64,20 +64,6 @@ def configure_webdav_tools(mcp: FastMCP):
- Text files are decoded to UTF-8
- Documents (PDF, DOCX, etc.) are parsed and text is extracted
- Other binary files are base64 encoded
Examples:
# Read a text file
result = await nc_webdav_read_file("Documents/readme.txt")
logger.info(result['content']) # Decoded text content
# Read a PDF document (automatically parsed)
result = await nc_webdav_read_file("Documents/report.pdf")
logger.info(result['content']) # Extracted text from PDF
logger.info(result['parsing_metadata']) # Document parsing info
# Read a binary file
result = await nc_webdav_read_file("Images/photo.jpg")
logger.info(result['encoding']) # 'base64'
"""
client = await get_client(ctx)
content, content_type = await client.webdav.read_file(path)
+2 -2
View File
@@ -1,6 +1,6 @@
[project]
name = "nextcloud-mcp-server"
version = "0.44.0"
version = "0.46.2"
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
authors = [
{name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -10,7 +10,7 @@ license = {text = "AGPL-3.0-only"}
requires-python = ">=3.11"
keywords = ["nextcloud", "mcp", "model-context-protocol", "llm", "ai", "claude", "webdav", "caldav", "carddav"]
dependencies = [
"mcp[cli] (>=1.21,<1.22)",
"mcp[cli] (>=1.22,<1.23)",
"httpx (>=0.28.1,<0.29.0)",
"pillow (>=10.3.0,<12.0.0)", # Compatible with fastembed
"icalendar (>=6.0.0,<7.0.0)",
Generated
+5 -5
View File
@@ -1645,7 +1645,7 @@ wheels = [
[[package]]
name = "mcp"
version = "1.21.1"
version = "1.22.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1663,9 +1663,9 @@ dependencies = [
{ name = "typing-inspection" },
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f7/25/4df633e7574254ada574822db2245bbee424725d1b01bccae10bf128794e/mcp-1.21.1.tar.gz", hash = "sha256:540e6ac4b12b085c43f14879fde04cbdb10148a09ea9492ff82d8c7ba651a302", size = 469071, upload-time = "2025-11-13T20:33:46.139Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a3/a2/c5ec0ab38b35ade2ae49a90fada718fbc76811dc5aa1760414c6aaa6b08a/mcp-1.22.0.tar.gz", hash = "sha256:769b9ac90ed42134375b19e777a2858ca300f95f2e800982b3e2be62dfc0ba01", size = 471788, upload-time = "2025-11-20T20:11:28.095Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/49/af/01fb42df59ad15925ffc1e2e609adafddd3ac4572f606faae0dc8b55ba0c/mcp-1.21.1-py3-none-any.whl", hash = "sha256:dd35abe36d68530a8a1291daa25d50276d8731e545c0434d6e250a3700dd2a6d", size = 174852, upload-time = "2025-11-13T20:33:44.502Z" },
{ url = "https://files.pythonhosted.org/packages/a9/bb/711099f9c6bb52770f56e56401cdfb10da5b67029f701e0df29362df4c8e/mcp-1.22.0-py3-none-any.whl", hash = "sha256:bed758e24df1ed6846989c909ba4e3df339a27b4f30f1b8b627862a4bade4e98", size = 175489, upload-time = "2025-11-20T20:11:26.542Z" },
]
[package.optional-dependencies]
@@ -1936,7 +1936,7 @@ wheels = [
[[package]]
name = "nextcloud-mcp-server"
version = "0.44.0"
version = "0.46.2"
source = { editable = "." }
dependencies = [
{ name = "aiosqlite" },
@@ -1998,7 +1998,7 @@ requires-dist = [
{ name = "icalendar", specifier = ">=6.0.0,<7.0.0" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "langchain-text-splitters", specifier = ">=1.0.0" },
{ name = "mcp", extras = ["cli"], specifier = ">=1.21,<1.22" },
{ name = "mcp", extras = ["cli"], specifier = ">=1.22,<1.23" },
{ name = "opentelemetry-api", specifier = ">=1.28.2" },
{ name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.28.2" },
{ name = "opentelemetry-instrumentation-asgi", specifier = ">=0.49b2" },