Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 959cb8b21a | |||
| f8a2410a0a | |||
| 03b984d5a7 | |||
| 57db18c6a3 | |||
| ea79e94842 | |||
| b0612cfa0f | |||
| 4e61d73da5 | |||
| 3b41776110 | |||
| f9da19d1a1 | |||
| d2b6a26fe4 | |||
| 34fd17ba55 | |||
| 8baa07db84 | |||
| 8c79993280 | |||
| 8a0672a6be | |||
| 395f798ee2 | |||
| debff75221 | |||
| 4bf0a6c22e | |||
| fb025821cb | |||
| ff880fd4c9 | |||
| 03495d901d | |||
| 798958f20a | |||
| 699295c5be | |||
| d4fc1de80d | |||
| 0902b5653f | |||
| 0b6a02075c | |||
| 7880a8de30 | |||
| 2abedd6b4b |
@@ -15,7 +15,7 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
|
||||
|
||||
@@ -12,7 +12,7 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
|
||||
@@ -14,7 +14,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
@@ -18,9 +18,9 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
|
||||
- name: Install Python 3.11
|
||||
run: uv python install 3.11
|
||||
- name: Build
|
||||
|
||||
@@ -9,9 +9,9 @@ jobs:
|
||||
linting:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
|
||||
- name: Check format
|
||||
run: |
|
||||
uv run --frozen ruff format --diff
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
up-flags: "--build"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
|
||||
|
||||
- name: Install Playwright dependencies
|
||||
run: |
|
||||
|
||||
@@ -1,3 +1,67 @@
|
||||
## v0.46.2 (2025-11-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Enable JSON response format for scanner compatibility
|
||||
|
||||
## v0.46.1 (2025-11-22)
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize vector viz search performance
|
||||
|
||||
## v0.46.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Smithery CLI deployment support
|
||||
- Implement ADR-016 Smithery stateless deployment mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
|
||||
- **smithery**: Use container runtime pattern for config discovery
|
||||
- Add Smithery lifespan and auth mode detection
|
||||
|
||||
## v0.45.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add context expansion to semantic search with chunk overlap removal
|
||||
- Use Ollama native batch API in embed_batch()
|
||||
- Implement Qdrant placeholder state management
|
||||
- Switch files to use numeric IDs with file_path resolution
|
||||
- Implement per-chunk vector visualization with context expansion
|
||||
|
||||
### Fix
|
||||
|
||||
- Use alpha_composite for proper RGBA highlight blending
|
||||
- Remove pymupdf.layout.activate() to fix page_chunks behavior
|
||||
- Centralize PDF processing and generate separate images per chunk
|
||||
- Set is_placeholder=False in processor to fix search filtering
|
||||
- Increase placeholder staleness threshold to 5x scan interval
|
||||
- Add placeholder staleness check to prevent duplicate processing
|
||||
- Use empty SparseVector instead of None for placeholders
|
||||
- Return empty array instead of null for query_coords when no results
|
||||
- Align PDF text extraction between indexing and context expansion
|
||||
- Update models and viz to use int-only doc_id
|
||||
- Reconstruct full content for notes to match indexed offsets
|
||||
- Add async/await, PDF metadata, and type safety fixes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Simplify PDF text extraction with single to_markdown call
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize PDF processing with parallel extraction and single-render highlights
|
||||
|
||||
## v0.44.1 (2025-11-21)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.22,<1.23
|
||||
|
||||
## v0.44.0 (2025-11-19)
|
||||
|
||||
### Feat
|
||||
|
||||
+2
-1
@@ -1,6 +1,6 @@
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:2e683fc3e18a248aa23b8022f2a3474b072b04fb851efe9b49f6b516a8944939
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.10@sha256:29bd45092ea8902c0bbb7f0a338f0494a382b1f4b18355df5be270ade679ff1d /uv /uvx /bin/
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/
|
||||
|
||||
# Install dependencies
|
||||
# 1. git (required for caldav dependency from git)
|
||||
@@ -18,6 +18,7 @@ RUN uv sync --locked --no-dev --no-editable --no-cache
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV VIRTUAL_ENV=/app/.venv
|
||||
ENV PATH=/app/.vnev/bin:$PATH
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
|
||||
|
||||
@@ -4,6 +4,6 @@ dependencies:
|
||||
version: 1.16.0
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
version: 1.34.0
|
||||
digest: sha256:9dfb8d6e3d5488f669d4c37f3a766213b598ff3de2aead2c734789736c7835b4
|
||||
generated: "2025-11-17T17:08:48.055530019Z"
|
||||
version: 1.35.0
|
||||
digest: sha256:da8db198b12ce0252df220fabb297cfe69186edb8e67952c52e05de778189b92
|
||||
generated: "2025-11-21T11:09:07.997781541Z"
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.44.0
|
||||
appVersion: "0.44.0"
|
||||
version: 0.46.2
|
||||
appVersion: "0.46.2"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
@@ -31,6 +31,6 @@ dependencies:
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
condition: qdrant.networkMode.deploySubchart
|
||||
- name: ollama
|
||||
version: "1.34.0"
|
||||
version: "1.35.0"
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
condition: ollama.enabled
|
||||
|
||||
+2
-2
@@ -17,11 +17,11 @@ services:
|
||||
# Note: Redis is an external service. You can find more information about the configuration here:
|
||||
# https://hub.docker.com/_/redis
|
||||
redis:
|
||||
image: docker.io/library/redis:alpine@sha256:5013e94192ef18a5d8368179c7522e5300f9265cc339cadac76c7b93303a2752
|
||||
image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
|
||||
restart: always
|
||||
|
||||
app:
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:d572839eeb693026d72a0c6aa48076df0bb8930797ea321e604936ef7189d06e
|
||||
image: docker.io/library/nextcloud:32.0.2@sha256:ac08482d73ffd85d94069ba291bbd5fb39a70ff21502030a2e3e2d89a7246a48
|
||||
restart: always
|
||||
ports:
|
||||
- 0.0.0.0:8080:80
|
||||
|
||||
@@ -1072,7 +1072,11 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
# ADR-016: Use Smithery lifespan for stateless mode, BasicAuth otherwise
|
||||
if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
|
||||
logger.info("Configuring MCP server for Smithery stateless mode")
|
||||
mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_smithery)
|
||||
# json_response=True returns plain JSON-RPC instead of SSE format,
|
||||
# required for Smithery scanner compatibility
|
||||
mcp = FastMCP(
|
||||
"Nextcloud MCP", lifespan=app_lifespan_smithery, json_response=True
|
||||
)
|
||||
else:
|
||||
logger.info("Configuring MCP server for BasicAuth mode")
|
||||
mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_basic)
|
||||
|
||||
@@ -218,71 +218,41 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
}
|
||||
)
|
||||
|
||||
# Fetch vectors for specific matching chunks from Qdrant
|
||||
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
|
||||
vector_fetch_start = time.perf_counter()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
# Build filters for each specific chunk
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
|
||||
|
||||
# Fetch vectors in batches by filtering on chunk-specific fields
|
||||
for result in search_results:
|
||||
chunk_start = result.chunk_start_offset
|
||||
chunk_end = result.chunk_end_offset
|
||||
# Collect point IDs from search results for batch retrieval
|
||||
# point_id is the Qdrant internal ID returned by search algorithms
|
||||
point_ids = [r.point_id for r in search_results if r.point_id]
|
||||
|
||||
# Build filter for this specific chunk
|
||||
must_conditions = [
|
||||
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
|
||||
FieldCondition(
|
||||
key="doc_id",
|
||||
match=MatchValue(value=result.id),
|
||||
),
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match=MatchValue(value=username),
|
||||
),
|
||||
]
|
||||
|
||||
# Add chunk position filters if available
|
||||
if chunk_start is not None:
|
||||
must_conditions.append(
|
||||
FieldCondition(
|
||||
key="chunk_start_offset",
|
||||
match=MatchValue(value=chunk_start),
|
||||
)
|
||||
)
|
||||
if chunk_end is not None:
|
||||
must_conditions.append(
|
||||
FieldCondition(
|
||||
key="chunk_end_offset",
|
||||
match=MatchValue(value=chunk_end),
|
||||
)
|
||||
)
|
||||
|
||||
# Fetch this specific chunk vector
|
||||
points_response = await qdrant_client.scroll(
|
||||
if point_ids:
|
||||
# Single batch retrieve call instead of N sequential scroll calls
|
||||
# This is ~50x faster for 50 results (1 HTTP request vs 50)
|
||||
points_response = await qdrant_client.retrieve(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(must=must_conditions),
|
||||
limit=1, # Only need the first match
|
||||
ids=point_ids,
|
||||
with_vectors=["dense"],
|
||||
with_payload=False,
|
||||
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
|
||||
)
|
||||
|
||||
points = points_response[0]
|
||||
if points:
|
||||
# Extract dense vector
|
||||
point = points[0]
|
||||
# Build chunk_vectors_map from batch response
|
||||
for point in points_response:
|
||||
if point.vector is not None:
|
||||
# If named vectors (dict), extract "dense"
|
||||
# Extract dense vector (handle both named and unnamed vectors)
|
||||
if isinstance(point.vector, dict):
|
||||
vector = point.vector.get("dense")
|
||||
else:
|
||||
vector = point.vector
|
||||
|
||||
chunk_key = (result.id, chunk_start, chunk_end)
|
||||
chunk_vectors_map[chunk_key] = vector
|
||||
if vector is not None and point.payload:
|
||||
doc_id = point.payload.get("doc_id")
|
||||
chunk_start = point.payload.get("chunk_start_offset")
|
||||
chunk_end = point.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, chunk_start, chunk_end)
|
||||
chunk_vectors_map[chunk_key] = vector
|
||||
|
||||
vector_fetch_duration = time.perf_counter() - vector_fetch_start
|
||||
|
||||
@@ -341,16 +311,23 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
|
||||
chunk_vectors = np.array(chunk_vectors)
|
||||
|
||||
# Generate query embedding for visualization
|
||||
# Reuse query embedding from search algorithm (avoids redundant embedding call)
|
||||
query_embed_start = time.perf_counter()
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
if search_algo.query_embedding is not None:
|
||||
query_embedding = search_algo.query_embedding
|
||||
logger.info(
|
||||
f"Reusing query embedding from search algorithm "
|
||||
f"(dimension={len(query_embedding)})"
|
||||
)
|
||||
else:
|
||||
# Fallback: generate embedding if not available from search
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
query_embed_duration = time.perf_counter() - query_embed_start
|
||||
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
|
||||
# Combine query vector with chunk vectors for PCA
|
||||
# Query will be the last point in the array
|
||||
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
|
||||
@@ -380,9 +357,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
)
|
||||
|
||||
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
|
||||
# Run in thread pool to avoid blocking the event loop (CPU-bound)
|
||||
pca_start = time.perf_counter()
|
||||
pca = PCA(n_components=3)
|
||||
coords_3d = pca.fit_transform(all_vectors_normalized)
|
||||
|
||||
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
|
||||
pca = PCA(n_components=3)
|
||||
coords = pca.fit_transform(vectors)
|
||||
return coords, pca
|
||||
|
||||
import anyio
|
||||
|
||||
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: _compute_pca(all_vectors_normalized)
|
||||
)
|
||||
pca_duration = time.perf_counter() - pca_start
|
||||
|
||||
# After fit, these attributes are guaranteed to be set
|
||||
|
||||
@@ -6,15 +6,15 @@ import tempfile
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
# NOTE: Do NOT call pymupdf.layout.activate() here!
|
||||
# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
|
||||
# causing it to return a string instead of a list[dict].
|
||||
# See: https://github.com/pymupdf/pymupdf4llm/issues/323
|
||||
import pymupdf
|
||||
import pymupdf.layout
|
||||
import pymupdf4llm
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
# Activate layout analysis for better text extraction
|
||||
pymupdf.layout.activate()
|
||||
import pymupdf4llm # noqa
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
||||
@@ -37,7 +37,9 @@ class BM25SparseEmbeddingProvider:
|
||||
|
||||
def encode(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text.
|
||||
Generate BM25 sparse embedding for a single text (synchronous).
|
||||
|
||||
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
@@ -53,6 +55,23 @@ class BM25SparseEmbeddingProvider:
|
||||
"values": sparse_embedding.values.tolist(),
|
||||
}
|
||||
|
||||
async def encode_async(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text (async).
|
||||
|
||||
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
|
||||
Returns:
|
||||
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
|
||||
"""
|
||||
import anyio
|
||||
|
||||
# Run CPU-bound BM25 encoding in thread pool
|
||||
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
|
||||
|
||||
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Generate BM25 sparse embeddings for multiple texts (batched).
|
||||
|
||||
@@ -140,6 +140,7 @@ class SearchResult:
|
||||
page_number: Page number for PDF documents (None for other doc types)
|
||||
chunk_index: Zero-based index of this chunk in the document
|
||||
total_chunks: Total number of chunks in the document
|
||||
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
|
||||
"""
|
||||
|
||||
id: int
|
||||
@@ -153,6 +154,7 @@ class SearchResult:
|
||||
page_number: int | None = None
|
||||
chunk_index: int = 0
|
||||
total_chunks: int = 1
|
||||
point_id: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate score is non-negative.
|
||||
@@ -172,8 +174,15 @@ class SearchAlgorithm(ABC):
|
||||
|
||||
All search algorithms must implement the search() method with consistent
|
||||
interface, allowing them to be used interchangeably.
|
||||
|
||||
Attributes:
|
||||
query_embedding: The query embedding generated during the last search.
|
||||
Available after search() completes for algorithms that use embeddings.
|
||||
Can be reused by callers to avoid redundant embedding generation.
|
||||
"""
|
||||
|
||||
query_embedding: list[float] | None = None
|
||||
|
||||
@abstractmethod
|
||||
async def search(
|
||||
self,
|
||||
|
||||
@@ -101,11 +101,13 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
# Generate dense embedding for semantic search
|
||||
embedding_service = get_embedding_service()
|
||||
dense_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = dense_embedding
|
||||
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
|
||||
|
||||
# Generate sparse embedding for BM25 keyword search
|
||||
bm25_service = get_bm25_service()
|
||||
sparse_embedding = bm25_service.encode(query)
|
||||
sparse_embedding = await bm25_service.encode_async(query)
|
||||
logger.debug(
|
||||
f"Generated sparse embedding "
|
||||
f"({len(sparse_embedding['indices'])} non-zero terms)"
|
||||
@@ -218,6 +220,7 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
page_number=result.payload.get("page_number"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -845,9 +845,8 @@ class PDFHighlighter:
|
||||
logger.warning(f"Chunk {chunk_index}: could not find bbox")
|
||||
continue
|
||||
|
||||
# Copy base image and draw highlight using PIL (fast!)
|
||||
# Copy base image for this chunk
|
||||
chunk_image = base_image.copy()
|
||||
draw = ImageDraw.Draw(chunk_image, "RGBA")
|
||||
|
||||
# Scale bbox coordinates to pixmap coordinates
|
||||
scale_x = base_pix.width / page_rect.width
|
||||
@@ -859,10 +858,17 @@ class PDFHighlighter:
|
||||
int(bbox[3] * scale_y),
|
||||
)
|
||||
|
||||
# Draw semi-transparent fill
|
||||
draw.rectangle(pil_bbox, fill=fill_color)
|
||||
# Draw dashed border (PIL doesn't support dashes, use solid)
|
||||
draw.rectangle(pil_bbox, outline=pil_color, width=3)
|
||||
# Create transparent overlay for fill (proper alpha blending)
|
||||
overlay = Image.new("RGBA", chunk_image.size, (0, 0, 0, 0))
|
||||
overlay_draw = ImageDraw.Draw(overlay)
|
||||
overlay_draw.rectangle(pil_bbox, fill=fill_color)
|
||||
|
||||
# Alpha composite the overlay onto the chunk image
|
||||
chunk_image = Image.alpha_composite(chunk_image, overlay)
|
||||
|
||||
# Draw border on top (solid, not transparent)
|
||||
border_draw = ImageDraw.Draw(chunk_image)
|
||||
border_draw.rectangle(pil_bbox, outline=pil_color, width=3)
|
||||
|
||||
# Convert back to PNG bytes
|
||||
output = BytesIO()
|
||||
|
||||
@@ -78,6 +78,8 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
# Generate embedding for query
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = query_embedding
|
||||
logger.debug(
|
||||
f"Generated embedding for query (dimension={len(query_embedding)})"
|
||||
)
|
||||
@@ -164,6 +166,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
page_number=result.payload.get("page_number"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -335,27 +335,6 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
Note: Requires MCP client to support sampling. If sampling is unavailable,
|
||||
the tool gracefully degrades to returning documents with an explanation.
|
||||
The client may prompt the user to approve the sampling request.
|
||||
|
||||
Examples:
|
||||
>>> # Query about objectives across multiple apps
|
||||
>>> result = await nc_semantic_search_answer(
|
||||
... query="What are my Q1 2025 project goals?",
|
||||
... ctx=ctx
|
||||
... )
|
||||
>>> print(result.generated_answer)
|
||||
"Based on Document 1 (note: Project Kickoff), Document 2 (calendar event:
|
||||
Q1 Planning Meeting), and Document 3 (deck card: Implement semantic search),
|
||||
your main goals are: 1) Improve semantic search accuracy by 20%,
|
||||
2) Deploy new embedding model, 3) Reduce indexing latency..."
|
||||
|
||||
>>> # Query about appointments
|
||||
>>> result = await nc_semantic_search_answer(
|
||||
... query="When is my next dentist appointment?",
|
||||
... ctx=ctx,
|
||||
... limit=10
|
||||
... )
|
||||
>>> len(result.sources) # Calendar events and related notes
|
||||
3
|
||||
"""
|
||||
# 1. Retrieve relevant documents via existing semantic search
|
||||
search_response = await nc_semantic_search(
|
||||
|
||||
@@ -64,20 +64,6 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
- Text files are decoded to UTF-8
|
||||
- Documents (PDF, DOCX, etc.) are parsed and text is extracted
|
||||
- Other binary files are base64 encoded
|
||||
|
||||
Examples:
|
||||
# Read a text file
|
||||
result = await nc_webdav_read_file("Documents/readme.txt")
|
||||
logger.info(result['content']) # Decoded text content
|
||||
|
||||
# Read a PDF document (automatically parsed)
|
||||
result = await nc_webdav_read_file("Documents/report.pdf")
|
||||
logger.info(result['content']) # Extracted text from PDF
|
||||
logger.info(result['parsing_metadata']) # Document parsing info
|
||||
|
||||
# Read a binary file
|
||||
result = await nc_webdav_read_file("Images/photo.jpg")
|
||||
logger.info(result['encoding']) # 'base64'
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
content, content_type = await client.webdav.read_file(path)
|
||||
|
||||
+2
-2
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.44.0"
|
||||
version = "0.46.2"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
@@ -10,7 +10,7 @@ license = {text = "AGPL-3.0-only"}
|
||||
requires-python = ">=3.11"
|
||||
keywords = ["nextcloud", "mcp", "model-context-protocol", "llm", "ai", "claude", "webdav", "caldav", "carddav"]
|
||||
dependencies = [
|
||||
"mcp[cli] (>=1.21,<1.22)",
|
||||
"mcp[cli] (>=1.22,<1.23)",
|
||||
"httpx (>=0.28.1,<0.29.0)",
|
||||
"pillow (>=10.3.0,<12.0.0)", # Compatible with fastembed
|
||||
"icalendar (>=6.0.0,<7.0.0)",
|
||||
|
||||
@@ -1645,7 +1645,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "mcp"
|
||||
version = "1.21.1"
|
||||
version = "1.22.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
@@ -1663,9 +1663,9 @@ dependencies = [
|
||||
{ name = "typing-inspection" },
|
||||
{ name = "uvicorn", marker = "sys_platform != 'emscripten'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f7/25/4df633e7574254ada574822db2245bbee424725d1b01bccae10bf128794e/mcp-1.21.1.tar.gz", hash = "sha256:540e6ac4b12b085c43f14879fde04cbdb10148a09ea9492ff82d8c7ba651a302", size = 469071, upload-time = "2025-11-13T20:33:46.139Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/a2/c5ec0ab38b35ade2ae49a90fada718fbc76811dc5aa1760414c6aaa6b08a/mcp-1.22.0.tar.gz", hash = "sha256:769b9ac90ed42134375b19e777a2858ca300f95f2e800982b3e2be62dfc0ba01", size = 471788, upload-time = "2025-11-20T20:11:28.095Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/49/af/01fb42df59ad15925ffc1e2e609adafddd3ac4572f606faae0dc8b55ba0c/mcp-1.21.1-py3-none-any.whl", hash = "sha256:dd35abe36d68530a8a1291daa25d50276d8731e545c0434d6e250a3700dd2a6d", size = 174852, upload-time = "2025-11-13T20:33:44.502Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/bb/711099f9c6bb52770f56e56401cdfb10da5b67029f701e0df29362df4c8e/mcp-1.22.0-py3-none-any.whl", hash = "sha256:bed758e24df1ed6846989c909ba4e3df339a27b4f30f1b8b627862a4bade4e98", size = 175489, upload-time = "2025-11-20T20:11:26.542Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@@ -1936,7 +1936,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.44.0"
|
||||
version = "0.46.2"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiosqlite" },
|
||||
@@ -1998,7 +1998,7 @@ requires-dist = [
|
||||
{ name = "icalendar", specifier = ">=6.0.0,<7.0.0" },
|
||||
{ name = "jinja2", specifier = ">=3.1.6" },
|
||||
{ name = "langchain-text-splitters", specifier = ">=1.0.0" },
|
||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.21,<1.22" },
|
||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.22,<1.23" },
|
||||
{ name = "opentelemetry-api", specifier = ">=1.28.2" },
|
||||
{ name = "opentelemetry-exporter-otlp-proto-grpc", specifier = ">=1.28.2" },
|
||||
{ name = "opentelemetry-instrumentation-asgi", specifier = ">=0.49b2" },
|
||||
|
||||
Reference in New Issue
Block a user