diff --git a/Dockerfile b/Dockerfile index d268fe6..91bc485 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb5161 # 2. sqlite for development with token db RUN apt update && apt install --no-install-recommends --no-install-suggests -y \ git \ + tesseract-ocr \ sqlite3 && apt clean WORKDIR /app @@ -17,5 +18,7 @@ RUN uv sync --locked --no-dev --no-editable --no-cache ENV PYTHONUNBUFFERED=1 ENV VIRTUAL_ENV=/app/.venv +ENV PATH=/app/.venv/bin:$PATH +ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"] diff --git a/nextcloud_mcp_server/app.py b/nextcloud_mcp_server/app.py index 56fd4f5..f532bd4 100644 --- a/nextcloud_mcp_server/app.py +++ b/nextcloud_mcp_server/app.py @@ -122,6 +122,26 @@ def initialize_document_processors(): except Exception as e: logger.warning(f"Failed to register Tesseract processor: {e}") + # Register PyMuPDF processor (high priority, local, no API required) + if "pymupdf" in config["processors"]: + pymupdf_config = config["processors"]["pymupdf"] + try: + from nextcloud_mcp_server.document_processors.pymupdf import ( + PyMuPDFProcessor, + ) + + processor = PyMuPDFProcessor( + extract_images=pymupdf_config.get("extract_images", True), + image_dir=pymupdf_config.get("image_dir"), + ) + registry.register(processor, priority=15) # Higher than unstructured + logger.info( + f"Registered PyMuPDF processor: extract_images={pymupdf_config.get('extract_images', True)}" + ) + registered_count += 1 + except Exception as e: + logger.warning(f"Failed to register PyMuPDF processor: {e}") + # Register custom processor if "custom" in config["processors"]: custom_config = config["processors"]["custom"] diff --git a/nextcloud_mcp_server/auth/static/vector-viz.css b/nextcloud_mcp_server/auth/static/vector-viz.css index 3e9398b..9d94d8c 100644 --- a/nextcloud_mcp_server/auth/static/vector-viz.css +++ 
b/nextcloud_mcp_server/auth/static/vector-viz.css @@ -190,3 +190,30 @@ color: var(--color-text-maxcontrast); font-style: italic; } + +/* PDF highlighted image styles */ +.chunk-image-container { + margin-bottom: 16px; + border: 1px solid var(--color-border); + border-radius: var(--border-radius); + overflow: hidden; + background: #fff; +} +.chunk-image-header { + background: var(--color-background-dark); + padding: 8px 12px; + font-size: 12px; + font-weight: 500; + color: var(--color-text-maxcontrast); + border-bottom: 1px solid var(--color-border); + font-family: var(--font-face); +} +.chunk-highlighted-image { + display: block; + max-width: 100%; + height: auto; + cursor: zoom-in; +} +.chunk-highlighted-image:hover { + opacity: 0.95; +} diff --git a/nextcloud_mcp_server/auth/static/vector-viz.js b/nextcloud_mcp_server/auth/static/vector-viz.js index c01c1e7..f59f46c 100644 --- a/nextcloud_mcp_server/auth/static/vector-viz.js +++ b/nextcloud_mcp_server/auth/static/vector-viz.js @@ -217,7 +217,7 @@ function vizApp() { }, async toggleChunk(result) { - const resultKey = `${result.doc_type}_${result.id}`; + const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`; if (this.isChunkExpanded(resultKey)) { delete this.expandedChunks[resultKey]; diff --git a/nextcloud_mcp_server/auth/templates/vector_viz.html b/nextcloud_mcp_server/auth/templates/vector_viz.html index a052d9e..c36a0eb 100644 --- a/nextcloud_mcp_server/auth/templates/vector_viz.html +++ b/nextcloud_mcp_server/auth/templates/vector_viz.html @@ -117,12 +117,13 @@