From d5544a773132d646c2edb901a86a24467a1b99f3 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Mon, 26 Jan 2026 20:04:57 +0100 Subject: [PATCH] refactor(astrolabe): replace client-side PDF.js with server-side PyMuPDF rendering Replace the client-side PDF.js viewer with server-side rendering using PyMuPDF. This avoids CSP worker restrictions and ES private field access issues that affected Chromium browsers. Changes: - Add /api/v1/pdf-preview endpoint to MCP server (management.py) - Add pdf-preview route and controller action in Astrolabe PHP backend - Refactor PDFViewer.vue to display server-rendered PNG images - Remove pdfjs-dist dependency and client-side PDF loading code - Use @nextcloud/axios for CSRF token handling in PDFViewer The server downloads the PDF via WebDAV, renders the requested page with PyMuPDF at the specified scale, and returns a base64-encoded PNG image. Co-Authored-By: Claude Opus 4.5 --- nextcloud_mcp_server/api/management.py | 136 +++++++++++++ nextcloud_mcp_server/app.py | 5 +- third_party/astrolabe/appinfo/routes.php | 5 + .../lib/Controller/ApiController.php | 57 ++++++ .../astrolabe/lib/Service/McpServerClient.php | 57 ++++++ third_party/astrolabe/package.json | 1 - .../astrolabe/src/components/PDFViewer.vue | 185 ++++++------------ third_party/astrolabe/templates/index.php | 4 - third_party/astrolabe/vite.config.js | 37 +--- 9 files changed, 325 insertions(+), 162 deletions(-) diff --git a/nextcloud_mcp_server/api/management.py b/nextcloud_mcp_server/api/management.py index bfded9e..d54ceda 100644 --- a/nextcloud_mcp_server/api/management.py +++ b/nextcloud_mcp_server/api/management.py @@ -1825,3 +1825,139 @@ async def get_chunk_context(request: Request) -> JSONResponse: {"error": error_msg}, status_code=500, ) + + +async def get_pdf_preview(request: Request) -> JSONResponse: + """GET /api/v1/pdf-preview - Render PDF page to PNG image. + + Server-side PDF rendering using PyMuPDF. 
This endpoint allows Astrolabe + to display PDF pages without requiring client-side PDF.js, avoiding CSP + worker restrictions and ES private field issues in Chromium. + + Query parameters: + file_path: WebDAV path to PDF file (e.g., "/Documents/report.pdf") + page: Page number (1-indexed, default: 1) + scale: Zoom factor for rendering (default: 2.0 = 144 DPI) + + Returns: + { + "success": true, + "image": "<base64-encoded PNG>", + "page_number": 1, + "total_pages": 10 + } + + Requires OAuth bearer token for authentication. + """ + # Log incoming request + file_path_param = request.query_params.get("file_path", "") + page_param = request.query_params.get("page", "1") + logger.info(f"PDF preview request: file_path={file_path_param}, page={page_param}") + + try: + # Validate OAuth token and extract user + user_id, validated = await validate_token_and_get_user(request) + logger.info(f"PDF preview authenticated for user: {user_id}") + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/pdf-preview: {e}") + return JSONResponse( + { + "success": False, + "error": "Unauthorized", + "message": _sanitize_error_for_client(e, "get_pdf_preview"), + }, + status_code=401, + ) + + try: + # Parse and validate parameters + file_path = request.query_params.get("file_path") + if not file_path: + return JSONResponse( + {"success": False, "error": "Missing required parameter: file_path"}, + status_code=400, + ) + + try: + page_num = _parse_int_param( + request.query_params.get("page"), 1, 1, 10000, "page" + ) + scale = _parse_float_param( + request.query_params.get("scale"), 2.0, 0.5, 5.0, "scale" + ) + except ValueError as e: + return JSONResponse({"success": False, "error": str(e)}, status_code=400) + + # Get bearer token for WebDAV authentication + token = extract_bearer_token(request) + if not token: + raise ValueError("Missing token") + + # Get Nextcloud host from OAuth context + oauth_ctx = request.app.state.oauth_context + nextcloud_host = oauth_ctx.get("config", 
{}).get("nextcloud_host", "") + + if not nextcloud_host: + raise ValueError("Nextcloud host not configured") + + # Download PDF via WebDAV using user's token + from nextcloud_mcp_server.client import NextcloudClient + + async with NextcloudClient.from_token( + base_url=nextcloud_host, token=token, username=user_id + ) as nc_client: + pdf_bytes, _ = await nc_client.webdav.read_file(file_path) + + # Render page with PyMuPDF + import pymupdf + + doc = pymupdf.open(stream=pdf_bytes, filetype="pdf") + total_pages = doc.page_count + + # Validate page number + if page_num > total_pages: + doc.close() + return JSONResponse( + { + "success": False, + "error": f"Page {page_num} does not exist (document has {total_pages} pages)", + }, + status_code=400, + ) + + page = doc[page_num - 1] # 0-indexed + mat = pymupdf.Matrix(scale, scale) + pix = page.get_pixmap(matrix=mat, alpha=False) + png_bytes = pix.tobytes("png") + doc.close() + + # Encode as base64 + image_b64 = base64.b64encode(png_bytes).decode("ascii") + + logger.info( + f"Rendered PDF preview: {file_path} page {page_num}/{total_pages}, " + f"{len(png_bytes):,} bytes" + ) + + return JSONResponse( + { + "success": True, + "image": image_b64, + "page_number": page_num, + "total_pages": total_pages, + } + ) + + except FileNotFoundError: + logger.warning(f"PDF file not found: {file_path_param}") + return JSONResponse( + {"success": False, "error": "PDF file not found"}, + status_code=404, + ) + except Exception as e: + logger.error(f"PDF preview error: {e}", exc_info=True) + error_msg = _sanitize_error_for_client(e, "get_pdf_preview") + return JSONResponse( + {"success": False, "error": error_msg}, + status_code=500, + ) diff --git a/nextcloud_mcp_server/app.py b/nextcloud_mcp_server/app.py index 2579408..25e81d5 100644 --- a/nextcloud_mcp_server/app.py +++ b/nextcloud_mcp_server/app.py @@ -2119,6 +2119,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = get_app_password_status, 
get_chunk_context, get_installed_apps, + get_pdf_preview, get_server_status, get_user_session, get_vector_sync_status, @@ -2179,6 +2180,8 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = routes.append( Route("/api/v1/chunk-context", get_chunk_context, methods=["GET"]) ) + # PDF preview endpoint for Astrolabe (server-side rendering) + routes.append(Route("/api/v1/pdf-preview", get_pdf_preview, methods=["GET"])) # ADR-018: Unified search endpoint for Nextcloud PHP app integration routes.append(Route("/api/v1/search", unified_search, methods=["POST"])) routes.append(Route("/api/v1/apps", get_installed_apps, methods=["GET"])) @@ -2193,7 +2196,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = "/api/v1/users/{user_id}/session, /api/v1/users/{user_id}/revoke, " "/api/v1/users/{user_id}/app-password, " "/api/v1/vector-viz/search, /api/v1/search, /api/v1/apps, " - "/api/v1/webhooks" + "/api/v1/webhooks, /api/v1/pdf-preview" ) # ADR-016: Add Smithery well-known config endpoint for container runtime discovery diff --git a/third_party/astrolabe/appinfo/routes.php b/third_party/astrolabe/appinfo/routes.php index 5f969c8..35d0c8d 100644 --- a/third_party/astrolabe/appinfo/routes.php +++ b/third_party/astrolabe/appinfo/routes.php @@ -72,6 +72,11 @@ return [ 'url' => '/api/chunk-context', 'verb' => 'GET', ], + [ + 'name' => 'api#pdfPreview', + 'url' => '/api/pdf-preview', + 'verb' => 'GET', + ], // Admin settings routes [ diff --git a/third_party/astrolabe/lib/Controller/ApiController.php b/third_party/astrolabe/lib/Controller/ApiController.php index a2139b9..a28ac5b 100644 --- a/third_party/astrolabe/lib/Controller/ApiController.php +++ b/third_party/astrolabe/lib/Controller/ApiController.php @@ -788,4 +788,61 @@ class ApiController extends Controller { return new JSONResponse($result); } + + /** + * Get PDF page preview (server-side rendered). + * + * AJAX endpoint for PDF viewer in semantic search UI. 
+ * Uses server-side PyMuPDF rendering to avoid CSP/worker issues. + * + * @param string $file_path WebDAV path to PDF file + * @param int $page Page number (1-indexed, default: 1) + * @param float $scale Zoom factor (default: 2.0) + * @return JSONResponse + */ + #[NoAdminRequired] + public function pdfPreview( + string $file_path, + int $page = 1, + float $scale = 2.0, + ): JSONResponse { + $user = $this->userSession->getUser(); + if (!$user) { + return new JSONResponse(['success' => false, 'error' => 'User not authenticated'], Http::STATUS_UNAUTHORIZED); + } + + $userId = $user->getUID(); + + // Create refresh callback + $refreshCallback = function (string $refreshToken) { + $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); + + if (!$newTokenData) { + return null; + } + + return [ + 'access_token' => $newTokenData['access_token'], + 'refresh_token' => $newTokenData['refresh_token'] ?? $refreshToken, + 'expires_in' => $newTokenData['expires_in'] ?? 3600, + ]; + }; + + // Get user's OAuth token for MCP server with automatic refresh + $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); + if (!$accessToken) { + return new JSONResponse([ + 'success' => false, + 'error' => 'MCP server authorization required.' 
+ ], Http::STATUS_UNAUTHORIZED); + } + + $result = $this->client->getPdfPreview($file_path, $page, $scale, $accessToken); + + if (isset($result['error'])) { + return new JSONResponse(['success' => false, 'error' => $result['error']], Http::STATUS_INTERNAL_SERVER_ERROR); + } + + return new JSONResponse($result); + } } diff --git a/third_party/astrolabe/lib/Service/McpServerClient.php b/third_party/astrolabe/lib/Service/McpServerClient.php index 4316ed9..8cf1732 100644 --- a/third_party/astrolabe/lib/Service/McpServerClient.php +++ b/third_party/astrolabe/lib/Service/McpServerClient.php @@ -605,4 +605,61 @@ class McpServerClient { return ['error' => $e->getMessage()]; } } + + /** + * Get PDF page preview (server-side rendered). + * + * Renders a PDF page to PNG using PyMuPDF on the server. + * This avoids client-side PDF.js issues with CSP and ES private fields. + * + * Requires OAuth bearer token for authentication. + * + * @param string $filePath WebDAV path to PDF file + * @param int $page Page number (1-indexed) + * @param float $scale Zoom factor (default: 2.0) + * @param string $token OAuth bearer token + * @return array{ + * success?: bool, + * image?: string, + * page_number?: int, + * total_pages?: int, + * error?: string + * } + */ + public function getPdfPreview( + string $filePath, + int $page, + float $scale, + string $token, + ): array { + try { + $response = $this->httpClient->get( + $this->baseUrl . '/api/v1/pdf-preview', + [ + 'headers' => [ + 'Authorization' => 'Bearer ' . 
$token + ], + 'query' => [ + 'file_path' => $filePath, + 'page' => $page, + 'scale' => $scale, + ] + ] + ); + $data = json_decode($response->getBody(), true); + + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException('Invalid JSON response from server'); + } + + return $data; + } catch (\Exception $e) { + $this->logger->error('Failed to get PDF preview', [ + 'error' => $e->getMessage(), + 'file_path' => $filePath, + 'page' => $page, + ]); + return ['error' => $e->getMessage()]; + } + } } diff --git a/third_party/astrolabe/package.json b/third_party/astrolabe/package.json index a0a0751..c7ec153 100644 --- a/third_party/astrolabe/package.json +++ b/third_party/astrolabe/package.json @@ -25,7 +25,6 @@ "@nextcloud/router": "^3.0.1", "@nextcloud/vue": "^9.3.3", "markdown-it": "^14.1.0", - "pdfjs-dist": "^4.0.379", "plotly.js-dist-min": "^3.0.0", "vue": "^3.0.0", "vue-material-design-icons": "^5.3.1" diff --git a/third_party/astrolabe/src/components/PDFViewer.vue b/third_party/astrolabe/src/components/PDFViewer.vue index 596078f..e4081a1 100644 --- a/third_party/astrolabe/src/components/PDFViewer.vue +++ b/third_party/astrolabe/src/components/PDFViewer.vue @@ -8,23 +8,33 @@

{{ error }}

-
- +
+ PDF page
@@ -217,19 +160,19 @@ onBeforeUnmount(() => { } } -.pdf-canvas-container { +.pdf-image-container { position: relative; border: 1px solid var(--color-border); box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); background: var(--color-main-background); max-width: 100%; overflow: auto; +} - canvas { - display: block; - max-width: 100%; - height: auto; - } +.pdf-page-image { + display: block; + max-width: 100%; + height: auto; } @media (max-width: 768px) { diff --git a/third_party/astrolabe/templates/index.php b/third_party/astrolabe/templates/index.php index 07688a5..8059c60 100644 --- a/third_party/astrolabe/templates/index.php +++ b/third_party/astrolabe/templates/index.php @@ -5,10 +5,6 @@ declare(strict_types=1); use OCA\Astrolabe\AppInfo\Application; use OCP\Util; -// Load PDF.js loader first (must be external, not bundled by Vite, -// to avoid ES private field transformation issues with fake worker fallback) -// The loader imports pdf.mjs and sets window.pdfjsLib before the main app runs -Util::addScript(Application::APP_ID, 'pdfjs-loader'); Util::addScript(Application::APP_ID, Application::APP_ID . '-main'); Util::addStyle(Application::APP_ID, Application::APP_ID . 
'-main'); diff --git a/third_party/astrolabe/vite.config.js b/third_party/astrolabe/vite.config.js index 22e5d41..e8f1239 100644 --- a/third_party/astrolabe/vite.config.js +++ b/third_party/astrolabe/vite.config.js @@ -1,40 +1,7 @@ import { defineConfig } from 'vite' import vue from '@vitejs/plugin-vue' import { resolve } from 'path' -import { readFileSync, copyFileSync, writeFileSync, mkdirSync } from 'fs' - -// Plugin to copy PDF.js files to output directory -// Both pdf.mjs and pdf.worker.mjs are loaded externally to avoid Vite transforming -// ES private fields, which breaks compatibility with the fake worker fallback -function copyPdfFiles() { - return { - name: 'copy-pdf-files', - writeBundle() { - mkdirSync(resolve(__dirname, 'js'), { recursive: true }) - // Copy main library - copyFileSync( - resolve(__dirname, 'node_modules/pdfjs-dist/build/pdf.mjs'), - resolve(__dirname, 'js/pdf.mjs') - ) - console.log('Copied pdf.mjs to js/') - // Copy worker (loaded by pdfjs at runtime) - copyFileSync( - resolve(__dirname, 'node_modules/pdfjs-dist/build/pdf.worker.mjs'), - resolve(__dirname, 'js/pdf.worker.mjs') - ) - console.log('Copied pdf.worker.mjs to js/') - // Create loader script that imports pdf.mjs and sets window.pdfjsLib - // This is loaded via script tag before the main app - const loaderScript = `// PDF.js loader - imports pdf.mjs and exposes it globally -// Loaded before main app to make pdfjsLib available as window.pdfjsLib -import * as pdfjsLib from './pdf.mjs'; -window.pdfjsLib = pdfjsLib; -` - writeFileSync(resolve(__dirname, 'js/pdfjs-loader.mjs'), loaderScript) - console.log('Created pdfjs-loader.mjs in js/') - } - } -} +import { readFileSync } from 'fs' // Read app info from info.xml for @nextcloud/vue const infoXml = readFileSync(resolve(__dirname, 'appinfo/info.xml'), 'utf-8') @@ -42,7 +9,7 @@ const appName = infoXml.match(/<id>([^<]+)<\/id>/)?.[1] || 'astrolabe' const appVersion = infoXml.match(/<version>([^<]+)<\/version>/)?.[1] || '' export default 
defineConfig({ - plugins: [vue(), copyPdfFiles()], + plugins: [vue()], define: { appName: JSON.stringify(appName), appVersion: JSON.stringify(appVersion),