refactor(astrolabe): replace client-side PDF.js with server-side PyMuPDF rendering

Replace the client-side PDF.js viewer with server-side rendering using PyMuPDF.
This avoids CSP worker restrictions and ES private field access issues that
affected Chromium browsers.

Changes:
- Add /api/v1/pdf-preview endpoint to MCP server (management.py)
- Add pdf-preview route and controller action in Astrolabe PHP backend
- Refactor PDFViewer.vue to display server-rendered PNG images
- Remove pdfjs-dist dependency and client-side PDF loading code
- Use @nextcloud/axios for CSRF token handling in PDFViewer

The server downloads the PDF via WebDAV, renders the requested page with
PyMuPDF at the specified scale, and returns a base64-encoded PNG image.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Chris Coutinho
2026-01-26 20:04:57 +01:00
parent bc62f2a066
commit d5544a7731
9 changed files with 325 additions and 162 deletions
+136
View File
@@ -1825,3 +1825,139 @@ async def get_chunk_context(request: Request) -> JSONResponse:
{"error": error_msg},
status_code=500,
)
async def get_pdf_preview(request: Request) -> JSONResponse:
    """GET /api/v1/pdf-preview - Render PDF page to PNG image.

    Server-side PDF rendering using PyMuPDF. This endpoint allows Astrolabe
    to display PDF pages without requiring client-side PDF.js, avoiding CSP
    worker restrictions and ES private field issues in Chromium.

    Query parameters:
        file_path: WebDAV path to PDF file (e.g., "/Documents/report.pdf")
        page: Page number (1-indexed, default: 1)
        scale: Zoom factor for rendering (default: 2.0 = 144 DPI)

    Returns:
        On success (HTTP 200):
        {
            "success": true,
            "image": "<base64-encoded-png>",
            "page_number": 1,
            "total_pages": 10
        }

        On failure, ``{"success": false, "error": ...}`` with status:
            400 - ``file_path`` missing, ``page``/``scale`` rejected by the
                  parameter parsers, or ``page`` beyond the last page
            401 - token validation failed
            404 - ``FileNotFoundError`` raised while fetching the file
            500 - any other error (message sanitized for the client)

    Requires OAuth bearer token for authentication.
    """
    # Log incoming request (raw values, before any validation)
    file_path_param = request.query_params.get("file_path", "<not provided>")
    page_param = request.query_params.get("page", "1")
    logger.info(f"PDF preview request: file_path={file_path_param}, page={page_param}")

    try:
        # Validate OAuth token and extract user
        user_id, _ = await validate_token_and_get_user(request)
        logger.info(f"PDF preview authenticated for user: {user_id}")
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/pdf-preview: {e}")
        return JSONResponse(
            {
                "success": False,
                "error": "Unauthorized",
                "message": _sanitize_error_for_client(e, "get_pdf_preview"),
            },
            status_code=401,
        )

    try:
        # Parse and validate parameters
        file_path = request.query_params.get("file_path")
        if not file_path:
            return JSONResponse(
                {"success": False, "error": "Missing required parameter: file_path"},
                status_code=400,
            )

        try:
            page_num = _parse_int_param(
                request.query_params.get("page"), 1, 1, 10000, "page"
            )
            scale = _parse_float_param(
                request.query_params.get("scale"), 2.0, 0.5, 5.0, "scale"
            )
        except ValueError as e:
            return JSONResponse({"success": False, "error": str(e)}, status_code=400)

        # Get bearer token for WebDAV authentication
        token = extract_bearer_token(request)
        if not token:
            raise ValueError("Missing token")

        # Get Nextcloud host from OAuth context
        oauth_ctx = request.app.state.oauth_context
        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
        if not nextcloud_host:
            raise ValueError("Nextcloud host not configured")

        # Download PDF via WebDAV using user's token
        from nextcloud_mcp_server.client import NextcloudClient

        async with NextcloudClient.from_token(
            base_url=nextcloud_host, token=token, username=user_id
        ) as nc_client:
            pdf_bytes, _ = await nc_client.webdav.read_file(file_path)

        # Render page with PyMuPDF
        import pymupdf

        doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
        try:
            total_pages = doc.page_count

            # Validate page number against the actual document length
            if page_num > total_pages:
                return JSONResponse(
                    {
                        "success": False,
                        "error": f"Page {page_num} does not exist (document has {total_pages} pages)",
                    },
                    status_code=400,
                )

            page = doc[page_num - 1]  # 0-indexed
            mat = pymupdf.Matrix(scale, scale)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            png_bytes = pix.tobytes("png")
        finally:
            # Always release the document, including when rendering raises
            # or when we return early for an out-of-range page.
            doc.close()

        # Encode as base64
        image_b64 = base64.b64encode(png_bytes).decode("ascii")
        logger.info(
            f"Rendered PDF preview: {file_path} page {page_num}/{total_pages}, "
            f"{len(png_bytes):,} bytes"
        )
        return JSONResponse(
            {
                "success": True,
                "image": image_b64,
                "page_number": page_num,
                "total_pages": total_pages,
            }
        )
    except FileNotFoundError:
        logger.warning(f"PDF file not found: {file_path_param}")
        return JSONResponse(
            {"success": False, "error": "PDF file not found"},
            status_code=404,
        )
    except Exception as e:
        logger.error(f"PDF preview error: {e}", exc_info=True)
        error_msg = _sanitize_error_for_client(e, "get_pdf_preview")
        return JSONResponse(
            {"success": False, "error": error_msg},
            status_code=500,
        )
+4 -1
View File
@@ -2119,6 +2119,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
get_app_password_status,
get_chunk_context,
get_installed_apps,
get_pdf_preview,
get_server_status,
get_user_session,
get_vector_sync_status,
@@ -2179,6 +2180,8 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
routes.append(
Route("/api/v1/chunk-context", get_chunk_context, methods=["GET"])
)
# PDF preview endpoint for Astrolabe (server-side rendering)
routes.append(Route("/api/v1/pdf-preview", get_pdf_preview, methods=["GET"]))
# ADR-018: Unified search endpoint for Nextcloud PHP app integration
routes.append(Route("/api/v1/search", unified_search, methods=["POST"]))
routes.append(Route("/api/v1/apps", get_installed_apps, methods=["GET"]))
@@ -2193,7 +2196,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
"/api/v1/users/{user_id}/session, /api/v1/users/{user_id}/revoke, "
"/api/v1/users/{user_id}/app-password, "
"/api/v1/vector-viz/search, /api/v1/search, /api/v1/apps, "
"/api/v1/webhooks"
"/api/v1/webhooks, /api/v1/pdf-preview"
)
# ADR-016: Add Smithery well-known config endpoint for container runtime discovery
+5
View File
@@ -72,6 +72,11 @@ return [
'url' => '/api/chunk-context',
'verb' => 'GET',
],
[
'name' => 'api#pdfPreview',
'url' => '/api/pdf-preview',
'verb' => 'GET',
],
// Admin settings routes
[
+57
View File
@@ -788,4 +788,61 @@ class ApiController extends Controller {
return new JSONResponse($result);
}
/**
 * Return a server-rendered preview of a single PDF page.
 *
 * AJAX endpoint backing the PDF viewer in the semantic search UI. Rendering
 * is delegated to the MCP server (PyMuPDF) to avoid CSP/worker issues in
 * the browser.
 *
 * @param string $file_path WebDAV path to PDF file
 * @param int $page Page number (1-indexed, default: 1)
 * @param float $scale Zoom factor (default: 2.0)
 * @return JSONResponse The MCP server result on success; otherwise a body with
 *                      'success' => false and 'error', sent with HTTP 401
 *                      (no user / no access token) or HTTP 500 (result
 *                      contains an 'error' key)
 */
#[NoAdminRequired]
public function pdfPreview(
	string $file_path,
	int $page = 1,
	float $scale = 2.0,
): JSONResponse {
	$currentUser = $this->userSession->getUser();
	if (!$currentUser) {
		return new JSONResponse(
			['success' => false, 'error' => 'User not authenticated'],
			Http::STATUS_UNAUTHORIZED
		);
	}

	// Callback handed to the token storage; it exchanges a refresh token
	// for new token data, or yields null when the refresher returns nothing.
	$refresh = function (string $refreshToken) {
		$renewed = $this->tokenRefresher->refreshAccessToken($refreshToken);

		return $renewed
			? [
				'access_token' => $renewed['access_token'],
				'refresh_token' => $renewed['refresh_token'] ?? $refreshToken,
				'expires_in' => $renewed['expires_in'] ?? 3600,
			]
			: null;
	};

	// Get user's OAuth token for MCP server with automatic refresh
	$bearer = $this->tokenStorage->getAccessToken($currentUser->getUID(), $refresh);
	if (!$bearer) {
		return new JSONResponse(
			[
				'success' => false,
				'error' => 'MCP server authorization required.'
			],
			Http::STATUS_UNAUTHORIZED
		);
	}

	$preview = $this->client->getPdfPreview($file_path, $page, $scale, $bearer);
	if (!isset($preview['error'])) {
		return new JSONResponse($preview);
	}

	// Any error reported by the client is surfaced as a 500.
	return new JSONResponse(
		['success' => false, 'error' => $preview['error']],
		Http::STATUS_INTERNAL_SERVER_ERROR
	);
}
}
+57
View File
@@ -605,4 +605,61 @@ class McpServerClient {
return ['error' => $e->getMessage()];
}
}
/**
 * Get PDF page preview (server-side rendered).
 *
 * Renders a PDF page to PNG using PyMuPDF on the server.
 * This avoids client-side PDF.js issues with CSP and ES private fields.
 *
 * Requires OAuth bearer token for authentication.
 *
 * Never throws for request or decoding failures: any exception is logged
 * and reported as ['error' => <message>].
 *
 * @param string $filePath WebDAV path to PDF file
 * @param int $page Page number (1-indexed)
 * @param float $scale Zoom factor (default: 2.0)
 * @param string $token OAuth bearer token
 * @return array{
 *     success?: bool,
 *     image?: string,
 *     page_number?: int,
 *     total_pages?: int,
 *     error?: string
 * }
 */
public function getPdfPreview(
	string $filePath,
	int $page,
	float $scale,
	string $token,
): array {
	try {
		$response = $this->httpClient->get(
			$this->baseUrl . '/api/v1/pdf-preview',
			[
				'headers' => [
					'Authorization' => 'Bearer ' . $token
				],
				'query' => [
					'file_path' => $filePath,
					'page' => $page,
					'scale' => $scale,
				]
			]
		);

		$data = json_decode($response->getBody(), true);
		// json_decode can succeed yet yield a non-array (e.g. null or a
		// scalar). Returning that would raise a \TypeError against the
		// declared array return type, which the catch below would not
		// intercept, so treat it as an invalid response here.
		if (json_last_error() !== JSON_ERROR_NONE || !is_array($data)) {
			throw new \RuntimeException('Invalid JSON response from server');
		}

		return $data;
	} catch (\Exception $e) {
		$this->logger->error('Failed to get PDF preview', [
			'error' => $e->getMessage(),
			'file_path' => $filePath,
			'page' => $page,
		]);
		return ['error' => $e->getMessage()];
	}
}
}
-1
View File
@@ -25,7 +25,6 @@
"@nextcloud/router": "^3.0.1",
"@nextcloud/vue": "^9.3.3",
"markdown-it": "^14.1.0",
"pdfjs-dist": "^4.0.379",
"plotly.js-dist-min": "^3.0.0",
"vue": "^3.0.0",
"vue-material-design-icons": "^5.3.1"
+64 -121
View File
@@ -8,23 +8,33 @@
<AlertCircle :size="48" />
<p>{{ error }}</p>
</div>
<div v-else ref="containerRef" class="pdf-canvas-container">
<canvas ref="canvasRef" />
<div v-else class="pdf-image-container">
<img
:src="`data:image/png;base64,${imageData}`"
class="pdf-page-image"
alt="PDF page" />
</div>
</div>
</template>
<script setup>
import { ref, shallowRef, markRaw, watch, onMounted, onBeforeUnmount, nextTick } from 'vue'
/**
* PDFViewer - Server-side PDF rendering component.
*
* Displays PDF pages as server-rendered PNG images, avoiding client-side
* PDF.js issues with CSP worker restrictions and ES private field access
* in Chromium browsers.
*
* The server uses PyMuPDF to render PDF pages to PNG images, which are
* returned as base64-encoded data.
*/
import { ref, watch, onMounted } from 'vue'
import axios from '@nextcloud/axios'
import { generateUrl } from '@nextcloud/router'
import { translate as t } from '@nextcloud/l10n'
import { NcLoadingIcon } from '@nextcloud/vue'
import AlertCircle from 'vue-material-design-icons/AlertCircle.vue'
// Use global pdfjsLib loaded by pdfjs-loader.mjs (external, not bundled)
// This avoids Vite transforming ES private fields which breaks fake worker compatibility
const pdfjsLib = window.pdfjsLib
const props = defineProps({
filePath: {
type: String,
@@ -36,68 +46,68 @@ const props = defineProps({
},
scale: {
type: Number,
default: 1.5,
default: 2.0,
},
})
const emit = defineEmits(['loaded', 'error', 'page-rendered'])
// Reactive state
// Use shallowRef for pdfDoc because PDFDocumentProxy uses ES private fields
// which can't be accessed through Vue's Proxy wrapper
const pdfDoc = shallowRef(null)
const loading = ref(true)
const error = ref(null)
const imageData = ref(null)
const totalPages = ref(0)
const canvasRef = ref(null)
const containerRef = ref(null)
// Methods
async function loadPDF() {
/**
* Fetch a PDF page from the server as a PNG image.
*/
async function loadPage() {
loading.value = true
error.value = null
try {
// Clean and encode the file path
const cleanPath = props.filePath.startsWith('/')
? props.filePath.substring(1)
: props.filePath
const encodedPath = cleanPath.split('/').map(encodeURIComponent).join('/')
const downloadUrl = generateUrl(`/remote.php/webdav/${encodedPath}`)
// Build request URL
const url = generateUrl('/apps/astrolabe/api/pdf-preview')
const params = {
file_path: props.filePath,
page: props.pageNumber,
scale: props.scale,
}
// Set worker source using OC.linkTo for correct app webroot path
// Must be done here (not at module load time) because _oc_appswebroots isn't populated until after page load
pdfjsLib.GlobalWorkerOptions.workerSrc = window.OC.linkTo('astrolabe', 'js/pdf.worker.mjs')
const response = await axios.get(url, { params })
// Load PDF document
const loadingTask = pdfjsLib.getDocument({
url: downloadUrl,
withCredentials: true,
useWorkerFetch: false, // Disable worker fetch for CSP compliance
isEvalSupported: false, // Disable eval for CSP
})
if (!response.data.success) {
throw new Error(response.data.error || 'Failed to load PDF page')
}
// Use markRaw to prevent Vue from wrapping in Proxy (breaks private field access in Chromium)
pdfDoc.value = markRaw(await loadingTask.promise)
totalPages.value = pdfDoc.value.numPages
emit('loaded', { totalPages: totalPages.value })
const data = response.data
// Update state
imageData.value = data.image
totalPages.value = data.total_pages
// Emit loaded event - App.vue uses this for navigation controls
emit('loaded', { totalPages: data.total_pages })
emit('page-rendered', { pageNumber: props.pageNumber })
// Set loading to false - the watcher will handle rendering
loading.value = false
} catch (err) {
console.error('PDF load error:', err)
// Provide user-friendly error messages
if (err.name === 'MissingPDFException') {
// Provide user-friendly error messages based on axios error structure
const status = err.response?.status
const serverError = err.response?.data?.error
if (status === 404) {
error.value = t('astrolabe', 'PDF file not found')
} else if (err.name === 'InvalidPDFException') {
error.value = t('astrolabe', 'Invalid or corrupted PDF file')
} else if (err.message?.includes('NetworkError') || err.message?.includes('Network')) {
} else if (status === 401 || status === 403) {
error.value = serverError || t('astrolabe', 'Authorization required to view PDF')
} else if (err.code === 'ERR_NETWORK' || err.message?.includes('Network')) {
error.value = t('astrolabe', 'Network error loading PDF')
} else if (err.message?.includes('404')) {
error.value = t('astrolabe', 'PDF file not found')
} else if (serverError) {
error.value = serverError
} else {
error.value = t('astrolabe', 'Unable to load PDF file')
error.value = t('astrolabe', 'Unable to load PDF page')
}
emit('error', err)
@@ -105,79 +115,12 @@ async function loadPDF() {
}
}
async function renderPage(pageNum) {
if (!pdfDoc.value) {
return
}
// Re-fetch when file path or page number changes
watch(() => [props.filePath, props.pageNumber], loadPage)
try {
// Use markRaw to prevent Vue from wrapping in Proxy (breaks private field access in Chromium)
const page = markRaw(await pdfDoc.value.getPage(pageNum))
const canvas = canvasRef.value
if (!canvas) {
console.error('PDF canvas ref not found')
error.value = t('astrolabe', 'Canvas element not available')
return
}
const context = canvas.getContext('2d')
// Use scale for better resolution on high-DPI screens
const viewport = page.getViewport({ scale: props.scale })
canvas.height = viewport.height
canvas.width = viewport.width
// Render page to canvas
const renderContext = {
canvasContext: context,
viewport,
}
await page.render(renderContext).promise
emit('page-rendered', { pageNumber: pageNum })
} catch (err) {
console.error('PDF render error:', err)
error.value = t('astrolabe', 'Error rendering PDF page')
emit('error', err)
}
}
// Watchers
watch(() => props.pageNumber, (newPage) => {
if (pdfDoc.value && newPage > 0 && newPage <= totalPages.value) {
renderPage(newPage)
}
})
watch(() => props.filePath, () => {
// Reload PDF if file path changes
loadPDF()
})
watch(loading, async (newLoading) => {
// When loading completes, wait for canvas to be available and render
if (!newLoading && pdfDoc.value && !error.value) {
// Wait for Vue to update DOM
await nextTick()
// Canvas should now be rendered (v-else condition)
if (canvasRef.value) {
await renderPage(props.pageNumber)
}
}
})
// Lifecycle hooks
// Initial load
onMounted(() => {
loadPDF()
})
onBeforeUnmount(() => {
if (pdfDoc.value) {
pdfDoc.value.destroy()
}
loadPage()
})
</script>
@@ -217,19 +160,19 @@ onBeforeUnmount(() => {
}
}
.pdf-canvas-container {
.pdf-image-container {
position: relative;
border: 1px solid var(--color-border);
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
background: var(--color-main-background);
max-width: 100%;
overflow: auto;
}
canvas {
display: block;
max-width: 100%;
height: auto;
}
.pdf-page-image {
display: block;
max-width: 100%;
height: auto;
}
@media (max-width: 768px) {
-4
View File
@@ -5,10 +5,6 @@ declare(strict_types=1);
use OCA\Astrolabe\AppInfo\Application;
use OCP\Util;
// Load PDF.js loader first (must be external, not bundled by Vite,
// to avoid ES private field transformation issues with fake worker fallback)
// The loader imports pdf.mjs and sets window.pdfjsLib before the main app runs
Util::addScript(Application::APP_ID, 'pdfjs-loader');
Util::addScript(Application::APP_ID, Application::APP_ID . '-main');
Util::addStyle(Application::APP_ID, Application::APP_ID . '-main');
+2 -35
View File
@@ -1,40 +1,7 @@
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'
import { resolve } from 'path'
import { readFileSync, copyFileSync, writeFileSync, mkdirSync } from 'fs'
// Plugin to copy PDF.js files to output directory
// Both pdf.mjs and pdf.worker.mjs are loaded externally to avoid Vite transforming
// ES private fields, which breaks compatibility with the fake worker fallback
function copyPdfFiles() {
return {
name: 'copy-pdf-files',
writeBundle() {
mkdirSync(resolve(__dirname, 'js'), { recursive: true })
// Copy main library
copyFileSync(
resolve(__dirname, 'node_modules/pdfjs-dist/build/pdf.mjs'),
resolve(__dirname, 'js/pdf.mjs')
)
console.log('Copied pdf.mjs to js/')
// Copy worker (loaded by pdfjs at runtime)
copyFileSync(
resolve(__dirname, 'node_modules/pdfjs-dist/build/pdf.worker.mjs'),
resolve(__dirname, 'js/pdf.worker.mjs')
)
console.log('Copied pdf.worker.mjs to js/')
// Create loader script that imports pdf.mjs and sets window.pdfjsLib
// This is loaded via script tag before the main app
const loaderScript = `// PDF.js loader - imports pdf.mjs and exposes it globally
// Loaded before main app to make pdfjsLib available as window.pdfjsLib
import * as pdfjsLib from './pdf.mjs';
window.pdfjsLib = pdfjsLib;
`
writeFileSync(resolve(__dirname, 'js/pdfjs-loader.mjs'), loaderScript)
console.log('Created pdfjs-loader.mjs in js/')
}
}
}
import { readFileSync } from 'fs'
// Read app info from info.xml for @nextcloud/vue
const infoXml = readFileSync(resolve(__dirname, 'appinfo/info.xml'), 'utf-8')
@@ -42,7 +9,7 @@ const appName = infoXml.match(/<id>([^<]+)<\/id>/)?.[1] || 'astrolabe'
const appVersion = infoXml.match(/<version>([^<]+)<\/version>/)?.[1] || ''
export default defineConfig({
plugins: [vue(), copyPdfFiles()],
plugins: [vue()],
define: {
appName: JSON.stringify(appName),
appVersion: JSON.stringify(appVersion),