From 61ce873411f5eeb6baa920d8db3a3aa09dbbc3e7 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Mon, 26 Jan 2026 21:16:31 +0100 Subject: [PATCH] chore: Address reviewer comments and add error handling to PDF chunk viz preview endpoints --- nextcloud_mcp_server/api/management.py | 56 ++++-- .../test_management_pdf_preview_endpoint.py | 177 +++++++++++++++++- 2 files changed, 214 insertions(+), 19 deletions(-) diff --git a/nextcloud_mcp_server/api/management.py b/nextcloud_mcp_server/api/management.py index d54ceda..05d4d52 100644 --- a/nextcloud_mcp_server/api/management.py +++ b/nextcloud_mcp_server/api/management.py @@ -19,6 +19,7 @@ from importlib.metadata import version from typing import TYPE_CHECKING, Any import httpx +import pymupdf if TYPE_CHECKING: from nextcloud_mcp_server.auth.storage import RefreshTokenStorage @@ -1878,6 +1879,13 @@ async def get_pdf_preview(request: Request) -> JSONResponse: status_code=400, ) + # Validate no path traversal sequences + if ".." in file_path: + return JSONResponse( + {"success": False, "error": "Invalid file path"}, + status_code=400, + ) + try: page_num = _parse_int_param( request.query_params.get("page"), 1, 1, 10000, "page" @@ -1908,28 +1916,38 @@ async def get_pdf_preview(request: Request) -> JSONResponse: ) as nc_client: pdf_bytes, _ = await nc_client.webdav.read_file(file_path) - # Render page with PyMuPDF - import pymupdf - - doc = pymupdf.open(stream=pdf_bytes, filetype="pdf") - total_pages = doc.page_count - - # Validate page number - if page_num > total_pages: - doc.close() + # Check file size limit (50 MB) + max_pdf_size = 50 * 1024 * 1024 + if len(pdf_bytes) > max_pdf_size: return JSONResponse( { "success": False, - "error": f"Page {page_num} does not exist (document has {total_pages} pages)", + "error": f"PDF file exceeds maximum size limit ({max_pdf_size // (1024 * 1024)} MB)", }, - status_code=400, + status_code=413, ) - page = doc[page_num - 1] # 0-indexed - mat = pymupdf.Matrix(scale, scale) - pix = page.get_pixmap(matrix=mat, alpha=False) - png_bytes = pix.tobytes("png") - doc.close() + # Render page with PyMuPDF + doc = pymupdf.open(stream=pdf_bytes, filetype="pdf") + try: + total_pages = doc.page_count + + # Validate page number + if page_num > total_pages: + return JSONResponse( + { + "success": False, + "error": f"Page {page_num} does not exist (document has {total_pages} pages)", + }, + status_code=400, + ) + + page = doc[page_num - 1] # 0-indexed + mat = pymupdf.Matrix(scale, scale) + pix = page.get_pixmap(matrix=mat, alpha=False) + png_bytes = pix.tobytes("png") + finally: + doc.close() # Encode as base64 image_b64 = base64.b64encode(png_bytes).decode("ascii") @@ -1954,6 +1972,12 @@ async def get_pdf_preview(request: Request) -> JSONResponse: {"success": False, "error": "PDF file not found"}, status_code=404, ) + except (pymupdf.FileDataError, pymupdf.EmptyFileError): + logger.warning(f"Invalid or corrupted PDF file: {file_path_param}") + return JSONResponse( + {"success": False, "error": "Invalid or corrupted PDF file"}, + status_code=400, + ) except Exception as e: logger.error(f"PDF preview error: {e}", exc_info=True) error_msg = _sanitize_error_for_client(e, "get_pdf_preview") diff --git a/tests/unit/test_management_pdf_preview_endpoint.py b/tests/unit/test_management_pdf_preview_endpoint.py index 69950ef..e85682c 100644 --- a/tests/unit/test_management_pdf_preview_endpoint.py +++ b/tests/unit/test_management_pdf_preview_endpoint.py @@ -461,8 +461,8 @@ class TestPdfPreviewEdgeCases: data = response.json() assert data["success"] is False - def test_corrupted_pdf_returns_500(self): - """Test that corrupted PDF data returns 500.""" + def test_corrupted_pdf_returns_400(self): + """Test that corrupted PDF data returns 400 with specific error.""" mock_webdav = AsyncMock() # Return invalid PDF bytes mock_webdav.read_file = AsyncMock( @@ -496,9 +496,13 @@ class TestPdfPreviewEdgeCases: headers={"Authorization": "Bearer test-token"}, ) - assert response.status_code == 500 + assert response.status_code == 400 data = response.json() assert data["success"] is False + assert ( + "corrupted" in data["error"].lower() + or "invalid" in data["error"].lower() + ) def test_boundary_scale_values(self): """Test boundary scale values (min and max).""" @@ -543,3 +547,170 @@ class TestPdfPreviewEdgeCases: headers={"Authorization": "Bearer test-token"}, ) assert response.status_code == 200 + + +class TestPdfPreviewSecurityValidation: + """Tests for security validations in PDF preview endpoint.""" + + def test_path_traversal_returns_400(self): + """Test that path traversal attempts are blocked with 400.""" + with ( + patch( + "nextcloud_mcp_server.api.management.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.management.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + + # Test various path traversal patterns + traversal_paths = [ + "/Documents/../../../etc/passwd", + "/../secret.pdf", + "/folder/..%2F..%2Fetc/passwd", # URL-encoded + "/test/../secret.pdf", + ] + + for path in traversal_paths: + response = client.get( + f"/api/v1/pdf-preview?file_path={path}", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400, ( + f"Path traversal not blocked: {path}" + ) + data = response.json() + assert data["success"] is False + assert "invalid file path" in data["error"].lower() + + def test_file_size_limit_exceeded_returns_413(self): + """Test that files exceeding 50MB limit return 413.""" + # Create bytes larger than 50MB limit + large_pdf_bytes = b"x" * (51 * 1024 * 1024) # 51 MB + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(large_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.management.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.management.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/large.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 413 + data = response.json() + assert data["success"] is False + assert "size limit" in data["error"].lower() + + def test_corrupted_pdf_returns_400(self): + """Test that corrupted PDF returns 400 with specific error message.""" + # Invalid PDF content that PyMuPDF cannot parse + corrupted_pdf_bytes = b"not a valid PDF file content" + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(corrupted_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.management.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.management.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/corrupted.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert ( + "corrupted" in data["error"].lower() + or "invalid" in data["error"].lower() + ) + + def test_empty_pdf_returns_400(self): + """Test that empty PDF file returns 400.""" + empty_pdf_bytes = b"" + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(empty_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.management.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.management.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/empty.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False