chore: Address reviewer comments and add error handling to PDF chunk viz preview endpoints

This commit is contained in:
Chris Coutinho
2026-01-26 21:16:31 +01:00
parent 8507e480d6
commit 61ce873411
2 changed files with 214 additions and 19 deletions
+40 -16
View File
@@ -19,6 +19,7 @@ from importlib.metadata import version
from typing import TYPE_CHECKING, Any
import httpx
import pymupdf
if TYPE_CHECKING:
from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
@@ -1878,6 +1879,13 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
status_code=400,
)
# Validate no path traversal sequences
if ".." in file_path:
return JSONResponse(
{"success": False, "error": "Invalid file path"},
status_code=400,
)
try:
page_num = _parse_int_param(
request.query_params.get("page"), 1, 1, 10000, "page"
@@ -1908,28 +1916,38 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
) as nc_client:
pdf_bytes, _ = await nc_client.webdav.read_file(file_path)
# Render page with PyMuPDF
import pymupdf
doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
total_pages = doc.page_count
# Validate page number
if page_num > total_pages:
doc.close()
# Check file size limit (50 MB)
max_pdf_size = 50 * 1024 * 1024
if len(pdf_bytes) > max_pdf_size:
return JSONResponse(
{
"success": False,
"error": f"Page {page_num} does not exist (document has {total_pages} pages)",
"error": f"PDF file exceeds maximum size limit ({max_pdf_size // (1024 * 1024)} MB)",
},
status_code=400,
status_code=413,
)
page = doc[page_num - 1] # 0-indexed
mat = pymupdf.Matrix(scale, scale)
pix = page.get_pixmap(matrix=mat, alpha=False)
png_bytes = pix.tobytes("png")
doc.close()
# Render page with PyMuPDF
doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
try:
total_pages = doc.page_count
# Validate page number
if page_num > total_pages:
return JSONResponse(
{
"success": False,
"error": f"Page {page_num} does not exist (document has {total_pages} pages)",
},
status_code=400,
)
page = doc[page_num - 1] # 0-indexed
mat = pymupdf.Matrix(scale, scale)
pix = page.get_pixmap(matrix=mat, alpha=False)
png_bytes = pix.tobytes("png")
finally:
doc.close()
# Encode as base64
image_b64 = base64.b64encode(png_bytes).decode("ascii")
@@ -1954,6 +1972,12 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
{"success": False, "error": "PDF file not found"},
status_code=404,
)
except (pymupdf.FileDataError, pymupdf.EmptyFileError):
logger.warning(f"Invalid or corrupted PDF file: {file_path_param}")
return JSONResponse(
{"success": False, "error": "Invalid or corrupted PDF file"},
status_code=400,
)
except Exception as e:
logger.error(f"PDF preview error: {e}", exc_info=True)
error_msg = _sanitize_error_for_client(e, "get_pdf_preview")
@@ -461,8 +461,8 @@ class TestPdfPreviewEdgeCases:
data = response.json()
assert data["success"] is False
def test_corrupted_pdf_returns_500(self):
"""Test that corrupted PDF data returns 500."""
def test_corrupted_pdf_returns_400(self):
"""Test that corrupted PDF data returns 400 with specific error."""
mock_webdav = AsyncMock()
# Return invalid PDF bytes
mock_webdav.read_file = AsyncMock(
@@ -496,9 +496,13 @@ class TestPdfPreviewEdgeCases:
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 500
assert response.status_code == 400
data = response.json()
assert data["success"] is False
assert (
"corrupted" in data["error"].lower()
or "invalid" in data["error"].lower()
)
def test_boundary_scale_values(self):
"""Test boundary scale values (min and max)."""
@@ -543,3 +547,170 @@ class TestPdfPreviewEdgeCases:
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 200
class TestPdfPreviewSecurityValidation:
"""Tests for security validations in PDF preview endpoint."""
def test_path_traversal_returns_400(self):
"""Test that path traversal attempts are blocked with 400."""
with (
patch(
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
new_callable=AsyncMock,
return_value=("testuser", True),
),
patch(
"nextcloud_mcp_server.api.management.extract_bearer_token",
return_value="test-token",
),
):
app = create_test_app()
client = TestClient(app)
# Test various path traversal patterns
traversal_paths = [
"/Documents/../../../etc/passwd",
"/../secret.pdf",
"/folder/..%2F..%2Fetc/passwd", # URL-encoded
"/test/../secret.pdf",
]
for path in traversal_paths:
response = client.get(
f"/api/v1/pdf-preview?file_path={path}",
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 400, (
f"Path traversal not blocked: {path}"
)
data = response.json()
assert data["success"] is False
assert "invalid file path" in data["error"].lower()
def test_file_size_limit_exceeded_returns_413(self):
"""Test that files exceeding 50MB limit return 413."""
# Create bytes larger than 50MB limit
large_pdf_bytes = b"x" * (51 * 1024 * 1024) # 51 MB
mock_webdav = AsyncMock()
mock_webdav.read_file = AsyncMock(
return_value=(large_pdf_bytes, "application/pdf")
)
mock_nc_client = MagicMock()
mock_nc_client.webdav = mock_webdav
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
with (
patch(
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
new_callable=AsyncMock,
return_value=("testuser", True),
),
patch(
"nextcloud_mcp_server.api.management.extract_bearer_token",
return_value="test-token",
),
patch(
"nextcloud_mcp_server.client.NextcloudClient.from_token",
return_value=mock_nc_client,
),
):
app = create_test_app()
client = TestClient(app)
response = client.get(
"/api/v1/pdf-preview?file_path=/large.pdf",
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 413
data = response.json()
assert data["success"] is False
assert "size limit" in data["error"].lower()
def test_corrupted_pdf_returns_400(self):
"""Test that corrupted PDF returns 400 with specific error message."""
# Invalid PDF content that PyMuPDF cannot parse
corrupted_pdf_bytes = b"not a valid PDF file content"
mock_webdav = AsyncMock()
mock_webdav.read_file = AsyncMock(
return_value=(corrupted_pdf_bytes, "application/pdf")
)
mock_nc_client = MagicMock()
mock_nc_client.webdav = mock_webdav
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
with (
patch(
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
new_callable=AsyncMock,
return_value=("testuser", True),
),
patch(
"nextcloud_mcp_server.api.management.extract_bearer_token",
return_value="test-token",
),
patch(
"nextcloud_mcp_server.client.NextcloudClient.from_token",
return_value=mock_nc_client,
),
):
app = create_test_app()
client = TestClient(app)
response = client.get(
"/api/v1/pdf-preview?file_path=/corrupted.pdf",
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 400
data = response.json()
assert data["success"] is False
assert (
"corrupted" in data["error"].lower()
or "invalid" in data["error"].lower()
)
def test_empty_pdf_returns_400(self):
"""Test that empty PDF file returns 400."""
empty_pdf_bytes = b""
mock_webdav = AsyncMock()
mock_webdav.read_file = AsyncMock(
return_value=(empty_pdf_bytes, "application/pdf")
)
mock_nc_client = MagicMock()
mock_nc_client.webdav = mock_webdav
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
with (
patch(
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
new_callable=AsyncMock,
return_value=("testuser", True),
),
patch(
"nextcloud_mcp_server.api.management.extract_bearer_token",
return_value="test-token",
),
patch(
"nextcloud_mcp_server.client.NextcloudClient.from_token",
return_value=mock_nc_client,
),
):
app = create_test_app()
client = TestClient(app)
response = client.get(
"/api/v1/pdf-preview?file_path=/empty.pdf",
headers={"Authorization": "Bearer test-token"},
)
assert response.status_code == 400
data = response.json()
assert data["success"] is False