chore: Address reviewer comments and add error handling to PDF chunk viz preview endpoints
This commit is contained in:
@@ -19,6 +19,7 @@ from importlib.metadata import version
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import httpx
|
||||
import pymupdf
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
|
||||
@@ -1878,6 +1879,13 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Validate no path traversal sequences
|
||||
if ".." in file_path:
|
||||
return JSONResponse(
|
||||
{"success": False, "error": "Invalid file path"},
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
try:
|
||||
page_num = _parse_int_param(
|
||||
request.query_params.get("page"), 1, 1, 10000, "page"
|
||||
@@ -1908,28 +1916,38 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
|
||||
) as nc_client:
|
||||
pdf_bytes, _ = await nc_client.webdav.read_file(file_path)
|
||||
|
||||
# Render page with PyMuPDF
|
||||
import pymupdf
|
||||
|
||||
doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
|
||||
total_pages = doc.page_count
|
||||
|
||||
# Validate page number
|
||||
if page_num > total_pages:
|
||||
doc.close()
|
||||
# Check file size limit (50 MB)
|
||||
max_pdf_size = 50 * 1024 * 1024
|
||||
if len(pdf_bytes) > max_pdf_size:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Page {page_num} does not exist (document has {total_pages} pages)",
|
||||
"error": f"PDF file exceeds maximum size limit ({max_pdf_size // (1024 * 1024)} MB)",
|
||||
},
|
||||
status_code=400,
|
||||
status_code=413,
|
||||
)
|
||||
|
||||
page = doc[page_num - 1] # 0-indexed
|
||||
mat = pymupdf.Matrix(scale, scale)
|
||||
pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
png_bytes = pix.tobytes("png")
|
||||
doc.close()
|
||||
# Render page with PyMuPDF
|
||||
doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
|
||||
try:
|
||||
total_pages = doc.page_count
|
||||
|
||||
# Validate page number
|
||||
if page_num > total_pages:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Page {page_num} does not exist (document has {total_pages} pages)",
|
||||
},
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
page = doc[page_num - 1] # 0-indexed
|
||||
mat = pymupdf.Matrix(scale, scale)
|
||||
pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
png_bytes = pix.tobytes("png")
|
||||
finally:
|
||||
doc.close()
|
||||
|
||||
# Encode as base64
|
||||
image_b64 = base64.b64encode(png_bytes).decode("ascii")
|
||||
@@ -1954,6 +1972,12 @@ async def get_pdf_preview(request: Request) -> JSONResponse:
|
||||
{"success": False, "error": "PDF file not found"},
|
||||
status_code=404,
|
||||
)
|
||||
except (pymupdf.FileDataError, pymupdf.EmptyFileError):
|
||||
logger.warning(f"Invalid or corrupted PDF file: {file_path_param}")
|
||||
return JSONResponse(
|
||||
{"success": False, "error": "Invalid or corrupted PDF file"},
|
||||
status_code=400,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"PDF preview error: {e}", exc_info=True)
|
||||
error_msg = _sanitize_error_for_client(e, "get_pdf_preview")
|
||||
|
||||
@@ -461,8 +461,8 @@ class TestPdfPreviewEdgeCases:
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
|
||||
def test_corrupted_pdf_returns_500(self):
|
||||
"""Test that corrupted PDF data returns 500."""
|
||||
def test_corrupted_pdf_returns_400(self):
|
||||
"""Test that corrupted PDF data returns 400 with specific error."""
|
||||
mock_webdav = AsyncMock()
|
||||
# Return invalid PDF bytes
|
||||
mock_webdav.read_file = AsyncMock(
|
||||
@@ -496,9 +496,13 @@ class TestPdfPreviewEdgeCases:
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
|
||||
assert response.status_code == 500
|
||||
assert response.status_code == 400
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
assert (
|
||||
"corrupted" in data["error"].lower()
|
||||
or "invalid" in data["error"].lower()
|
||||
)
|
||||
|
||||
def test_boundary_scale_values(self):
|
||||
"""Test boundary scale values (min and max)."""
|
||||
@@ -543,3 +547,170 @@ class TestPdfPreviewEdgeCases:
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
assert response.status_code == 200
|
||||
|
||||
|
||||
class TestPdfPreviewSecurityValidation:
|
||||
"""Tests for security validations in PDF preview endpoint."""
|
||||
|
||||
def test_path_traversal_returns_400(self):
|
||||
"""Test that path traversal attempts are blocked with 400."""
|
||||
with (
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
|
||||
new_callable=AsyncMock,
|
||||
return_value=("testuser", True),
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.extract_bearer_token",
|
||||
return_value="test-token",
|
||||
),
|
||||
):
|
||||
app = create_test_app()
|
||||
client = TestClient(app)
|
||||
|
||||
# Test various path traversal patterns
|
||||
traversal_paths = [
|
||||
"/Documents/../../../etc/passwd",
|
||||
"/../secret.pdf",
|
||||
"/folder/..%2F..%2Fetc/passwd", # URL-encoded
|
||||
"/test/../secret.pdf",
|
||||
]
|
||||
|
||||
for path in traversal_paths:
|
||||
response = client.get(
|
||||
f"/api/v1/pdf-preview?file_path={path}",
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
assert response.status_code == 400, (
|
||||
f"Path traversal not blocked: {path}"
|
||||
)
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
assert "invalid file path" in data["error"].lower()
|
||||
|
||||
def test_file_size_limit_exceeded_returns_413(self):
|
||||
"""Test that files exceeding 50MB limit return 413."""
|
||||
# Create bytes larger than 50MB limit
|
||||
large_pdf_bytes = b"x" * (51 * 1024 * 1024) # 51 MB
|
||||
|
||||
mock_webdav = AsyncMock()
|
||||
mock_webdav.read_file = AsyncMock(
|
||||
return_value=(large_pdf_bytes, "application/pdf")
|
||||
)
|
||||
|
||||
mock_nc_client = MagicMock()
|
||||
mock_nc_client.webdav = mock_webdav
|
||||
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
|
||||
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
|
||||
new_callable=AsyncMock,
|
||||
return_value=("testuser", True),
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.extract_bearer_token",
|
||||
return_value="test-token",
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.client.NextcloudClient.from_token",
|
||||
return_value=mock_nc_client,
|
||||
),
|
||||
):
|
||||
app = create_test_app()
|
||||
client = TestClient(app)
|
||||
response = client.get(
|
||||
"/api/v1/pdf-preview?file_path=/large.pdf",
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
|
||||
assert response.status_code == 413
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
assert "size limit" in data["error"].lower()
|
||||
|
||||
def test_corrupted_pdf_returns_400(self):
|
||||
"""Test that corrupted PDF returns 400 with specific error message."""
|
||||
# Invalid PDF content that PyMuPDF cannot parse
|
||||
corrupted_pdf_bytes = b"not a valid PDF file content"
|
||||
|
||||
mock_webdav = AsyncMock()
|
||||
mock_webdav.read_file = AsyncMock(
|
||||
return_value=(corrupted_pdf_bytes, "application/pdf")
|
||||
)
|
||||
|
||||
mock_nc_client = MagicMock()
|
||||
mock_nc_client.webdav = mock_webdav
|
||||
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
|
||||
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
|
||||
new_callable=AsyncMock,
|
||||
return_value=("testuser", True),
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.extract_bearer_token",
|
||||
return_value="test-token",
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.client.NextcloudClient.from_token",
|
||||
return_value=mock_nc_client,
|
||||
),
|
||||
):
|
||||
app = create_test_app()
|
||||
client = TestClient(app)
|
||||
response = client.get(
|
||||
"/api/v1/pdf-preview?file_path=/corrupted.pdf",
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
|
||||
assert response.status_code == 400
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
assert (
|
||||
"corrupted" in data["error"].lower()
|
||||
or "invalid" in data["error"].lower()
|
||||
)
|
||||
|
||||
def test_empty_pdf_returns_400(self):
|
||||
"""Test that empty PDF file returns 400."""
|
||||
empty_pdf_bytes = b""
|
||||
|
||||
mock_webdav = AsyncMock()
|
||||
mock_webdav.read_file = AsyncMock(
|
||||
return_value=(empty_pdf_bytes, "application/pdf")
|
||||
)
|
||||
|
||||
mock_nc_client = MagicMock()
|
||||
mock_nc_client.webdav = mock_webdav
|
||||
mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client)
|
||||
mock_nc_client.__aexit__ = AsyncMock(return_value=None)
|
||||
|
||||
with (
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.validate_token_and_get_user",
|
||||
new_callable=AsyncMock,
|
||||
return_value=("testuser", True),
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.api.management.extract_bearer_token",
|
||||
return_value="test-token",
|
||||
),
|
||||
patch(
|
||||
"nextcloud_mcp_server.client.NextcloudClient.from_token",
|
||||
return_value=mock_nc_client,
|
||||
),
|
||||
):
|
||||
app = create_test_app()
|
||||
client = TestClient(app)
|
||||
response = client.get(
|
||||
"/api/v1/pdf-preview?file_path=/empty.pdf",
|
||||
headers={"Authorization": "Bearer test-token"},
|
||||
)
|
||||
|
||||
assert response.status_code == 400
|
||||
data = response.json()
|
||||
assert data["success"] is False
|
||||
|
||||
Reference in New Issue
Block a user