diff --git a/CLAUDE.md b/CLAUDE.md index a2441f7..343678d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -239,6 +239,25 @@ uv run python -m tests.load.benchmark --output results.json --verbose **Credentials**: root/password, nextcloud/password, database: `nextcloud` +### Quick Query Script (Recommended for Agents) + +Use `scripts/dbquery.py` for single SQL statements without requiring approval for each `docker compose exec`: + +```bash +# Basic query +./scripts/dbquery.py "SELECT COUNT(*) FROM oc_users" + +# Vertical output (one column per line) - useful for wide tables +./scripts/dbquery.py -E "SELECT * FROM oc_oidc_clients LIMIT 1" + +# With different credentials +./scripts/dbquery.py -u nextcloud -p nextcloud "SHOW TABLES" +``` + +### Direct Docker Access + +For interactive sessions or complex operations: + ```bash # Connect to database docker compose exec db mariadb -u root -ppassword nextcloud @@ -264,6 +283,40 @@ docker compose exec db mariadb -u root -ppassword nextcloud -e \ - `oc_oidc_registration_tokens` - RFC 7592 registration tokens - `oc_oidc_redirect_uris` - Redirect URIs +### SQLite Databases (MCP Services) + +Use `scripts/sqlitequery.py` to query SQLite databases in MCP service containers: + +```bash +# List tables +./scripts/sqlitequery.py ".tables" + +# Query specific service +./scripts/sqlitequery.py -s oauth "SELECT * FROM refresh_tokens" +./scripts/sqlitequery.py -s keycloak "SELECT * FROM oauth_clients" +./scripts/sqlitequery.py -s basic "SELECT * FROM app_passwords" + +# With column headers +./scripts/sqlitequery.py --column "SELECT * FROM audit_logs LIMIT 5" + +# JSON output +./scripts/sqlitequery.py --json "SELECT * FROM oauth_sessions" + +# View schema +./scripts/sqlitequery.py -s oauth ".schema refresh_tokens" +``` + +**Services**: `mcp` (default), `oauth`, `keycloak`, `basic` + +**SQLite Tables**: +- `refresh_tokens` - OAuth refresh tokens with user profiles +- `audit_logs` - Security audit trail +- `oauth_clients` - DCR OAuth client 
credentials +- `oauth_sessions` - OAuth flow session state +- `registered_webhooks` - Webhook registrations +- `app_passwords` - Multi-user BasicAuth passwords +- `alembic_version` - Migration tracking + ## Architecture Quick Reference **For detailed architecture, see:** diff --git a/nextcloud_mcp_server/api/__init__.py b/nextcloud_mcp_server/api/__init__.py index 0b11b6d..3cc242c 100644 --- a/nextcloud_mcp_server/api/__init__.py +++ b/nextcloud_mcp_server/api/__init__.py @@ -3,4 +3,74 @@ Provides REST endpoints for the Nextcloud PHP app to query server status, user sessions, and vector sync metrics. All endpoints use OAuth bearer token authentication via the UnifiedTokenVerifier. + +This package is organized into modules by domain: +- management.py: Server status, user sessions, shared helpers +- passwords.py: App password provisioning for multi-user BasicAuth +- webhooks.py: Webhook registration management +- visualization.py: Search and PDF visualization endpoints """ + +# Re-export all public functions for backward compatibility +from nextcloud_mcp_server.api.management import ( + __version__, + _parse_float_param, + _parse_int_param, + _sanitize_error_for_client, + _validate_query_string, + extract_bearer_token, + get_server_status, + get_user_session, + get_vector_sync_status, + revoke_user_access, + validate_token_and_get_user, +) +from nextcloud_mcp_server.api.passwords import ( + delete_app_password, + get_app_password_status, + provision_app_password, +) +from nextcloud_mcp_server.api.visualization import ( + get_chunk_context, + get_pdf_preview, + unified_search, + vector_search, +) +from nextcloud_mcp_server.api.webhooks import ( + create_webhook, + delete_webhook, + get_installed_apps, + list_webhooks, +) + +__all__ = [ + # Version + "__version__", + # Shared helpers (from management.py) + "extract_bearer_token", + "validate_token_and_get_user", + "_sanitize_error_for_client", + "_parse_int_param", + "_parse_float_param", + "_validate_query_string", + 
# Status endpoints (from management.py) + "get_server_status", + "get_vector_sync_status", + # Session endpoints (from management.py) + "get_user_session", + "revoke_user_access", + # Password endpoints (from passwords.py) + "provision_app_password", + "get_app_password_status", + "delete_app_password", + # Webhook endpoints (from webhooks.py) + "get_installed_apps", + "list_webhooks", + "create_webhook", + "delete_webhook", + # Visualization endpoints (from visualization.py) + "unified_search", + "vector_search", + "get_chunk_context", + "get_pdf_preview", +] diff --git a/nextcloud_mcp_server/api/management.py b/nextcloud_mcp_server/api/management.py index bfded9e..b569e4b 100644 --- a/nextcloud_mcp_server/api/management.py +++ b/nextcloud_mcp_server/api/management.py @@ -4,24 +4,22 @@ ADR-018: Provides REST API endpoints for the Nextcloud PHP app to query: - Server status and version - User session information and background access status - Vector sync metrics -- Vector search for visualization All endpoints use OAuth bearer token authentication via UnifiedTokenVerifier. The PHP app obtains tokens through PKCE flow and uses them to access these endpoints. 
+ +Shared helper functions for other API modules are also exported from here: +- extract_bearer_token: Extract OAuth token from request +- validate_token_and_get_user: Validate token and get user ID +- _sanitize_error_for_client: Return safe error messages +- _parse_int_param, _parse_float_param, _validate_query_string: Parameter validation """ -import base64 import logging -import re import time -from collections import defaultdict from importlib.metadata import version -from typing import TYPE_CHECKING, Any +from typing import Any -import httpx - -if TYPE_CHECKING: - from nextcloud_mcp_server.auth.storage import RefreshTokenStorage from starlette.requests import Request from starlette.responses import JSONResponse @@ -31,23 +29,6 @@ logger = logging.getLogger(__name__) # Get package version from metadata __version__ = version("nextcloud-mcp-server") -# App password format regex (Nextcloud format: xxxxx-xxxxx-xxxxx-xxxxx-xxxxx) -APP_PASSWORD_PATTERN = re.compile( - r"^[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}$" -) - -# Timeout for Nextcloud API validation requests (seconds) -NEXTCLOUD_VALIDATION_TIMEOUT = 10.0 - -# Rate limiting configuration for app password provisioning -# Limits: 5 attempts per user per hour -RATE_LIMIT_MAX_ATTEMPTS = 5 -RATE_LIMIT_WINDOW_SECONDS = 3600 # 1 hour - -# In-memory rate limiter storage -# Structure: {user_id: [(timestamp, success), ...]} -_rate_limit_attempts: dict[str, list[tuple[float, bool]]] = defaultdict(list) - # Track server start time for uptime calculation _server_start_time = time.time() @@ -204,141 +185,6 @@ def _validate_query_string(query: str, max_length: int = 10000) -> None: raise ValueError(f"Query too long: maximum {max_length} characters") -async def _get_app_password_storage(request: Request) -> "RefreshTokenStorage": - """Get or initialize RefreshTokenStorage for app password operations. - - Checks app.state.storage first, then falls back to creating from environment. 
- This helper avoids repeated storage initialization logic across endpoints. - - Args: - request: Starlette request with app state - - Returns: - Initialized RefreshTokenStorage instance - """ - from nextcloud_mcp_server.auth.storage import RefreshTokenStorage - - storage = getattr(request.app.state, "storage", None) - - if not storage: - # Multi-user BasicAuth mode may not have oauth_context - # Initialize storage from environment - storage = RefreshTokenStorage.from_env() - await storage.initialize() - - return storage - - -def _check_rate_limit(user_id: str) -> tuple[bool, int]: - """Check if user is rate limited for app password operations. - - Implements a sliding window rate limiter to prevent brute-force attacks - on the app password provisioning endpoint. - - Args: - user_id: User identifier to check - - Returns: - Tuple of (is_allowed, seconds_until_retry) - - is_allowed: True if request should be allowed - - seconds_until_retry: Seconds to wait if rate limited (0 if allowed) - """ - current_time = time.time() - window_start = current_time - RATE_LIMIT_WINDOW_SECONDS - - # Clean up old attempts outside the window - _rate_limit_attempts[user_id] = [ - (ts, success) - for ts, success in _rate_limit_attempts[user_id] - if ts > window_start - ] - - # Count recent attempts (both successful and failed) - recent_attempts = len(_rate_limit_attempts[user_id]) - - if recent_attempts >= RATE_LIMIT_MAX_ATTEMPTS: - # Find when the oldest attempt in the window will expire - oldest_attempt = min(ts for ts, _ in _rate_limit_attempts[user_id]) - seconds_until_retry = int( - oldest_attempt + RATE_LIMIT_WINDOW_SECONDS - current_time - ) - return False, max(1, seconds_until_retry) - - return True, 0 - - -def _record_rate_limit_attempt(user_id: str, success: bool) -> None: - """Record an app password provisioning attempt for rate limiting. 
- - Args: - user_id: User identifier - success: Whether the attempt was successful - """ - _rate_limit_attempts[user_id].append((time.time(), success)) - - -def _extract_basic_auth( - request: Request, path_user_id: str -) -> tuple[str, str, JSONResponse | None]: - """Extract and validate BasicAuth credentials from request. - - Validates: - 1. Authorization header is present and valid BasicAuth format - 2. Username in credentials matches the path user_id - - Args: - request: Starlette request with Authorization header - path_user_id: User ID from the URL path to verify against - - Returns: - Tuple of (username, password, error_response) - - If successful: (username, password, None) - - If failed: ("", "", JSONResponse with error) - """ - auth_header = request.headers.get("Authorization") - - if not auth_header or not auth_header.startswith("Basic "): - return ( - "", - "", - JSONResponse( - {"success": False, "error": "Missing BasicAuth credentials"}, - status_code=401, - ), - ) - - try: - # Decode BasicAuth - encoded = auth_header.split(" ", 1)[1] - decoded = base64.b64decode(encoded).decode("utf-8") - username, password = decoded.split(":", 1) - except Exception: - return ( - "", - "", - JSONResponse( - {"success": False, "error": "Invalid BasicAuth format"}, - status_code=401, - ), - ) - - # Verify username matches path user_id - if username != path_user_id: - logger.warning( - f"Username mismatch in app password operation for path user {path_user_id}" - ) - return ( - "", - "", - JSONResponse( - {"success": False, "error": "Username does not match path user_id"}, - status_code=403, - ), - ) - - return username, password, None - - async def get_server_status(request: Request) -> JSONResponse: """GET /api/v1/status - Server status and version. 
@@ -671,1157 +517,3 @@ async def revoke_user_access(request: Request) -> JSONResponse: {"success": False, "error": error_msg}, status_code=500, ) - - -async def provision_app_password(request: Request) -> JSONResponse: - """POST /api/v1/users/{user_id}/app-password - Store app password for background sync. - - This endpoint is used by Astrolabe (Nextcloud PHP app) to provision app passwords - for multi-user BasicAuth mode background sync. - - The request must include BasicAuth credentials where: - - username: Nextcloud user ID (must match path user_id) - - password: The app password being provisioned - - The MCP server validates the app password against Nextcloud before storing it. - This proves the user owns the password and has access to Nextcloud. - - Security model: - - User identity is verified via BasicAuth against Nextcloud - - App password is encrypted before storage - - Only the user who owns the password can provision it - - Rate limited to prevent brute-force attacks - """ - from nextcloud_mcp_server.config import get_settings - - # Get user_id from path - path_user_id = request.path_params.get("user_id") - if not path_user_id: - return JSONResponse( - {"success": False, "error": "Missing user_id in path"}, - status_code=400, - ) - - # Check rate limit before processing - is_allowed, retry_after = _check_rate_limit(path_user_id) - if not is_allowed: - logger.warning( - f"Rate limit exceeded for app password provisioning: {path_user_id}" - ) - return JSONResponse( - { - "success": False, - "error": f"Rate limit exceeded. 
Try again in {retry_after} seconds.", - }, - status_code=429, - headers={"Retry-After": str(retry_after)}, - ) - - # Extract and validate BasicAuth credentials - username, app_password, error_response = _extract_basic_auth(request, path_user_id) - if error_response is not None: - _record_rate_limit_attempt(path_user_id, success=False) - return error_response - - # Validate app password format - if not APP_PASSWORD_PATTERN.match(app_password): - _record_rate_limit_attempt(path_user_id, success=False) - return JSONResponse( - {"success": False, "error": "Invalid app password format"}, - status_code=400, - ) - - # Get Nextcloud host from settings - settings = get_settings() - nextcloud_host = settings.nextcloud_host - - if not nextcloud_host: - logger.error("NEXTCLOUD_HOST not configured") - return JSONResponse( - {"success": False, "error": "Server not configured"}, - status_code=500, - ) - - # Validate app password against Nextcloud - try: - async with httpx.AsyncClient(timeout=NEXTCLOUD_VALIDATION_TIMEOUT) as client: - # Use OCS API to verify credentials - test_url = f"{nextcloud_host}/ocs/v1.php/cloud/user" - response = await client.get( - test_url, - auth=(username, app_password), - params={"format": "json"}, - headers={"OCS-APIRequest": "true"}, - ) - - if response.status_code != 200: - logger.warning( - f"App password validation failed for user: HTTP {response.status_code}" - ) - _record_rate_limit_attempt(path_user_id, success=False) - return JSONResponse( - {"success": False, "error": "Invalid app password"}, - status_code=401, - ) - - # Verify the user ID from response matches - data = response.json() - ocs_user_id = data.get("ocs", {}).get("data", {}).get("id") - if ocs_user_id != username: - logger.warning("User ID mismatch in OCS response") - _record_rate_limit_attempt(path_user_id, success=False) - return JSONResponse( - {"success": False, "error": "User ID mismatch"}, - status_code=403, - ) - - except httpx.RequestError as e: - logger.error(f"Failed to 
validate app password: {e}") - return JSONResponse( - {"success": False, "error": "Failed to validate credentials"}, - status_code=500, - ) - - # Store the validated app password - try: - storage = await _get_app_password_storage(request) - await storage.store_app_password(username, app_password) - - _record_rate_limit_attempt(path_user_id, success=True) - logger.info(f"Provisioned app password for user: {username}") - - return JSONResponse( - { - "success": True, - "message": f"App password stored for {username}", - } - ) - - except Exception as e: - error_msg = _sanitize_error_for_client(e, "provision_app_password") - return JSONResponse( - {"success": False, "error": error_msg}, - status_code=500, - ) - - -async def get_app_password_status(request: Request) -> JSONResponse: - """GET /api/v1/users/{user_id}/app-password - Check if user has provisioned app password. - - Returns status of background sync access for multi-user BasicAuth mode. - - Requires BasicAuth with the user's app password for authentication. 
- """ - # Get user_id from path - path_user_id = request.path_params.get("user_id") - if not path_user_id: - return JSONResponse( - {"success": False, "error": "Missing user_id in path"}, - status_code=400, - ) - - # Extract and validate BasicAuth credentials - username, _, error_response = _extract_basic_auth(request, path_user_id) - if error_response is not None: - return error_response - - try: - storage = await _get_app_password_storage(request) - app_password = await storage.get_app_password(username) - - return JSONResponse( - { - "success": True, - "user_id": username, - "has_app_password": app_password is not None, - } - ) - - except Exception as e: - error_msg = _sanitize_error_for_client(e, "get_app_password_status") - return JSONResponse( - {"success": False, "error": error_msg}, - status_code=500, - ) - - -async def delete_app_password(request: Request) -> JSONResponse: - """DELETE /api/v1/users/{user_id}/app-password - Delete stored app password. - - Removes the user's app password from MCP server storage. - - Requires BasicAuth with the user's credentials. 
- """ - from nextcloud_mcp_server.config import get_settings - - # Get user_id from path - path_user_id = request.path_params.get("user_id") - if not path_user_id: - return JSONResponse( - {"success": False, "error": "Missing user_id in path"}, - status_code=400, - ) - - # Extract and validate BasicAuth credentials - username, password, error_response = _extract_basic_auth(request, path_user_id) - if error_response is not None: - return error_response - - # Validate credentials against Nextcloud - settings = get_settings() - nextcloud_host = settings.nextcloud_host - - try: - async with httpx.AsyncClient(timeout=NEXTCLOUD_VALIDATION_TIMEOUT) as client: - test_url = f"{nextcloud_host}/ocs/v1.php/cloud/user" - response = await client.get( - test_url, - auth=(username, password), - params={"format": "json"}, - headers={"OCS-APIRequest": "true"}, - ) - - if response.status_code != 200: - return JSONResponse( - {"success": False, "error": "Invalid credentials"}, - status_code=401, - ) - except httpx.RequestError as e: - logger.error(f"Failed to validate credentials: {e}") - return JSONResponse( - {"success": False, "error": "Failed to validate credentials"}, - status_code=500, - ) - - try: - storage = await _get_app_password_storage(request) - deleted = await storage.delete_app_password(username) - - if deleted: - logger.info(f"Deleted app password for user: {username}") - return JSONResponse( - { - "success": True, - "message": f"App password deleted for {username}", - } - ) - else: - return JSONResponse( - { - "success": True, - "message": "No app password found to delete", - } - ) - - except Exception as e: - error_msg = _sanitize_error_for_client(e, "delete_app_password") - return JSONResponse( - {"success": False, "error": error_msg}, - status_code=500, - ) - - -async def get_installed_apps(request: Request) -> JSONResponse: - """GET /api/v1/apps - Get list of installed Nextcloud apps. - - Returns a list of installed app IDs for filtering webhook presets. 
- - Requires OAuth bearer token for authentication. - """ - try: - # Validate OAuth token and extract user - user_id, validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/apps: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "get_installed_apps"), - }, - status_code=401, - ) - - try: - # Get Bearer token from request - token = extract_bearer_token(request) - if not token: - raise ValueError("Missing Authorization header") - - # Get Nextcloud host from OAuth context - oauth_ctx = request.app.state.oauth_context - nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") - - if not nextcloud_host: - raise ValueError("Nextcloud host not configured") - - # Create authenticated HTTP client - async with httpx.AsyncClient( - base_url=nextcloud_host, - headers={"Authorization": f"Bearer {token}"}, - timeout=30.0, - ) as client: - # Get installed apps using OCS API - # Notes, Calendar, Deck, Tables, etc. are apps that support webhooks - # We check which ones are installed and enabled - ocs_url = "/ocs/v1.php/cloud/apps" - params = {"filter": "enabled"} - - response = await client.get( - ocs_url, - params=params, - headers={"OCS-APIRequest": "true", "Accept": "application/json"}, - ) - - if response.status_code != 200: - raise ValueError(f"OCS API returned status {response.status_code}") - - data = response.json() - apps = data.get("ocs", {}).get("data", {}).get("apps", []) - - return JSONResponse({"apps": apps}) - - except Exception as e: - logger.error(f"Error getting installed apps for user {user_id}: {e}") - return JSONResponse( - { - "error": "Internal error", - "message": _sanitize_error_for_client(e, "get_installed_apps"), - }, - status_code=500, - ) - - -async def list_webhooks(request: Request) -> JSONResponse: - """GET /api/v1/webhooks - List all registered webhooks. 
- - Returns list of webhook registrations for the authenticated user. - - Requires OAuth bearer token for authentication. - """ - try: - # Validate OAuth token and extract user - user_id, validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "list_webhooks"), - }, - status_code=401, - ) - - try: - from nextcloud_mcp_server.client.webhooks import WebhooksClient - - # Get Bearer token from request - token = extract_bearer_token(request) - if not token: - raise ValueError("Missing Authorization header") - - # Get Nextcloud host from OAuth context - oauth_ctx = request.app.state.oauth_context - nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") - - if not nextcloud_host: - raise ValueError("Nextcloud host not configured") - - # Create authenticated HTTP client - async with httpx.AsyncClient( - base_url=nextcloud_host, - headers={"Authorization": f"Bearer {token}"}, - timeout=30.0, - ) as client: - # Use WebhooksClient to list webhooks - webhooks_client = WebhooksClient(client, user_id) - webhooks = await webhooks_client.list_webhooks() - - return JSONResponse({"webhooks": webhooks}) - - except Exception as e: - logger.error(f"Error listing webhooks for user {user_id}: {e}") - return JSONResponse( - { - "error": "Internal error", - "message": _sanitize_error_for_client(e, "list_webhooks"), - }, - status_code=500, - ) - - -async def create_webhook(request: Request) -> JSONResponse: - """POST /api/v1/webhooks - Create a new webhook registration. - - Request body: - { - "event": "OCP\\Files\\Events\\Node\\NodeCreatedEvent", - "uri": "http://mcp:8000/webhooks/nextcloud", - "eventFilter": {"event.node.path": "/^\\/.*\\/files\\/Notes\\//"} - } - - Returns the created webhook data including the webhook ID. - - Requires OAuth bearer token for authentication. 
- """ - try: - # Validate OAuth token and extract user - user_id, validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "create_webhook"), - }, - status_code=401, - ) - - try: - from nextcloud_mcp_server.client.webhooks import WebhooksClient - - # Parse request body - body = await request.json() - event = body.get("event") - uri = body.get("uri") - # Accept both camelCase (eventFilter) and snake_case (event_filter) - event_filter = body.get("eventFilter") or body.get("event_filter") - - if not event or not uri: - return JSONResponse( - { - "error": "Bad request", - "message": "Missing required fields: event, uri", - }, - status_code=400, - ) - - # Get Bearer token from request - token = extract_bearer_token(request) - if not token: - raise ValueError("Missing Authorization header") - - # Get Nextcloud host from OAuth context - oauth_ctx = request.app.state.oauth_context - nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") - - if not nextcloud_host: - raise ValueError("Nextcloud host not configured") - - # Create authenticated HTTP client - async with httpx.AsyncClient( - base_url=nextcloud_host, - headers={"Authorization": f"Bearer {token}"}, - timeout=30.0, - ) as client: - # Use WebhooksClient to create webhook - webhooks_client = WebhooksClient(client, user_id) - webhook_data = await webhooks_client.create_webhook( - event=event, uri=uri, event_filter=event_filter - ) - - return JSONResponse({"webhook": webhook_data}) - - except Exception as e: - logger.error(f"Error creating webhook for user {user_id}: {e}") - return JSONResponse( - { - "error": "Internal error", - "message": _sanitize_error_for_client(e, "create_webhook"), - }, - status_code=500, - ) - - -async def delete_webhook(request: Request) -> JSONResponse: - """DELETE /api/v1/webhooks/{webhook_id} 
- Delete a webhook registration. - - Returns success/failure status. - - Requires OAuth bearer token for authentication. - """ - try: - # Validate OAuth token and extract user - user_id, validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "delete_webhook"), - }, - status_code=401, - ) - - try: - from nextcloud_mcp_server.client.webhooks import WebhooksClient - - # Get webhook_id from path parameter - webhook_id = request.path_params.get("webhook_id") - if not webhook_id: - return JSONResponse( - {"error": "Bad request", "message": "Missing webhook_id"}, - status_code=400, - ) - - try: - webhook_id = int(webhook_id) - except ValueError: - return JSONResponse( - {"error": "Bad request", "message": "Invalid webhook_id"}, - status_code=400, - ) - - # Get Bearer token from request - token = extract_bearer_token(request) - if not token: - raise ValueError("Missing Authorization header") - - # Get Nextcloud host from OAuth context - oauth_ctx = request.app.state.oauth_context - nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") - - if not nextcloud_host: - raise ValueError("Nextcloud host not configured") - - # Create authenticated HTTP client - async with httpx.AsyncClient( - base_url=nextcloud_host, - headers={"Authorization": f"Bearer {token}"}, - timeout=30.0, - ) as client: - # Use WebhooksClient to delete webhook - webhooks_client = WebhooksClient(client, user_id) - await webhooks_client.delete_webhook(webhook_id=webhook_id) - - return JSONResponse({"success": True, "message": "Webhook deleted"}) - - except Exception as e: - logger.error(f"Error deleting webhook for user {user_id}: {e}") - return JSONResponse( - { - "error": "Internal error", - "message": _sanitize_error_for_client(e, "delete_webhook"), - }, - status_code=500, - ) - - -async def 
unified_search(request: Request) -> JSONResponse: - """POST /api/v1/search - Search endpoint for Nextcloud Unified Search. - - Optimized search endpoint for the Nextcloud Unified Search provider - and other PHP app integrations. Returns results with metadata needed - for navigation to source documents. - - Request body: - { - "query": "search query", - "algorithm": "semantic|bm25|hybrid", // default: hybrid - "limit": 20, // max: 100 - "offset": 0, // pagination offset - "include_pca": false, // optional PCA coordinates - "include_chunks": true // include text snippets - } - - Response: - { - "results": [{ - "id": "doc123", - "doc_type": "note", - "title": "Document Title", - "excerpt": "Matching text snippet...", - "score": 0.85, - "path": "/path/to/file.txt", // for files - "board_id": 1, // for deck cards - "card_id": 42 - }], - "total_found": 150, - "algorithm_used": "hybrid" - } - - Requires OAuth bearer token for user filtering. - """ - from nextcloud_mcp_server.config import get_settings - - settings = get_settings() - if not settings.vector_sync_enabled: - return JSONResponse( - {"error": "Vector sync is disabled on this server"}, - status_code=404, - ) - - # Validate OAuth token and extract user - try: - user_id, _validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/search: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "unified_search"), - }, - status_code=401, - ) - - try: - # Parse request body - body = await request.json() - - # Validate and parse parameters - try: - query = body.get("query", "") - _validate_query_string(query, max_length=10000) - - limit = _parse_int_param( - str(body.get("limit")) if body.get("limit") is not None else None, - 20, - 1, - 100, - "limit", - ) - - offset = _parse_int_param( - str(body.get("offset")) if body.get("offset") is not None else None, - 0, - 0, - 1000000, - "offset", - ) - - 
score_threshold = _parse_float_param( - body.get("score_threshold"), - 0.0, - 0.0, - 1.0, - "score_threshold", - ) - except ValueError as e: - return JSONResponse({"error": str(e)}, status_code=400) - - algorithm = body.get("algorithm", "hybrid") - fusion = body.get("fusion", "rrf") - include_pca = body.get("include_pca", False) - include_chunks = body.get("include_chunks", True) - doc_types = body.get("doc_types") # Optional filter - - if not query: - return JSONResponse({"results": [], "total_found": 0}) - - # Validate algorithm - valid_algorithms = {"semantic", "bm25", "hybrid"} - if algorithm not in valid_algorithms: - algorithm = "hybrid" - - # Validate fusion method - valid_fusions = {"rrf", "dbsf"} - if fusion not in valid_fusions: - fusion = "rrf" - - # Execute search using the appropriate algorithm - from nextcloud_mcp_server.search import ( - BM25HybridSearchAlgorithm, - SemanticSearchAlgorithm, - ) - - # Select search algorithm - if algorithm == "semantic": - search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold) - else: - search_algo = BM25HybridSearchAlgorithm( - score_threshold=score_threshold, fusion=fusion - ) - - # Request extra results to handle offset - search_limit = limit + offset - - # Execute search - all_results = [] - if doc_types and isinstance(doc_types, list): - for doc_type in doc_types: - if doc_type: - results = await search_algo.search( - query=query, - user_id=user_id, - limit=search_limit, - doc_type=doc_type, - ) - all_results.extend(results) - all_results.sort(key=lambda r: r.score, reverse=True) - else: - all_results = await search_algo.search( - query=query, - user_id=user_id, - limit=search_limit, - ) - - # Sort results by score (no deduplication - show all chunks) - sorted_results = sorted(all_results, key=lambda r: r.score, reverse=True) - - # Calculate total and apply pagination - total_found = len(sorted_results) - paginated_results = sorted_results[offset : offset + limit] - - # Format results for Unified 
Search - formatted_results = [] - for result in paginated_results: - # Get document ID (prefer note_id for notes) - doc_id = result.id - if result.metadata and "note_id" in result.metadata: - doc_id = result.metadata["note_id"] - - result_data: dict[str, Any] = { - "id": doc_id, - "doc_type": result.doc_type, - "title": result.title, - "score": result.score, - } - - # Include excerpt/chunk if requested (full content, no truncation) - if include_chunks and result.excerpt: - result_data["excerpt"] = result.excerpt - - # Include navigation metadata from result.metadata - if result.metadata: - # File path and mimetype for files - if "path" in result.metadata: - result_data["path"] = result.metadata["path"] - if "mime_type" in result.metadata: - result_data["mime_type"] = result.metadata["mime_type"] - - # Deck card navigation - if "board_id" in result.metadata: - result_data["board_id"] = result.metadata["board_id"] - if "card_id" in result.metadata: - result_data["card_id"] = result.metadata["card_id"] - - # Calendar event metadata - if "calendar_id" in result.metadata: - result_data["calendar_id"] = result.metadata["calendar_id"] - if "event_uid" in result.metadata: - result_data["event_uid"] = result.metadata["event_uid"] - - # Add PDF page metadata - if result.page_number is not None: - result_data["page_number"] = result.page_number - if result.page_count is not None: - result_data["page_count"] = result.page_count - - # Add chunk metadata (always present, defaults to 0 and 1) - result_data["chunk_index"] = result.chunk_index - result_data["total_chunks"] = result.total_chunks - - # Add chunk offsets for modal navigation - if result.chunk_start_offset is not None: - result_data["chunk_start_offset"] = result.chunk_start_offset - if result.chunk_end_offset is not None: - result_data["chunk_end_offset"] = result.chunk_end_offset - - formatted_results.append(result_data) - - response_data: dict[str, Any] = { - "results": formatted_results, - "total_found": 
total_found, - "algorithm_used": algorithm, - } - - # Optional PCA coordinates - if include_pca and len(paginated_results) >= 2: - try: - from nextcloud_mcp_server.vector.visualization import ( - compute_pca_coordinates, - ) - - if search_algo.query_embedding is not None: - query_embedding = search_algo.query_embedding - else: - from nextcloud_mcp_server.embedding.service import ( - get_embedding_service, - ) - - embedding_service = get_embedding_service() - query_embedding = await embedding_service.embed(query) - - pca_data = await compute_pca_coordinates( - paginated_results, query_embedding - ) - response_data["pca_data"] = pca_data - except Exception as e: - logger.warning(f"Failed to compute PCA for unified search: {e}") - - return JSONResponse(response_data) - - except Exception as e: - logger.error(f"Error in unified search: {e}") - return JSONResponse( - { - "error": "Internal error", - "message": _sanitize_error_for_client(e, "unified_search"), - }, - status_code=500, - ) - - -async def vector_search(request: Request) -> JSONResponse: - """POST /api/v1/vector-viz/search - Vector search for visualization. - - Executes semantic search and returns results with optional PCA coordinates - for 2D visualization. - - Request body: - { - "query": "search query", - "algorithm": "semantic|bm25|hybrid", // default: hybrid - "limit": 10, // max: 50 - "include_pca": true, // whether to include 2D coordinates - "doc_types": ["note", "file"] // optional filter by document types - } - - Requires OAuth bearer token for user filtering. 
- """ - from nextcloud_mcp_server.config import get_settings - - settings = get_settings() - if not settings.vector_sync_enabled: - return JSONResponse( - {"error": "Vector sync is disabled on this server"}, - status_code=404, - ) - - # Validate OAuth token and extract user - try: - user_id, _validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/vector-viz/search: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "vector_search"), - }, - status_code=401, - ) - - try: - # Parse request body - body = await request.json() - query = body.get("query", "") - algorithm = body.get("algorithm", "hybrid") - fusion = body.get("fusion", "rrf") - score_threshold = body.get("score_threshold", 0.0) - limit = min(body.get("limit", 10), 50) # Enforce max limit - include_pca = body.get("include_pca", True) - doc_types = body.get("doc_types") # Optional list of document types - - if not query: - return JSONResponse( - {"error": "Missing required parameter: query"}, - status_code=400, - ) - - # Validate algorithm - valid_algorithms = {"semantic", "bm25", "hybrid"} - if algorithm not in valid_algorithms: - algorithm = "hybrid" - - # Validate fusion method - valid_fusions = {"rrf", "dbsf"} - if fusion not in valid_fusions: - fusion = "rrf" - - # Execute search using the appropriate algorithm - from nextcloud_mcp_server.search import ( - BM25HybridSearchAlgorithm, - SemanticSearchAlgorithm, - ) - - # Select search algorithm - if algorithm == "semantic": - search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold) - else: - # Both "hybrid" and "bm25" use the BM25HybridSearchAlgorithm - # which combines dense semantic and sparse BM25 vectors - search_algo = BM25HybridSearchAlgorithm( - score_threshold=score_threshold, fusion=fusion - ) - - # Execute search for each doc_type if specified, otherwise search all - all_results = [] - if doc_types and 
isinstance(doc_types, list): - # Search each doc_type separately and merge results - for doc_type in doc_types: - if doc_type: # Skip empty strings - results = await search_algo.search( - query=query, - user_id=user_id, - limit=limit, - doc_type=doc_type, - ) - all_results.extend(results) - # Sort merged results by score and limit - all_results.sort(key=lambda r: r.score, reverse=True) - all_results = all_results[:limit] - else: - # Search all document types - all_results = await search_algo.search( - query=query, - user_id=user_id, - limit=limit, - ) - - # Format results for PHP client - formatted_results = [] - for result in all_results: - formatted_result = { - "id": result.id, - "doc_type": result.doc_type, - "title": result.title, - "excerpt": result.excerpt[:200] if result.excerpt else "", - "score": result.score, - "metadata": result.metadata, - # Chunk information for context display - "chunk_index": result.chunk_index, - "total_chunks": result.total_chunks, - } - # Include optional fields if present - if result.chunk_start_offset is not None: - formatted_result["chunk_start_offset"] = result.chunk_start_offset - if result.chunk_end_offset is not None: - formatted_result["chunk_end_offset"] = result.chunk_end_offset - if result.page_number is not None: - formatted_result["page_number"] = result.page_number - if result.page_count is not None: - formatted_result["page_count"] = result.page_count - formatted_results.append(formatted_result) - - response_data: dict[str, Any] = { - "results": formatted_results, - "algorithm_used": algorithm, - "total_documents": len(formatted_results), - } - - # Compute PCA coordinates for visualization using shared function - if include_pca and len(all_results) >= 2: - try: - from nextcloud_mcp_server.vector.visualization import ( - compute_pca_coordinates, - ) - - # Get query embedding from search algorithm or generate it - if search_algo.query_embedding is not None: - query_embedding = search_algo.query_embedding - else: - 
from nextcloud_mcp_server.embedding.service import ( - get_embedding_service, - ) - - embedding_service = get_embedding_service() - query_embedding = await embedding_service.embed(query) - - pca_data = await compute_pca_coordinates(all_results, query_embedding) - response_data["coordinates_3d"] = pca_data["coordinates_3d"] - response_data["query_coords"] = pca_data["query_coords"] - if "pca_variance" in pca_data: - response_data["pca_variance"] = pca_data["pca_variance"] - except Exception as e: - logger.warning(f"Failed to compute PCA coordinates: {e}") - response_data["coordinates_3d"] = [] - response_data["query_coords"] = [] - elif include_pca: - # Not enough results for PCA - response_data["coordinates_3d"] = [] - response_data["query_coords"] = [] - - return JSONResponse(response_data) - - except Exception as e: - error_msg = _sanitize_error_for_client(e, "vector_search") - return JSONResponse( - {"error": error_msg}, - status_code=500, - ) - - -async def get_chunk_context(request: Request) -> JSONResponse: - """GET /api/v1/chunk-context - Fetch chunk text with context. - - Retrieves the matched chunk along with surrounding text and metadata. - Used by clients to display chunk context and highlighted PDFs. - - Query parameters: - doc_type: Document type (e.g., "note") - doc_id: Document ID - start: Chunk start offset (character position) - end: Chunk end offset (character position) - context: Characters of context before/after (default: 500) - - Requires OAuth bearer token for authentication. 
- """ - try: - # Validate OAuth token and extract user - user_id, validated = await validate_token_and_get_user(request) - except Exception as e: - logger.warning(f"Unauthorized access to /api/v1/chunk-context: {e}") - return JSONResponse( - { - "error": "Unauthorized", - "message": _sanitize_error_for_client(e, "get_chunk_context"), - }, - status_code=401, - ) - - try: - # Get query parameters - doc_type = request.query_params.get("doc_type") - doc_id = request.query_params.get("doc_id") - start_str = request.query_params.get("start") - end_str = request.query_params.get("end") - - # Validate required parameters - if not all([doc_type, doc_id, start_str, end_str]): - return JSONResponse( - { - "success": False, - "error": "Missing required parameters: doc_type, doc_id, start, end", - }, - status_code=400, - ) - - # Type narrowing: we already checked these are not None above - assert start_str is not None - assert end_str is not None - assert doc_id is not None - assert doc_type is not None - - # Parse and validate integer parameters with bounds checking - try: - context_chars = _parse_int_param( - request.query_params.get("context"), - 500, - 0, - 10000, - "context_chars", - ) - start = _parse_int_param(start_str, 0, 0, 10000000, "start") - end = _parse_int_param(end_str, 0, 0, 10000000, "end") - if end <= start: - raise ValueError("end must be greater than start") - except ValueError as e: - return JSONResponse({"success": False, "error": str(e)}, status_code=400) - # Convert doc_id to int if possible (most IDs are int) - doc_id_val: str | int = int(doc_id) if doc_id.isdigit() else doc_id - - # Get bearer token for client initialization - token = extract_bearer_token(request) - if not token: - raise ValueError("Missing token") - - # Get Nextcloud host from OAuth context - oauth_ctx = request.app.state.oauth_context - nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") - - if not nextcloud_host: - raise ValueError("Nextcloud host not 
configured") - - # Initialize authenticated Nextcloud client - from nextcloud_mcp_server.client import NextcloudClient - from nextcloud_mcp_server.search.context import get_chunk_with_context - - async with NextcloudClient.from_token( - base_url=nextcloud_host, token=token, username=user_id - ) as nc_client: - chunk_context = await get_chunk_with_context( - nc_client=nc_client, - user_id=user_id, - doc_id=doc_id_val, - doc_type=doc_type, - chunk_start=start, - chunk_end=end, - context_chars=context_chars, - ) - - if chunk_context is None: - return JSONResponse( - { - "success": False, - "error": f"Failed to fetch chunk context for {doc_type} {doc_id}", - }, - status_code=404, - ) - - # For PDF files, also fetch the highlighted page image from Qdrant if available - # This is useful for clients that want to show a pre-rendered image - highlighted_page_image = None - page_number = chunk_context.page_number - - if doc_type == "file": - try: - from qdrant_client.models import FieldCondition, Filter, MatchValue - - from nextcloud_mcp_server.config import get_settings - from nextcloud_mcp_server.vector.placeholder import ( - get_placeholder_filter, - ) - from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client - - settings = get_settings() - qdrant_client = await get_qdrant_client() - - # Query for this specific chunk's highlighted image - points_response = await qdrant_client.scroll( - collection_name=settings.get_collection_name(), - scroll_filter=Filter( - must=[ - get_placeholder_filter(), - FieldCondition( - key="doc_id", match=MatchValue(value=doc_id_val) - ), - FieldCondition( - key="user_id", match=MatchValue(value=user_id) - ), - FieldCondition( - key="chunk_start_offset", match=MatchValue(value=start) - ), - FieldCondition( - key="chunk_end_offset", match=MatchValue(value=end) - ), - ] - ), - limit=1, - with_vectors=False, - with_payload=["highlighted_page_image", "page_number"], - ) - - if points_response[0]: - payload = 
points_response[0][0].payload - if payload: - highlighted_page_image = payload.get("highlighted_page_image") - # Trust Qdrant page number if available (might be more accurate than context expansion logic) - if payload.get("page_number") is not None: - page_number = payload.get("page_number") - - except Exception as e: - logger.warning(f"Failed to fetch highlighted image: {e}") - - # Build response - response_data = { - "success": True, - "chunk_text": chunk_context.chunk_text, - "before_context": chunk_context.before_context, - "after_context": chunk_context.after_context, - "has_more_before": chunk_context.has_before_truncation, - "has_more_after": chunk_context.has_after_truncation, - "page_number": page_number, - "chunk_index": chunk_context.chunk_index, - "total_chunks": chunk_context.total_chunks, - } - - if highlighted_page_image: - response_data["highlighted_page_image"] = highlighted_page_image - - return JSONResponse(response_data) - - except Exception as e: - error_msg = _sanitize_error_for_client(e, "get_chunk_context") - return JSONResponse( - {"error": error_msg}, - status_code=500, - ) diff --git a/nextcloud_mcp_server/api/passwords.py b/nextcloud_mcp_server/api/passwords.py new file mode 100644 index 0000000..c2e9ad2 --- /dev/null +++ b/nextcloud_mcp_server/api/passwords.py @@ -0,0 +1,429 @@ +"""App password management API endpoints. + +Provides REST API endpoints for app password provisioning in multi-user BasicAuth mode. +These endpoints are used by the Nextcloud PHP app (Astrolabe) to: +- Store app passwords for background sync operations +- Check app password status +- Delete stored app passwords + +Authentication is via BasicAuth with the user's Nextcloud credentials. +Passwords are validated against Nextcloud before being stored. 
+""" + +import base64 +import logging +import re +import time +from collections import defaultdict +from typing import TYPE_CHECKING + +import httpx +from starlette.requests import Request +from starlette.responses import JSONResponse + +if TYPE_CHECKING: + from nextcloud_mcp_server.auth.storage import RefreshTokenStorage + +from nextcloud_mcp_server.api.management import _sanitize_error_for_client + +logger = logging.getLogger(__name__) + +# App password format regex (Nextcloud format: xxxxx-xxxxx-xxxxx-xxxxx-xxxxx) +APP_PASSWORD_PATTERN = re.compile( + r"^[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}$" +) + +# Timeout for Nextcloud API validation requests (seconds) +NEXTCLOUD_VALIDATION_TIMEOUT = 10.0 + +# Rate limiting configuration for app password provisioning +# Limits: 5 attempts per user per hour +RATE_LIMIT_MAX_ATTEMPTS = 5 +RATE_LIMIT_WINDOW_SECONDS = 3600 # 1 hour + +# In-memory rate limiter storage +# Structure: {user_id: [(timestamp, success), ...]} +_rate_limit_attempts: dict[str, list[tuple[float, bool]]] = defaultdict(list) + + +def _check_rate_limit(user_id: str) -> tuple[bool, int]: + """Check if user is rate limited for app password operations. + + Implements a sliding window rate limiter to prevent brute-force attacks + on the app password provisioning endpoint. 
def _check_rate_limit(user_id: str) -> tuple[bool, int]:
    """Check whether ``user_id`` may attempt another app password operation.

    Sliding-window limiter: at most ``RATE_LIMIT_MAX_ATTEMPTS`` recorded
    attempts (successful or failed) are allowed per
    ``RATE_LIMIT_WINDOW_SECONDS``.

    Expired timestamps are pruned on every call, and a user whose history has
    fully expired is removed from ``_rate_limit_attempts``. Checking a user
    that has no recorded attempts never creates an entry, so requests carrying
    arbitrary path user IDs cannot grow the table merely by being checked.

    Args:
        user_id: User identifier to check.

    Returns:
        Tuple of (is_allowed, seconds_until_retry):
        - is_allowed: True if the request should be allowed.
        - seconds_until_retry: Seconds to wait if rate limited (0 if allowed).
    """
    current_time = time.time()
    window_start = current_time - RATE_LIMIT_WINDOW_SECONDS

    # Use .get() rather than indexing: indexing the defaultdict would insert
    # an empty list for every user ID that is ever probed.
    recent = [
        (ts, success)
        for ts, success in _rate_limit_attempts.get(user_id, ())
        if ts > window_start
    ]

    if not recent:
        # Nothing left inside the window: drop the key instead of keeping [].
        _rate_limit_attempts.pop(user_id, None)
        return True, 0

    _rate_limit_attempts[user_id] = recent

    # Both successful and failed attempts count towards the limit.
    if len(recent) >= RATE_LIMIT_MAX_ATTEMPTS:
        # The caller may retry once the oldest attempt leaves the window.
        oldest_attempt = min(ts for ts, _ in recent)
        seconds_until_retry = int(
            oldest_attempt + RATE_LIMIT_WINDOW_SECONDS - current_time
        )
        return False, max(1, seconds_until_retry)

    return True, 0


def _record_rate_limit_attempt(user_id: str, success: bool) -> None:
    """Record an app password provisioning attempt for rate limiting.

    Args:
        user_id: User identifier the attempt is charged to.
        success: Whether the attempt was successful.
    """
    # NOTE(review): an entry recorded here is only pruned the next time
    # _check_rate_limit() runs for the same user_id; users that never come
    # back keep their (small) entry until process restart.
    _rate_limit_attempts[user_id].append((time.time(), success))
def _extract_basic_auth(
    request: Request, path_user_id: str
) -> tuple[str, str, JSONResponse | None]:
    """Extract BasicAuth credentials and check them against the path user.

    Validates:
    1. The Authorization header is present and uses the Basic scheme
       (matched case-insensitively, as RFC 7617 requires).
    2. The payload is valid base64-encoded UTF-8 of the form ``user:password``.
    3. The username in the credentials matches the path ``user_id``.

    This function does NOT verify the password; callers that need real
    authentication must validate the returned credentials themselves.

    Args:
        request: Starlette request with Authorization header.
        path_user_id: User ID from the URL path to verify against.

    Returns:
        Tuple of (username, password, error_response):
        - If successful: (username, password, None)
        - If failed: ("", "", JSONResponse with error)
    """

    def _reject(message: str, status_code: int) -> tuple[str, str, JSONResponse]:
        # Uniform error shape shared by every failure path below.
        return (
            "",
            "",
            JSONResponse(
                {"success": False, "error": message}, status_code=status_code
            ),
        )

    auth_header = request.headers.get("Authorization") or ""
    scheme, separator, encoded = auth_header.partition(" ")

    if not separator or scheme.lower() != "basic":
        return _reject("Missing BasicAuth credentials", 401)

    try:
        decoded = base64.b64decode(encoded).decode("utf-8")
        # Split on the first ":" only; passwords may themselves contain ":".
        username, password = decoded.split(":", 1)
    except ValueError:
        # binascii.Error (bad base64), UnicodeDecodeError and a missing ":"
        # separator are all ValueError subclasses; anything else is a real bug
        # and should not be reported as a malformed header.
        return _reject("Invalid BasicAuth format", 401)

    if username != path_user_id:
        # %r keeps control characters in the attacker-controlled path value
        # from forging extra log lines.
        logger.warning(
            "Username mismatch in app password operation for path user %r",
            path_user_id,
        )
        return _reject("Username does not match path user_id", 403)

    return username, password, None
async def _get_app_password_storage(request: Request) -> "RefreshTokenStorage":
    """Get or initialize RefreshTokenStorage for app password operations.

    Uses ``request.app.state.storage`` when it is set; otherwise builds a
    storage instance from the environment and initializes it.

    Args:
        request: Starlette request with app state.

    Returns:
        Initialized RefreshTokenStorage instance.
    """
    from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

    storage = getattr(request.app.state, "storage", None)

    if not storage:
        # Multi-user BasicAuth mode may not have oauth_context, so fall back
        # to environment-based configuration.
        # NOTE(review): the fallback instance is not stored back on app.state,
        # so every request taking this path builds and initializes a new one.
        storage = RefreshTokenStorage.from_env()
        await storage.initialize()

    return storage


async def provision_app_password(request: Request) -> JSONResponse:
    """POST /api/v1/users/{user_id}/app-password - Store app password for background sync.

    Used by Astrolabe (Nextcloud PHP app) to provision app passwords for
    multi-user BasicAuth mode background sync.

    The request must include BasicAuth credentials where:
    - username: Nextcloud user ID (must match path user_id)
    - password: The app password being provisioned

    The app password is validated against Nextcloud (OCS ``cloud/user``)
    before it is stored, and the user ID reported by Nextcloud must equal the
    BasicAuth username.

    Responses:
    - 200: password stored
    - 400: missing path user_id or malformed app password
    - 401: missing/invalid BasicAuth or credentials rejected by Nextcloud
    - 403: username/path mismatch or OCS user ID mismatch
    - 429: rate limit exceeded (with ``Retry-After`` header)
    - 500: server not configured, Nextcloud unreachable or returning an
      undecodable response, or storage failure

    Failed attempts caused by the caller (bad header, bad format, rejected
    credentials, ID mismatch) are counted by the rate limiter; upstream or
    configuration failures are not.
    """
    from nextcloud_mcp_server.config import get_settings

    path_user_id = request.path_params.get("user_id")
    if not path_user_id:
        return JSONResponse(
            {"success": False, "error": "Missing user_id in path"},
            status_code=400,
        )

    # Check rate limit before doing any credential work.
    is_allowed, retry_after = _check_rate_limit(path_user_id)
    if not is_allowed:
        logger.warning(
            "Rate limit exceeded for app password provisioning: %r", path_user_id
        )
        return JSONResponse(
            {
                "success": False,
                "error": f"Rate limit exceeded. Try again in {retry_after} seconds.",
            },
            status_code=429,
            headers={"Retry-After": str(retry_after)},
        )

    username, app_password, error_response = _extract_basic_auth(request, path_user_id)
    if error_response is not None:
        _record_rate_limit_attempt(path_user_id, success=False)
        return error_response

    # Reject anything that is not shaped like a Nextcloud app password before
    # spending a round-trip on it.
    if not APP_PASSWORD_PATTERN.match(app_password):
        _record_rate_limit_attempt(path_user_id, success=False)
        return JSONResponse(
            {"success": False, "error": "Invalid app password format"},
            status_code=400,
        )

    nextcloud_host = get_settings().nextcloud_host
    if not nextcloud_host:
        logger.error("NEXTCLOUD_HOST not configured")
        return JSONResponse(
            {"success": False, "error": "Server not configured"},
            status_code=500,
        )

    # Validate the app password against Nextcloud via the OCS API.
    try:
        async with httpx.AsyncClient(timeout=NEXTCLOUD_VALIDATION_TIMEOUT) as client:
            response = await client.get(
                f"{nextcloud_host}/ocs/v1.php/cloud/user",
                auth=(username, app_password),
                params={"format": "json"},
                headers={"OCS-APIRequest": "true"},
            )
    except httpx.RequestError as e:
        logger.error("Failed to validate app password: %s", e)
        return JSONResponse(
            {"success": False, "error": "Failed to validate credentials"},
            status_code=500,
        )

    if response.status_code != 200:
        logger.warning(
            "App password validation failed for user: HTTP %s", response.status_code
        )
        _record_rate_limit_attempt(path_user_id, success=False)
        return JSONResponse(
            {"success": False, "error": "Invalid app password"},
            status_code=401,
        )

    # A 200 with an undecodable body (proxy error page, maintenance HTML) is
    # an upstream problem, not a bad credential: report it without charging
    # the caller's rate limit.
    try:
        node = response.json()
    except ValueError:
        logger.error("Nextcloud returned a non-JSON response to OCS user lookup")
        return JSONResponse(
            {"success": False, "error": "Failed to validate credentials"},
            status_code=500,
        )

    # Walk ocs -> data -> id defensively; any non-object level yields None,
    # which then fails the comparison below.
    for key in ("ocs", "data", "id"):
        node = node.get(key) if isinstance(node, dict) else None
    ocs_user_id = node

    if ocs_user_id != username:
        logger.warning("User ID mismatch in OCS response")
        _record_rate_limit_attempt(path_user_id, success=False)
        return JSONResponse(
            {"success": False, "error": "User ID mismatch"},
            status_code=403,
        )

    # Store the validated app password.
    try:
        storage = await _get_app_password_storage(request)
        await storage.store_app_password(username, app_password)
    except Exception as e:
        error_msg = _sanitize_error_for_client(e, "provision_app_password")
        return JSONResponse(
            {"success": False, "error": error_msg},
            status_code=500,
        )

    _record_rate_limit_attempt(path_user_id, success=True)
    logger.info("Provisioned app password for user: %s", username)

    return JSONResponse(
        {
            "success": True,
            "message": f"App password stored for {username}",
        }
    )
async def get_app_password_status(request: Request) -> JSONResponse:
    """GET /api/v1/users/{user_id}/app-password - Check if user has provisioned app password.

    Returns status of background sync access for multi-user BasicAuth mode.

    Requires BasicAuth with the user's credentials. The credentials are
    verified against Nextcloud before any status is disclosed; matching the
    username to the path alone is not treated as authentication.

    Responses:
    - 200: ``{"success": True, "user_id": ..., "has_app_password": bool}``
    - 400: missing path user_id
    - 401: missing/invalid BasicAuth or credentials rejected by Nextcloud
    - 403: BasicAuth username does not match path user_id
    - 500: server not configured, Nextcloud unreachable, or storage failure
    """
    from nextcloud_mcp_server.config import get_settings

    path_user_id = request.path_params.get("user_id")
    if not path_user_id:
        return JSONResponse(
            {"success": False, "error": "Missing user_id in path"},
            status_code=400,
        )

    username, password, error_response = _extract_basic_auth(request, path_user_id)
    if error_response is not None:
        return error_response

    nextcloud_host = get_settings().nextcloud_host
    if not nextcloud_host:
        logger.error("NEXTCLOUD_HOST not configured")
        return JSONResponse(
            {"success": False, "error": "Server not configured"},
            status_code=500,
        )

    # _extract_basic_auth only parses the header; without this round-trip any
    # caller could probe whether an arbitrary user has a stored app password.
    try:
        async with httpx.AsyncClient(timeout=NEXTCLOUD_VALIDATION_TIMEOUT) as client:
            response = await client.get(
                f"{nextcloud_host}/ocs/v1.php/cloud/user",
                auth=(username, password),
                params={"format": "json"},
                headers={"OCS-APIRequest": "true"},
            )
    except httpx.RequestError as e:
        logger.error("Failed to validate credentials: %s", e)
        return JSONResponse(
            {"success": False, "error": "Failed to validate credentials"},
            status_code=500,
        )

    if response.status_code != 200:
        return JSONResponse(
            {"success": False, "error": "Invalid credentials"},
            status_code=401,
        )

    try:
        storage = await _get_app_password_storage(request)
        app_password = await storage.get_app_password(username)
    except Exception as e:
        error_msg = _sanitize_error_for_client(e, "get_app_password_status")
        return JSONResponse(
            {"success": False, "error": error_msg},
            status_code=500,
        )

    return JSONResponse(
        {
            "success": True,
            "user_id": username,
            # Only presence is reported; the stored secret never leaves the server.
            "has_app_password": app_password is not None,
        }
    )
async def delete_app_password(request: Request) -> JSONResponse:
    """DELETE /api/v1/users/{user_id}/app-password - Delete stored app password.

    Removes the user's app password from MCP server storage.

    Requires BasicAuth with the user's credentials, which are verified
    against Nextcloud before anything is deleted.

    Responses:
    - 200: password deleted, or nothing was stored (both ``success: True``)
    - 400: missing path user_id
    - 401: missing/invalid BasicAuth or credentials rejected by Nextcloud
    - 403: BasicAuth username does not match path user_id
    - 500: server not configured, Nextcloud unreachable, or storage failure
    """
    from nextcloud_mcp_server.config import get_settings

    path_user_id = request.path_params.get("user_id")
    if not path_user_id:
        return JSONResponse(
            {"success": False, "error": "Missing user_id in path"},
            status_code=400,
        )

    username, password, error_response = _extract_basic_auth(request, path_user_id)
    if error_response is not None:
        return error_response

    nextcloud_host = get_settings().nextcloud_host
    if not nextcloud_host:
        # Without this guard the request below targets "None/ocs/..." and the
        # failure is reported as a credential-validation error.
        logger.error("NEXTCLOUD_HOST not configured")
        return JSONResponse(
            {"success": False, "error": "Server not configured"},
            status_code=500,
        )

    # Validate credentials against Nextcloud.
    try:
        async with httpx.AsyncClient(timeout=NEXTCLOUD_VALIDATION_TIMEOUT) as client:
            response = await client.get(
                f"{nextcloud_host}/ocs/v1.php/cloud/user",
                auth=(username, password),
                params={"format": "json"},
                headers={"OCS-APIRequest": "true"},
            )
    except httpx.RequestError as e:
        logger.error("Failed to validate credentials: %s", e)
        return JSONResponse(
            {"success": False, "error": "Failed to validate credentials"},
            status_code=500,
        )

    if response.status_code != 200:
        return JSONResponse(
            {"success": False, "error": "Invalid credentials"},
            status_code=401,
        )

    try:
        storage = await _get_app_password_storage(request)
        deleted = await storage.delete_app_password(username)
    except Exception as e:
        error_msg = _sanitize_error_for_client(e, "delete_app_password")
        return JSONResponse(
            {"success": False, "error": error_msg},
            status_code=500,
        )

    if not deleted:
        # Deleting a password that was never stored still reports success.
        return JSONResponse(
            {
                "success": True,
                "message": "No app password found to delete",
            }
        )

    logger.info("Deleted app password for user: %s", username)
    return JSONResponse(
        {
            "success": True,
            "message": f"App password deleted for {username}",
        }
    )
-0,0 +1,813 @@ +"""Visualization API endpoints for search and PDF preview. + +ADR-018: Provides REST API endpoints for the Nextcloud PHP app (Astrolabe) to: +- Execute unified search with semantic/BM25/hybrid algorithms +- Execute vector search with PCA visualization coordinates +- Fetch chunk context with surrounding text +- Render PDF pages server-side (avoiding CSP/worker issues) + +All endpoints require OAuth bearer token authentication via UnifiedTokenVerifier. +""" + +import base64 +import logging +from typing import TYPE_CHECKING, Any + +import pymupdf + +if TYPE_CHECKING: + pass + +from starlette.requests import Request +from starlette.responses import JSONResponse + +from nextcloud_mcp_server.api.management import ( + _parse_float_param, + _parse_int_param, + _sanitize_error_for_client, + _validate_query_string, + extract_bearer_token, + validate_token_and_get_user, +) + +logger = logging.getLogger(__name__) + + +async def unified_search(request: Request) -> JSONResponse: + """POST /api/v1/search - Search endpoint for Nextcloud Unified Search. + + Optimized search endpoint for the Nextcloud Unified Search provider + and other PHP app integrations. Returns results with metadata needed + for navigation to source documents. + + Request body: + { + "query": "search query", + "algorithm": "semantic|bm25|hybrid", // default: hybrid + "limit": 20, // max: 100 + "offset": 0, // pagination offset + "include_pca": false, // optional PCA coordinates + "include_chunks": true // include text snippets + } + + Response: + { + "results": [{ + "id": "doc123", + "doc_type": "note", + "title": "Document Title", + "excerpt": "Matching text snippet...", + "score": 0.85, + "path": "/path/to/file.txt", // for files + "board_id": 1, // for deck cards + "card_id": 42 + }], + "total_found": 150, + "algorithm_used": "hybrid" + } + + Requires OAuth bearer token for user filtering. 
+ """ + from nextcloud_mcp_server.config import get_settings + + settings = get_settings() + if not settings.vector_sync_enabled: + return JSONResponse( + {"error": "Vector sync is disabled on this server"}, + status_code=404, + ) + + # Validate OAuth token and extract user + try: + user_id, _validated = await validate_token_and_get_user(request) + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/search: {e}") + return JSONResponse( + { + "error": "Unauthorized", + "message": _sanitize_error_for_client(e, "unified_search"), + }, + status_code=401, + ) + + try: + # Parse request body + body = await request.json() + + # Validate and parse parameters + try: + query = body.get("query", "") + _validate_query_string(query, max_length=10000) + + limit = _parse_int_param( + str(body.get("limit")) if body.get("limit") is not None else None, + 20, + 1, + 100, + "limit", + ) + + offset = _parse_int_param( + str(body.get("offset")) if body.get("offset") is not None else None, + 0, + 0, + 1000000, + "offset", + ) + + score_threshold = _parse_float_param( + body.get("score_threshold"), + 0.0, + 0.0, + 1.0, + "score_threshold", + ) + except ValueError as e: + return JSONResponse({"error": str(e)}, status_code=400) + + algorithm = body.get("algorithm", "hybrid") + fusion = body.get("fusion", "rrf") + include_pca = body.get("include_pca", False) + include_chunks = body.get("include_chunks", True) + doc_types = body.get("doc_types") # Optional filter + + if not query: + return JSONResponse({"results": [], "total_found": 0}) + + # Validate algorithm + valid_algorithms = {"semantic", "bm25", "hybrid"} + if algorithm not in valid_algorithms: + algorithm = "hybrid" + + # Validate fusion method + valid_fusions = {"rrf", "dbsf"} + if fusion not in valid_fusions: + fusion = "rrf" + + # Execute search using the appropriate algorithm + from nextcloud_mcp_server.search import ( + BM25HybridSearchAlgorithm, + SemanticSearchAlgorithm, + ) + + # Select search algorithm + 
if algorithm == "semantic": + search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold) + else: + search_algo = BM25HybridSearchAlgorithm( + score_threshold=score_threshold, fusion=fusion + ) + + # Request extra results to handle offset + search_limit = limit + offset + + # Execute search + all_results = [] + if doc_types and isinstance(doc_types, list): + for doc_type in doc_types: + if doc_type: + results = await search_algo.search( + query=query, + user_id=user_id, + limit=search_limit, + doc_type=doc_type, + ) + all_results.extend(results) + all_results.sort(key=lambda r: r.score, reverse=True) + else: + all_results = await search_algo.search( + query=query, + user_id=user_id, + limit=search_limit, + ) + + # Sort results by score (no deduplication - show all chunks) + sorted_results = sorted(all_results, key=lambda r: r.score, reverse=True) + + # Calculate total and apply pagination + total_found = len(sorted_results) + paginated_results = sorted_results[offset : offset + limit] + + # Format results for Unified Search + formatted_results = [] + for result in paginated_results: + # Get document ID (prefer note_id for notes) + doc_id = result.id + if result.metadata and "note_id" in result.metadata: + doc_id = result.metadata["note_id"] + + result_data: dict[str, Any] = { + "id": doc_id, + "doc_type": result.doc_type, + "title": result.title, + "score": result.score, + } + + # Include excerpt/chunk if requested (full content, no truncation) + if include_chunks and result.excerpt: + result_data["excerpt"] = result.excerpt + + # Include navigation metadata from result.metadata + if result.metadata: + # File path and mimetype for files + if "path" in result.metadata: + result_data["path"] = result.metadata["path"] + if "mime_type" in result.metadata: + result_data["mime_type"] = result.metadata["mime_type"] + + # Deck card navigation + if "board_id" in result.metadata: + result_data["board_id"] = result.metadata["board_id"] + if "card_id" in 
result.metadata: + result_data["card_id"] = result.metadata["card_id"] + + # Calendar event metadata + if "calendar_id" in result.metadata: + result_data["calendar_id"] = result.metadata["calendar_id"] + if "event_uid" in result.metadata: + result_data["event_uid"] = result.metadata["event_uid"] + + # Add PDF page metadata + if result.page_number is not None: + result_data["page_number"] = result.page_number + if result.page_count is not None: + result_data["page_count"] = result.page_count + + # Add chunk metadata (always present, defaults to 0 and 1) + result_data["chunk_index"] = result.chunk_index + result_data["total_chunks"] = result.total_chunks + + # Add chunk offsets for modal navigation + if result.chunk_start_offset is not None: + result_data["chunk_start_offset"] = result.chunk_start_offset + if result.chunk_end_offset is not None: + result_data["chunk_end_offset"] = result.chunk_end_offset + + formatted_results.append(result_data) + + response_data: dict[str, Any] = { + "results": formatted_results, + "total_found": total_found, + "algorithm_used": algorithm, + } + + # Optional PCA coordinates + if include_pca and len(paginated_results) >= 2: + try: + from nextcloud_mcp_server.vector.visualization import ( + compute_pca_coordinates, + ) + + if search_algo.query_embedding is not None: + query_embedding = search_algo.query_embedding + else: + from nextcloud_mcp_server.embedding.service import ( + get_embedding_service, + ) + + embedding_service = get_embedding_service() + query_embedding = await embedding_service.embed(query) + + pca_data = await compute_pca_coordinates( + paginated_results, query_embedding + ) + response_data["pca_data"] = pca_data + except Exception as e: + logger.warning(f"Failed to compute PCA for unified search: {e}") + + return JSONResponse(response_data) + + except Exception as e: + logger.error(f"Error in unified search: {e}") + return JSONResponse( + { + "error": "Internal error", + "message": _sanitize_error_for_client(e, 
"unified_search"), + }, + status_code=500, + ) + + +async def vector_search(request: Request) -> JSONResponse: + """POST /api/v1/vector-viz/search - Vector search for visualization. + + Executes semantic search and returns results with optional PCA coordinates + for 2D visualization. + + Request body: + { + "query": "search query", + "algorithm": "semantic|bm25|hybrid", // default: hybrid + "limit": 10, // max: 50 + "include_pca": true, // whether to include 2D coordinates + "doc_types": ["note", "file"] // optional filter by document types + } + + Requires OAuth bearer token for user filtering. + """ + from nextcloud_mcp_server.config import get_settings + + settings = get_settings() + if not settings.vector_sync_enabled: + return JSONResponse( + {"error": "Vector sync is disabled on this server"}, + status_code=404, + ) + + # Validate OAuth token and extract user + try: + user_id, _validated = await validate_token_and_get_user(request) + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/vector-viz/search: {e}") + return JSONResponse( + { + "error": "Unauthorized", + "message": _sanitize_error_for_client(e, "vector_search"), + }, + status_code=401, + ) + + try: + # Parse request body + body = await request.json() + query = body.get("query", "") + algorithm = body.get("algorithm", "hybrid") + fusion = body.get("fusion", "rrf") + score_threshold = body.get("score_threshold", 0.0) + limit = min(body.get("limit", 10), 50) # Enforce max limit + include_pca = body.get("include_pca", True) + doc_types = body.get("doc_types") # Optional list of document types + + if not query: + return JSONResponse( + {"error": "Missing required parameter: query"}, + status_code=400, + ) + + # Validate algorithm + valid_algorithms = {"semantic", "bm25", "hybrid"} + if algorithm not in valid_algorithms: + algorithm = "hybrid" + + # Validate fusion method + valid_fusions = {"rrf", "dbsf"} + if fusion not in valid_fusions: + fusion = "rrf" + + # Execute search using 
the appropriate algorithm + from nextcloud_mcp_server.search import ( + BM25HybridSearchAlgorithm, + SemanticSearchAlgorithm, + ) + + # Select search algorithm + if algorithm == "semantic": + search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold) + else: + # Both "hybrid" and "bm25" use the BM25HybridSearchAlgorithm + # which combines dense semantic and sparse BM25 vectors + search_algo = BM25HybridSearchAlgorithm( + score_threshold=score_threshold, fusion=fusion + ) + + # Execute search for each doc_type if specified, otherwise search all + all_results = [] + if doc_types and isinstance(doc_types, list): + # Search each doc_type separately and merge results + for doc_type in doc_types: + if doc_type: # Skip empty strings + results = await search_algo.search( + query=query, + user_id=user_id, + limit=limit, + doc_type=doc_type, + ) + all_results.extend(results) + # Sort merged results by score and limit + all_results.sort(key=lambda r: r.score, reverse=True) + all_results = all_results[:limit] + else: + # Search all document types + all_results = await search_algo.search( + query=query, + user_id=user_id, + limit=limit, + ) + + # Format results for PHP client + formatted_results = [] + for result in all_results: + formatted_result = { + "id": result.id, + "doc_type": result.doc_type, + "title": result.title, + "excerpt": result.excerpt[:200] if result.excerpt else "", + "score": result.score, + "metadata": result.metadata, + # Chunk information for context display + "chunk_index": result.chunk_index, + "total_chunks": result.total_chunks, + } + # Include optional fields if present + if result.chunk_start_offset is not None: + formatted_result["chunk_start_offset"] = result.chunk_start_offset + if result.chunk_end_offset is not None: + formatted_result["chunk_end_offset"] = result.chunk_end_offset + if result.page_number is not None: + formatted_result["page_number"] = result.page_number + if result.page_count is not None: + 
async def get_chunk_context(request: Request) -> JSONResponse:
    """GET /api/v1/chunk-context - Fetch chunk text with context.

    Retrieves the matched chunk along with surrounding text and metadata.
    Used by clients to display chunk context and highlighted PDFs.

    Query parameters:
        doc_type: Document type (e.g., "note")
        doc_id: Document ID
        start: Chunk start offset (character position)
        end: Chunk end offset (character position)
        context: Characters of context before/after (default: 500)

    Responses:
        200: chunk text, surrounding context, page/chunk metadata and, for
             files, the highlighted page image when Qdrant has one.
        400: a required parameter is missing or out of range.
        401: the bearer token could not be validated.
        404: the chunk context could not be fetched.
        500: any other failure (logged, message sanitized for the client).

    Requires OAuth bearer token for authentication.
    """
    try:
        # Validate OAuth token and extract user
        user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/chunk-context: {e}")
        return JSONResponse(
            {
                "error": "Unauthorized",
                "message": _sanitize_error_for_client(e, "get_chunk_context"),
            },
            status_code=401,
        )

    try:
        params = request.query_params
        doc_type = params.get("doc_type")
        doc_id = params.get("doc_id")
        start_str = params.get("start")
        end_str = params.get("end")

        # Explicit checks (rather than all([...]) + assert) both validate the
        # input and narrow the types, and keep working under `python -O`.
        if not doc_type or not doc_id or not start_str or not end_str:
            return JSONResponse(
                {
                    "success": False,
                    "error": "Missing required parameters: doc_type, doc_id, start, end",
                },
                status_code=400,
            )

        # Parse and validate integer parameters with bounds checking
        try:
            context_chars = _parse_int_param(
                params.get("context"),
                500,
                0,
                10000,
                "context_chars",
            )
            start = _parse_int_param(start_str, 0, 0, 10000000, "start")
            end = _parse_int_param(end_str, 0, 0, 10000000, "end")
            if end <= start:
                raise ValueError("end must be greater than start")
        except ValueError as e:
            return JSONResponse({"success": False, "error": str(e)}, status_code=400)

        # Convert doc_id to int if possible (most IDs are int).
        # str.isdecimal() is used instead of str.isdigit(): isdigit() is also
        # true for characters such as "²" that int() rejects, which used to
        # surface as a 500 instead of being treated as a string ID.
        doc_id_val: str | int = int(doc_id) if doc_id.isdecimal() else doc_id

        # Get bearer token for client initialization
        token = extract_bearer_token(request)
        if not token:
            raise ValueError("Missing token")

        # Get Nextcloud host from OAuth context
        oauth_ctx = request.app.state.oauth_context
        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
        if not nextcloud_host:
            raise ValueError("Nextcloud host not configured")

        # Initialize authenticated Nextcloud client
        from nextcloud_mcp_server.client import NextcloudClient
        from nextcloud_mcp_server.search.context import get_chunk_with_context

        async with NextcloudClient.from_token(
            base_url=nextcloud_host, token=token, username=user_id
        ) as nc_client:
            chunk_context = await get_chunk_with_context(
                nc_client=nc_client,
                user_id=user_id,
                doc_id=doc_id_val,
                doc_type=doc_type,
                chunk_start=start,
                chunk_end=end,
                context_chars=context_chars,
            )

        if chunk_context is None:
            return JSONResponse(
                {
                    "success": False,
                    "error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
                },
                status_code=404,
            )

        # For PDF files, also fetch the highlighted page image from Qdrant if
        # available. This is best-effort: a failure here only costs the image.
        highlighted_page_image = None
        page_number = chunk_context.page_number

        if doc_type == "file":
            try:
                from qdrant_client.models import FieldCondition, Filter, MatchValue

                from nextcloud_mcp_server.config import get_settings
                from nextcloud_mcp_server.vector.placeholder import (
                    get_placeholder_filter,
                )
                from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

                settings = get_settings()
                qdrant_client = await get_qdrant_client()

                # Query for this specific chunk's highlighted image
                points_response = await qdrant_client.scroll(
                    collection_name=settings.get_collection_name(),
                    scroll_filter=Filter(
                        must=[
                            get_placeholder_filter(),
                            FieldCondition(
                                key="doc_id", match=MatchValue(value=doc_id_val)
                            ),
                            FieldCondition(
                                key="user_id", match=MatchValue(value=user_id)
                            ),
                            FieldCondition(
                                key="chunk_start_offset", match=MatchValue(value=start)
                            ),
                            FieldCondition(
                                key="chunk_end_offset", match=MatchValue(value=end)
                            ),
                        ]
                    ),
                    limit=1,
                    with_vectors=False,
                    with_payload=["highlighted_page_image", "page_number"],
                )

                # scroll() returns (points, next_offset); only the first point matters.
                points = points_response[0]
                payload = points[0].payload if points else None
                if payload:
                    highlighted_page_image = payload.get("highlighted_page_image")
                    # Trust Qdrant page number if available (might be more
                    # accurate than context expansion logic)
                    stored_page_number = payload.get("page_number")
                    if stored_page_number is not None:
                        page_number = stored_page_number

            except Exception as e:
                logger.warning(f"Failed to fetch highlighted image: {e}")

        # Build response
        response_data = {
            "success": True,
            "chunk_text": chunk_context.chunk_text,
            "before_context": chunk_context.before_context,
            "after_context": chunk_context.after_context,
            "has_more_before": chunk_context.has_before_truncation,
            "has_more_after": chunk_context.has_after_truncation,
            "page_number": page_number,
            "chunk_index": chunk_context.chunk_index,
            "total_chunks": chunk_context.total_chunks,
        }

        if highlighted_page_image:
            response_data["highlighted_page_image"] = highlighted_page_image

        return JSONResponse(response_data)

    except Exception as e:
        # Log server-side before sanitizing; previously this path was silent.
        logger.error(f"Chunk context error: {e}", exc_info=True)
        error_msg = _sanitize_error_for_client(e, "get_chunk_context")
        # "success": False keeps the 500 body consistent with the 400/404
        # bodies of this endpoint; the existing "error" key is unchanged.
        return JSONResponse(
            {"success": False, "error": error_msg},
            status_code=500,
        )
+ + Query parameters: + file_path: WebDAV path to PDF file (e.g., "/Documents/report.pdf") + page: Page number (1-indexed, default: 1) + scale: Zoom factor for rendering (default: 2.0 = 144 DPI) + + Returns: + { + "success": true, + "image": "", + "page_number": 1, + "total_pages": 10 + } + + Requires OAuth bearer token for authentication. + """ + # Log incoming request + file_path_param = request.query_params.get("file_path", "") + page_param = request.query_params.get("page", "1") + logger.info(f"PDF preview request: file_path={file_path_param}, page={page_param}") + + try: + # Validate OAuth token and extract user + user_id, validated = await validate_token_and_get_user(request) + logger.info(f"PDF preview authenticated for user: {user_id}") + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/pdf-preview: {e}") + return JSONResponse( + { + "success": False, + "error": "Unauthorized", + "message": _sanitize_error_for_client(e, "get_pdf_preview"), + }, + status_code=401, + ) + + try: + # Parse and validate parameters + file_path = request.query_params.get("file_path") + if not file_path: + return JSONResponse( + {"success": False, "error": "Missing required parameter: file_path"}, + status_code=400, + ) + + # Validate no path traversal sequences + if ".." 
in file_path: + return JSONResponse( + {"success": False, "error": "Invalid file path"}, + status_code=400, + ) + + try: + page_num = _parse_int_param( + request.query_params.get("page"), 1, 1, 10000, "page" + ) + scale = _parse_float_param( + request.query_params.get("scale"), 2.0, 0.5, 5.0, "scale" + ) + except ValueError as e: + return JSONResponse({"success": False, "error": str(e)}, status_code=400) + + # Get bearer token for WebDAV authentication + token = extract_bearer_token(request) + if not token: + raise ValueError("Missing token") + + # Get Nextcloud host from OAuth context + oauth_ctx = request.app.state.oauth_context + nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "") + + if not nextcloud_host: + raise ValueError("Nextcloud host not configured") + + # Download PDF via WebDAV using user's token + from nextcloud_mcp_server.client import NextcloudClient + + async with NextcloudClient.from_token( + base_url=nextcloud_host, token=token, username=user_id + ) as nc_client: + pdf_bytes, _ = await nc_client.webdav.read_file(file_path) + + # Check file size limit (50 MB) + max_pdf_size = 50 * 1024 * 1024 + if len(pdf_bytes) > max_pdf_size: + return JSONResponse( + { + "success": False, + "error": f"PDF file exceeds maximum size limit ({max_pdf_size // (1024 * 1024)} MB)", + }, + status_code=413, + ) + + # Render page with PyMuPDF + doc = pymupdf.open(stream=pdf_bytes, filetype="pdf") + try: + total_pages = doc.page_count + + # Validate page number + if page_num > total_pages: + return JSONResponse( + { + "success": False, + "error": f"Page {page_num} does not exist (document has {total_pages} pages)", + }, + status_code=400, + ) + + page = doc[page_num - 1] # 0-indexed + mat = pymupdf.Matrix(scale, scale) + pix = page.get_pixmap(matrix=mat, alpha=False) + png_bytes = pix.tobytes("png") + finally: + doc.close() + + # Encode as base64 + image_b64 = base64.b64encode(png_bytes).decode("ascii") + + logger.info( + f"Rendered PDF preview: 
"""Webhook management API endpoints.

Provides REST API endpoints for managing webhook registrations with Nextcloud.
These endpoints are used by the Nextcloud PHP app (Astrolabe) to:
- List installed Nextcloud apps
- Create, list, and delete webhook registrations

All endpoints require OAuth bearer token authentication via UnifiedTokenVerifier.
"""

import logging

import httpx
from starlette.requests import Request
from starlette.responses import JSONResponse

from nextcloud_mcp_server.api.management import (
    _sanitize_error_for_client,
    extract_bearer_token,
    validate_token_and_get_user,
)

logger = logging.getLogger(__name__)

# Timeout (seconds) for every outgoing request to Nextcloud.
_NEXTCLOUD_TIMEOUT = 30.0


def _unauthorized_response(exc: Exception, handler_name: str) -> JSONResponse:
    """Build the 401 body returned when token validation fails."""
    return JSONResponse(
        {
            "error": "Unauthorized",
            "message": _sanitize_error_for_client(exc, handler_name),
        },
        status_code=401,
    )


def _internal_error_response(exc: Exception, handler_name: str) -> JSONResponse:
    """Build the 500 body returned for unexpected failures."""
    return JSONResponse(
        {
            "error": "Internal error",
            "message": _sanitize_error_for_client(exc, handler_name),
        },
        status_code=500,
    )


def _bad_request_response(message: str) -> JSONResponse:
    """Build the 400 body returned for invalid client input."""
    return JSONResponse(
        {"error": "Bad request", "message": message},
        status_code=400,
    )


def _nextcloud_http_client(request: Request) -> httpx.AsyncClient:
    """Create an HTTP client that forwards the caller's bearer token to Nextcloud.

    Raises:
        ValueError: if the request carries no bearer token or the Nextcloud
            host is missing from the OAuth context.
    """
    token = extract_bearer_token(request)
    if not token:
        raise ValueError("Missing Authorization header")

    oauth_ctx = request.app.state.oauth_context
    nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
    if not nextcloud_host:
        raise ValueError("Nextcloud host not configured")

    return httpx.AsyncClient(
        base_url=nextcloud_host,
        headers={"Authorization": f"Bearer {token}"},
        timeout=_NEXTCLOUD_TIMEOUT,
    )


async def get_installed_apps(request: Request) -> JSONResponse:
    """GET /api/v1/apps - Get list of installed Nextcloud apps.

    Returns a list of installed app IDs for filtering webhook presets.

    Requires OAuth bearer token for authentication.
    """
    try:
        user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/apps: {e}")
        return _unauthorized_response(e, "get_installed_apps")

    try:
        async with _nextcloud_http_client(request) as client:
            # Notes, Calendar, Deck, Tables, etc. are apps that support
            # webhooks; ask the OCS API which apps are installed and enabled.
            response = await client.get(
                "/ocs/v1.php/cloud/apps",
                params={"filter": "enabled"},
                headers={"OCS-APIRequest": "true", "Accept": "application/json"},
            )

            if response.status_code != 200:
                raise ValueError(f"OCS API returned status {response.status_code}")

            data = response.json()
            apps = data.get("ocs", {}).get("data", {}).get("apps", [])

        return JSONResponse({"apps": apps})

    except Exception as e:
        logger.error(f"Error getting installed apps for user {user_id}: {e}")
        return _internal_error_response(e, "get_installed_apps")


async def list_webhooks(request: Request) -> JSONResponse:
    """GET /api/v1/webhooks - List all registered webhooks.

    Returns list of webhook registrations for the authenticated user.

    Requires OAuth bearer token for authentication.
    """
    try:
        user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
        return _unauthorized_response(e, "list_webhooks")

    try:
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        async with _nextcloud_http_client(request) as client:
            webhooks_client = WebhooksClient(client, user_id)
            webhooks = await webhooks_client.list_webhooks()

        return JSONResponse({"webhooks": webhooks})

    except Exception as e:
        logger.error(f"Error listing webhooks for user {user_id}: {e}")
        return _internal_error_response(e, "list_webhooks")


async def create_webhook(request: Request) -> JSONResponse:
    """POST /api/v1/webhooks - Create a new webhook registration.

    Request body:
    {
        "event": "OCP\\Files\\Events\\Node\\NodeCreatedEvent",
        "uri": "http://mcp:8000/webhooks/nextcloud",
        "eventFilter": {"event.node.path": "/^\\/.*\\/files\\/Notes\\//"}
    }

    Returns the created webhook data including the webhook ID. A body that is
    not a JSON object, or that lacks "event" or "uri", yields a 400.

    Requires OAuth bearer token for authentication.
    """
    try:
        user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
        return _unauthorized_response(e, "create_webhook")

    try:
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        # Malformed JSON is a client error, not a server failure.
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        try:
            body = await request.json()
        except ValueError:
            return _bad_request_response("Request body must be valid JSON")
        if not isinstance(body, dict):
            return _bad_request_response("Request body must be a JSON object")

        event = body.get("event")
        uri = body.get("uri")
        # Accept both camelCase (eventFilter) and snake_case (event_filter)
        event_filter = body.get("eventFilter") or body.get("event_filter")

        if not event or not uri:
            return _bad_request_response("Missing required fields: event, uri")

        async with _nextcloud_http_client(request) as client:
            webhooks_client = WebhooksClient(client, user_id)
            webhook_data = await webhooks_client.create_webhook(
                event=event, uri=uri, event_filter=event_filter
            )

        return JSONResponse({"webhook": webhook_data})

    except Exception as e:
        logger.error(f"Error creating webhook for user {user_id}: {e}")
        return _internal_error_response(e, "create_webhook")


async def delete_webhook(request: Request) -> JSONResponse:
    """DELETE /api/v1/webhooks/{webhook_id} - Delete a webhook registration.

    Returns success/failure status. A missing or non-integer webhook_id
    yields a 400.

    Requires OAuth bearer token for authentication.
    """
    try:
        user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
        return _unauthorized_response(e, "delete_webhook")

    try:
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        raw_webhook_id = request.path_params.get("webhook_id")
        if not raw_webhook_id:
            return _bad_request_response("Missing webhook_id")

        try:
            webhook_id = int(raw_webhook_id)
        except ValueError:
            return _bad_request_response("Invalid webhook_id")

        async with _nextcloud_http_client(request) as client:
            webhooks_client = WebhooksClient(client, user_id)
            await webhooks_client.delete_webhook(webhook_id=webhook_id)

        return JSONResponse({"success": True, "message": "Webhook deleted"})

    except Exception as e:
        logger.error(f"Error deleting webhook for user {user_id}: {e}")
        return _internal_error_response(e, "delete_webhook")
a/nextcloud_mcp_server/app.py b/nextcloud_mcp_server/app.py index 2579408..cc1927b 100644 --- a/nextcloud_mcp_server/app.py +++ b/nextcloud_mcp_server/app.py @@ -2112,13 +2112,14 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = settings.enable_multi_user_basic_auth and settings.enable_offline_access ) if enable_management_apis: - from nextcloud_mcp_server.api.management import ( + from nextcloud_mcp_server.api import ( create_webhook, delete_app_password, delete_webhook, get_app_password_status, get_chunk_context, get_installed_apps, + get_pdf_preview, get_server_status, get_user_session, get_vector_sync_status, @@ -2179,6 +2180,8 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = routes.append( Route("/api/v1/chunk-context", get_chunk_context, methods=["GET"]) ) + # PDF preview endpoint for Astrolabe (server-side rendering) + routes.append(Route("/api/v1/pdf-preview", get_pdf_preview, methods=["GET"])) # ADR-018: Unified search endpoint for Nextcloud PHP app integration routes.append(Route("/api/v1/search", unified_search, methods=["POST"])) routes.append(Route("/api/v1/apps", get_installed_apps, methods=["GET"])) @@ -2193,7 +2196,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = "/api/v1/users/{user_id}/session, /api/v1/users/{user_id}/revoke, " "/api/v1/users/{user_id}/app-password, " "/api/v1/vector-viz/search, /api/v1/search, /api/v1/apps, " - "/api/v1/webhooks" + "/api/v1/webhooks, /api/v1/pdf-preview" ) # ADR-016: Add Smithery well-known config endpoint for container runtime discovery diff --git a/scripts/dbquery.py b/scripts/dbquery.py new file mode 100755 index 0000000..21e894d --- /dev/null +++ b/scripts/dbquery.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +""" +Database query helper for development. + +Wraps `docker compose exec db mariadb` to execute SQL statements against +the Nextcloud MariaDB database. 
"""
Database query helper for development.

Wraps `docker compose exec db mariadb` to execute SQL statements against
the Nextcloud MariaDB database.

Usage:
    ./scripts/dbquery.py "SELECT * FROM oc_notes LIMIT 5"
    ./scripts/dbquery.py -u root -p password "SHOW TABLES"
    ./scripts/dbquery.py --json "SELECT * FROM oc_oidc_clients"
"""

import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Optional

# File names Docker Compose looks for, in the order they are checked here.
COMPOSE_FILENAMES = (
    "docker-compose.yml",
    "compose.yml",
    "compose.yaml",
    "docker-compose.yaml",
)

# Warning the client prints whenever -p is used; it is noise for a dev helper.
PASSWORD_WARNING = "Using a password on the command line interface can be insecure"

# Escape sequences the client emits in --batch mode.
BATCH_ESCAPES = {"n": "\n", "t": "\t", "0": "\0", "\\": "\\"}


def find_compose_dir(start: Optional[Path] = None) -> Path:
    """Find the nearest ancestor directory containing a Compose file.

    Args:
        start: Directory to start searching from. Defaults to the directory
            containing this script.

    Returns:
        The first directory (walking upwards, filesystem root excluded) that
        holds one of COMPOSE_FILENAMES, or the script's parent directory when
        none is found.
    """
    script_dir = Path(__file__).resolve().parent
    current = script_dir if start is None else Path(start).resolve()
    while current != current.parent:
        if any((current / name).exists() for name in COMPOSE_FILENAMES):
            return current
        current = current.parent
    # Default to script's parent directory
    return script_dir.parent


def build_command(
    sql: str,
    user: str = "root",
    password: str = "password",
    database: str = "nextcloud",
    vertical: bool = False,
    json_output: bool = False,
) -> list[str]:
    """Build the `docker compose exec` argv for one SQL statement.

    JSON output takes precedence over vertical output because the JSON
    conversion needs the tab-separated --batch format.
    """
    cmd = [
        "docker",
        "compose",
        "exec",
        "-T",  # Disable pseudo-TTY allocation
        "db",
        "mariadb",
        f"-u{user}",
        f"-p{password}",
        database,
    ]
    if json_output:
        cmd.append("--batch")  # Tab-separated rows with escaped special chars
    elif vertical:
        cmd.append("-E")  # Vertical output format
    cmd.extend(["-e", sql])
    return cmd


def unescape_batch_field(field: str) -> str:
    """Reverse the backslash escaping applied to one --batch column value."""
    if "\\" not in field:
        return field
    pieces = []
    chars = iter(field)
    for ch in chars:
        if ch != "\\":
            pieces.append(ch)
            continue
        follower = next(chars, "")
        # Unknown escapes are kept verbatim rather than silently dropped.
        pieces.append(BATCH_ESCAPES.get(follower, "\\" + follower))
    return "".join(pieces)


def batch_to_json(text: str) -> str:
    """Convert --batch output (header row + tab-separated rows) to JSON.

    Every value is a string except the literal NULL, which becomes null.
    NOTE: a column holding the text 'NULL' is indistinguishable from SQL NULL
    in this format. Only a single result set is supported.
    """
    lines = text.split("\n")
    if lines and lines[-1] == "":
        lines.pop()  # Trailing newline, not an empty row
    if not lines:
        return "[]\n"

    columns = [unescape_batch_field(name) for name in lines[0].split("\t")]
    rows = []
    for line in lines[1:]:
        values = [
            None if raw == "NULL" else unescape_batch_field(raw)
            for raw in line.split("\t")
        ]
        rows.append(dict(zip(columns, values)))
    return json.dumps(rows, indent=2, ensure_ascii=False) + "\n"


def run_query(
    sql: str,
    user: str = "root",
    password: str = "password",
    database: str = "nextcloud",
    vertical: bool = False,
    json_output: bool = False,
) -> tuple[int, str, str]:
    """
    Execute SQL via docker compose exec.

    Returns:
        Tuple of (return_code, stdout, stderr). With json_output=True and a
        successful query, stdout is a JSON array of row objects.
    """
    cmd = build_command(
        sql,
        user=user,
        password=password,
        database=database,
        vertical=vertical,
        json_output=json_output,
    )
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=find_compose_dir(),
        )
    except FileNotFoundError as exc:
        # docker is not installed / not on PATH: report instead of a traceback.
        return 127, "", f"Failed to run docker: {exc}\n"

    stdout = result.stdout
    if json_output and result.returncode == 0:
        stdout = batch_to_json(stdout)
    return result.returncode, stdout, result.stderr


def main() -> int:
    """Parse command-line arguments, run the query and print its output."""
    parser = argparse.ArgumentParser(
        description="Execute SQL queries against the Nextcloud MariaDB database",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s "SELECT COUNT(*) FROM oc_notes"
  %(prog)s "SELECT id, name FROM oc_oidc_clients"
  %(prog)s -E "SELECT * FROM oc_users LIMIT 1"
  %(prog)s --user nextcloud --password nextcloud "SHOW TABLES"
        """,
    )
    parser.add_argument("sql", help="SQL statement to execute")
    parser.add_argument(
        "-u", "--user", default="root", help="Database user (default: root)"
    )
    parser.add_argument(
        "-p",
        "--password",
        default="password",
        help="Database password (default: password)",
    )
    parser.add_argument(
        "-d",
        "--database",
        default="nextcloud",
        help="Database name (default: nextcloud)",
    )
    parser.add_argument(
        "-E",
        "--vertical",
        action="store_true",
        help="Print output vertically (one column per line)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Request JSON output (if supported)",
    )

    args = parser.parse_args()

    returncode, stdout, stderr = run_query(
        sql=args.sql,
        user=args.user,
        password=args.password,
        database=args.database,
        vertical=args.vertical,
        json_output=args.json_output,
    )

    if stdout:
        print(stdout, end="")
    if stderr:
        # Filter out the password warning
        filtered_stderr = "\n".join(
            line for line in stderr.splitlines() if PASSWORD_WARNING not in line
        )
        if filtered_stderr:
            print(filtered_stderr, file=sys.stderr)

    return returncode


if __name__ == "__main__":
    sys.exit(main())
"""
SQLite database query helper for MCP service development.

Wraps `docker compose exec SERVICE sqlite3` to execute SQL statements
against the token storage database in any MCP service container.

Usage:
    ./scripts/sqlitequery.py ".tables"
    ./scripts/sqlitequery.py -s oauth "SELECT * FROM refresh_tokens"
    ./scripts/sqlitequery.py -s keycloak --headers "SELECT * FROM oauth_clients"
    ./scripts/sqlitequery.py --json "SELECT * FROM audit_logs LIMIT 5"
"""

import argparse
import subprocess
import sys
from pathlib import Path
from typing import Optional

# Service name aliases for convenience
SERVICE_ALIASES = {
    "mcp": "mcp",
    "oauth": "mcp-oauth",
    "mcp-oauth": "mcp-oauth",
    "keycloak": "mcp-keycloak",
    "mcp-keycloak": "mcp-keycloak",
    "basic": "mcp-multi-user-basic",
    "multi-user-basic": "mcp-multi-user-basic",
    "mcp-multi-user-basic": "mcp-multi-user-basic",
}

# File names Docker Compose looks for, in the order they are checked here.
COMPOSE_FILENAMES = (
    "docker-compose.yml",
    "compose.yml",
    "compose.yaml",
    "docker-compose.yaml",
)


def find_compose_dir(start: Optional[Path] = None) -> Path:
    """Find the nearest ancestor directory containing a Compose file.

    Args:
        start: Directory to start searching from. Defaults to the directory
            containing this script.

    Returns:
        The first directory (walking upwards, filesystem root excluded) that
        holds one of COMPOSE_FILENAMES, or the script's parent directory when
        none is found.
    """
    script_dir = Path(__file__).resolve().parent
    current = script_dir if start is None else Path(start).resolve()
    while current != current.parent:
        if any((current / name).exists() for name in COMPOSE_FILENAMES):
            return current
        current = current.parent
    # Default to script's parent directory
    return script_dir.parent


def resolve_service(service: str) -> str:
    """Resolve service alias to container name.

    Aliases are matched case-insensitively; an unknown name is returned
    unchanged so custom services can still be targeted.
    """
    return SERVICE_ALIASES.get(service.lower(), service)


def build_command(
    sql: str,
    service: str = "mcp",
    database: str = "/app/data/tokens.db",
    headers: bool = False,
    json_output: bool = False,
    column_mode: bool = False,
) -> list[str]:
    """Build the `docker compose exec` argv for one sqlite3 invocation.

    JSON mode wins over column mode when both are requested; column mode
    always implies headers.
    """
    sqlite_args = []
    if json_output:
        sqlite_args.append("-json")
    elif column_mode:
        sqlite_args.append("-column")
    if headers or column_mode:
        sqlite_args.append("-header")

    return [
        "docker",
        "compose",
        "exec",
        "-T",  # Disable pseudo-TTY allocation
        resolve_service(service),
        "sqlite3",
        *sqlite_args,
        database,
        sql,
    ]


def run_query(
    sql: str,
    service: str = "mcp",
    database: str = "/app/data/tokens.db",
    headers: bool = False,
    json_output: bool = False,
    column_mode: bool = False,
) -> tuple[int, str, str]:
    """
    Execute SQL via docker compose exec.

    Returns:
        Tuple of (return_code, stdout, stderr)
    """
    cmd = build_command(
        sql,
        service=service,
        database=database,
        headers=headers,
        json_output=json_output,
        column_mode=column_mode,
    )
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=find_compose_dir(),
        )
    except FileNotFoundError as exc:
        # docker is not installed / not on PATH: report instead of a traceback.
        return 127, "", f"Failed to run docker: {exc}\n"

    return result.returncode, result.stdout, result.stderr


def main() -> int:
    """Parse command-line arguments, run the query and print its output."""
    parser = argparse.ArgumentParser(
        description="Execute SQL queries against SQLite databases in MCP service containers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Services:
  mcp       Single-user BasicAuth mode (default)
  oauth     Nextcloud OAuth mode (mcp-oauth)
  keycloak  Keycloak OAuth mode (mcp-keycloak)
  basic     Multi-user BasicAuth mode (mcp-multi-user-basic)

Examples:
  %(prog)s ".tables"
  %(prog)s -s oauth "SELECT user_id FROM refresh_tokens"
  %(prog)s -s keycloak ".schema oauth_clients"
  %(prog)s --headers "SELECT * FROM audit_logs LIMIT 5"
  %(prog)s --json "SELECT * FROM oauth_sessions"
        """,
    )
    parser.add_argument("sql", help="SQL statement or SQLite command to execute")
    parser.add_argument(
        "-s",
        "--service",
        default="mcp",
        help="Target service (mcp, oauth, keycloak, basic) (default: mcp)",
    )
    parser.add_argument(
        "-d",
        "--database",
        default="/app/data/tokens.db",
        help="Database path inside container (default: /app/data/tokens.db)",
    )
    parser.add_argument(
        "--headers",
        action="store_true",
        help="Show column headers",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output in JSON format",
    )
    parser.add_argument(
        "--column",
        action="store_true",
        dest="column_mode",
        help="Output in column format with headers",
    )

    args = parser.parse_args()

    returncode, stdout, stderr = run_query(
        sql=args.sql,
        service=args.service,
        database=args.database,
        headers=args.headers,
        json_output=args.json_output,
        column_mode=args.column_mode,
    )

    if stdout:
        print(stdout, end="")
    if stderr:
        # stderr already ends with its own newline; do not append another.
        print(stderr, end="", file=sys.stderr)

    return returncode


if __name__ == "__main__":
    sys.exit(main())
async def authorize_search_access(page: Page, username: str) -> bool:
    """Complete Step 1: OAuth Authorization for Astrolabe.

    Handles the OAuth flow:
    1. Check if already authorized (Step 1 shows "Complete")
    2. Click "Authorize" link
    3. Handle Nextcloud OIDC consent screen
    4. Wait for redirect back to Astrolabe settings
    5. Verify "Complete" badge appears on Step 1

    Args:
        page: Playwright page instance (must be on Astrolabe settings page)
        username: Username for logging

    Returns:
        True if authorization completed successfully

    Raises:
        ValueError: If the Authorize button cannot be found, or if neither the
            "Active" nor the Step 1 "Complete" badge is visible afterwards.
    """
    nextcloud_url = "http://localhost:8080"

    logger.info(f"Authorizing search access (Step 1) for {username}...")

    # Check if already on Astrolabe settings page, if not navigate there
    if "/settings/user/astrolabe" not in page.url:
        await navigate_to_astrolabe_settings(page)

    # Wait for page to fully render
    await anyio.sleep(1)

    # Check if already authorized (either "Active" badge or Step 1 "Complete" badge).
    # The probes are best-effort: a failure falls through to the real flow, but
    # it is logged at debug level so selector problems stay diagnosable.
    try:
        # Check for "Active" badge (fully configured state)
        active_badge = page.get_by_text("Active", exact=True)
        if await active_badge.count() > 0 and await active_badge.is_visible():
            logger.info(f"✓ Already fully authorized for {username} (Active badge)")
            return True
    except Exception:
        logger.debug("Active badge pre-check failed for %s", username, exc_info=True)

    try:
        step1_section = page.locator('h4:has-text("Step 1")')
        if await step1_section.count() > 0:
            # Look for "Complete" text in the Step 1 section's parent
            step1_parent = step1_section.locator("..")
            complete_badge = step1_parent.get_by_text("Complete", exact=True)
            if await complete_badge.count() > 0 and await complete_badge.is_visible():
                logger.info(f"✓ Step 1 already complete for {username}")
                return True
    except Exception:
        logger.debug("Step 1 badge pre-check failed for %s", username, exc_info=True)

    # Find and click the "Authorize" button
    authorize_button = page.locator('a.button.primary:has-text("Authorize")')

    try:
        await authorize_button.wait_for(timeout=5000, state="visible")
        logger.info(f"Found Authorize button for {username}")
    except Exception as err:
        # Take screenshot for debugging
        screenshot_path = f"/tmp/astrolabe_no_authorize_button_{username}.png"
        await page.screenshot(path=screenshot_path)
        logger.error(
            f"Could not find Authorize button for {username}. Screenshot: {screenshot_path}"
        )
        # Chain the Playwright error so the original cause stays in the traceback
        raise ValueError(f"Authorize button not found for {username}") from err

    # Click the Authorize button - this will redirect to OAuth provider
    # Use force=True to bypass stability check which can timeout due to CSS transitions
    await authorize_button.click(force=True)
    logger.info(f"Clicked Authorize button for {username}")

    # Wait for OAuth redirect to complete
    await page.wait_for_load_state("networkidle", timeout=30000)
    logger.info(f"After networkidle, current URL: {page.url}")

    # Take screenshot to see current state
    await page.screenshot(path=f"/tmp/astrolabe_after_authorize_{username}.png")
    logger.info(f"Screenshot saved: /tmp/astrolabe_after_authorize_{username}.png")

    # Handle OIDC consent screen if present
    consent_handled = await _handle_oauth_consent_screen(page, username)
    if consent_handled:
        logger.info(f"✓ OAuth consent granted for {username}")
    else:
        logger.info(
            f"No consent screen required for {username} (may be previously authorized)"
        )

    # Wait for redirect back to Astrolabe settings
    # The OAuth callback will redirect back to /settings/user/astrolabe
    try:
        await page.wait_for_url(
            f"**{nextcloud_url}/settings/user/astrolabe**", timeout=30000
        )
        logger.info(f"Redirected back to Astrolabe settings for {username}")
    except Exception:
        # Check if we're already on settings page
        if "/settings/user/astrolabe" not in page.url:
            logger.warning(
                f"Not redirected to Astrolabe settings, current URL: {page.url}"
            )
            # Navigate manually
            await page.goto(
                f"{nextcloud_url}/settings/user/astrolabe", wait_until="networkidle"
            )

    # Wait for page to reload and render
    await anyio.sleep(2)

    # Verify authorization completed - check for various success indicators
    # When fully configured, shows "Active" badge; when only Step 1 done, shows "Complete"
    try:
        # First check if "Active" badge is shown (fully configured state)
        active_badge = page.get_by_text("Active", exact=True)
        if await active_badge.count() > 0 and await active_badge.is_visible():
            logger.info(f"✓ OAuth authorization complete for {username} (Active badge)")
            return True
    except Exception:
        logger.debug("Active badge post-check failed for %s", username, exc_info=True)

    try:
        # Check for Step 1 "Complete" badge (partial configuration)
        step1_section = page.locator('h4:has-text("Step 1")')
        if await step1_section.count() > 0:
            step1_parent = step1_section.locator("..")
            complete_badge = step1_parent.get_by_text("Complete", exact=True)
            await complete_badge.wait_for(timeout=5000, state="visible")
            logger.info(f"✓ Step 1 OAuth authorization complete for {username}")
            return True
    except Exception:
        logger.debug("Step 1 badge post-check failed for %s", username, exc_info=True)

    # Neither badge found - authorization failed
    screenshot_path = f"/tmp/astrolabe_step1_not_complete_{username}.png"
    await page.screenshot(path=screenshot_path)
    logger.error(
        f"Authorization badge not visible for {username}. Screenshot: {screenshot_path}"
    )
    raise ValueError(f"OAuth authorization did not complete for {username}")


async def _handle_oauth_consent_screen(page: Page, username: str) -> bool:
    """Handle the OIDC consent screen during OAuth flow.

    Reuses the proven pattern from tests/conftest.py.

    Args:
        page: Playwright page instance
        username: Username for logging

    Returns:
        True if consent was handled, False if no consent screen (or no Allow
        button) was found

    Raises:
        Exception: Any Playwright error hit while driving the consent screen
            is logged and re-raised unchanged.
    """
    try:
        logger.info(f"Checking for consent screen at URL: {page.url}")

        # Check if consent screen is present - try multiple selectors
        # The consent screen may be #oidc-consent or use a different format
        consent_div = await page.query_selector("#oidc-consent")

        if consent_div:
            logger.info(f"Consent screen detected via #oidc-consent for {username}")
            # Get consent screen data attributes for logging
            client_name = await consent_div.get_attribute("data-client-name")
            scopes_attr = await consent_div.get_attribute("data-scopes")
            logger.info(f"  Client: {client_name}")
            logger.info(f"  Requested scopes: {scopes_attr}")
        else:
            # Check for Allow button directly (different consent screen format)
            allow_button = page.locator('button:has-text("Allow")')
            if await allow_button.count() > 0:
                logger.info(f"Consent screen detected via Allow button for {username}")
            else:
                logger.info(f"No consent screen found for {username} at {page.url}")
                await page.screenshot(path=f"/tmp/no_consent_screen_{username}.png")
                logger.info(f"Screenshot: /tmp/no_consent_screen_{username}.png")
                return False

        # Wait for Vue.js to render the Allow button
        try:
            await page.wait_for_selector('button:has-text("Allow")', timeout=10000)
            logger.info("  Allow button rendered by Vue.js")
        except Exception as e:
            screenshot_path = f"/tmp/consent_no_allow_button_{username}.png"
            await page.screenshot(path=screenshot_path)
            # Include the screenshot path so the artifact can actually be found
            logger.error(
                f"  Timeout waiting for Allow button: {e}. Screenshot: {screenshot_path}"
            )
            raise

        # Check all scope checkboxes
        scope_checkboxes = await page.query_selector_all('input[type="checkbox"]')
        if scope_checkboxes:
            logger.info(f"  Found {len(scope_checkboxes)} scope checkboxes")
            for i, checkbox in enumerate(scope_checkboxes):
                is_checked = await checkbox.is_checked()
                is_disabled = await checkbox.is_disabled()
                if not is_checked and not is_disabled:
                    await checkbox.check()
                    logger.info(f"  ✓ Checked scope checkbox {i + 1}")

        # Click the Allow button using JavaScript (handles viewport issues)
        allow_button_locator = page.locator('button:has-text("Allow")')

        # Debug: take screenshot before clicking Allow
        await page.screenshot(path=f"/tmp/consent_before_allow_{username}.png")
        logger.info(
            f"  Screenshot before Allow: /tmp/consent_before_allow_{username}.png"
        )

        button_count = await allow_button_locator.count()
        logger.info(f"  Found {button_count} Allow button(s)")

        if button_count > 0:
            current_url = page.url
            logger.info(f"  Current URL: {current_url}")
            logger.info(f"  Clicking Allow button for {username}...")

            # Use JavaScript click to handle consent buttons (proven pattern from conftest.py)
            # This is more reliable than Playwright's click for Vue.js rendered buttons
            await page.evaluate(
                """
                const buttons = document.querySelectorAll('button');
                for (const btn of buttons) {
                    if (btn.textContent.trim() === 'Allow') {
                        btn.click();
                        break;
                    }
                }
                """
            )

            # Wait for URL to change (Vue.js uses window.location.href after fetch)
            # networkidle doesn't detect fetch-based redirects
            try:
                await page.wait_for_url(
                    lambda url: url != current_url,
                    timeout=30000,
                )
                logger.info(f"  URL changed to: {page.url}")
            except Exception as wait_error:
                # If URL didn't change, check console for errors
                logger.warning(f"  URL didn't change after click: {wait_error}")
                await page.screenshot(path=f"/tmp/consent_after_allow_{username}.png")

                # Try alternative: manually POST consent and navigate
                logger.info("  Trying manual consent submission...")
                try:
                    redirect_url = await page.evaluate(
                        """
                        async () => {
                            const selectedScopes = Array.from(document.querySelectorAll('input[type="checkbox"]:checked'))
                                .map(cb => cb.value).join(' ');

                            const response = await fetch('/index.php/apps/oidc/consent/grant', {
                                method: 'POST',
                                headers: {
                                    'Content-Type': 'application/x-www-form-urlencoded',
                                    'requesttoken': OC.requestToken,
                                },
                                body: 'scopes=' + encodeURIComponent(selectedScopes),
                                redirect: 'follow',
                            });

                            return response.url || '/index.php/apps/oidc/authorize';
                        }
                        """
                    )
                    logger.info(f"  Manual consent returned URL: {redirect_url}")
                    await page.goto(redirect_url, wait_until="networkidle")
                except Exception as manual_error:
                    logger.error(f"  Manual consent also failed: {manual_error}")
                    raise

            await page.screenshot(path=f"/tmp/consent_after_allow_{username}.png")
            logger.info(f"  Consent granted for {username}")
            return True
        else:
            logger.error(f"  Allow button not found for {username}")
            return False

    except Exception as e:
        logger.error(f"Error handling consent screen for {username}: {e}")
        raise
/tmp/app_password_after_create_{username}.png" + ) + # Find the Login input field which should have the username value # Then find the Password input field which is in the same form app_password = None @@ -172,11 +482,11 @@ async def generate_app_password( f"✓ Generated app password for {username}: {app_password[:10]}... (validated)" ) - # Close the dialog by clicking the Close button - close_button = page.get_by_role("button", name="Close") - await close_button.click() + # Close dialog with Escape key (bypasses CSS layout issues with h2 intercepting clicks) + logger.info("Closing app password dialog with Escape key...") + await page.keyboard.press("Escape") + await anyio.sleep(0.5) # Wait for dialog close animation logger.info("Closed app password dialog") - await anyio.sleep(0.5) return app_password @@ -226,9 +536,9 @@ async def enable_background_sync_via_app_password( # Wait for page to load await anyio.sleep(1) - # Check if already active (look for "Active" text in the Background Sync Access section) + # Check if already complete (look for Step 2 "Complete" badge or overall "Active" state) try: - # The "Active" badge appears as a with text "Active" + # First check for overall "Active" badge (both steps complete) active_text = page.get_by_text("Active", exact=True) if await active_text.is_visible(timeout=2000): logger.info(f"✓ Background sync already active for {username}") @@ -236,6 +546,18 @@ async def enable_background_sync_via_app_password( except Exception: pass + try: + # Check for Step 2 "Complete" badge (app password already set) + step2_section = page.locator('h4:has-text("Step 2")') + if await step2_section.count() > 0: + step2_parent = step2_section.locator("..") + complete_badge = step2_parent.get_by_text("Complete", exact=True) + if await complete_badge.count() > 0 and await complete_badge.is_visible(): + logger.info(f"✓ Step 2 (app password) already complete for {username}") + return True + except Exception: + pass + # Find the app password input 
async def complete_astrolabe_authorization(
    page: Page, username: str, password: str
) -> dict:
    """Complete full Astrolabe two-step authorization.

    Performs the complete authorization flow:
    1. Navigate to Astrolabe settings
    2. OAuth authorization (Step 1) if needed
    3. Generate app password in Security settings
    4. App password entry (Step 2) if needed

    Args:
        page: Playwright page instance (must be logged in)
        username: Nextcloud username
        password: Nextcloud password (for reference, not used directly)

    Returns:
        Dict with {"step1": bool, "step2": bool, "app_password": str | None}.
        "app_password" stays None when Step 2 was already complete, because no
        new app password is generated on that path.

    Raises:
        Exception: Failures from Step 1 or Step 2 are logged and re-raised.
    """
    logger.info(f"Starting full Astrolabe authorization for {username}...")

    result = {"step1": False, "step2": False, "app_password": None}

    # Navigate to Astrolabe settings
    await navigate_to_astrolabe_settings(page)

    # Step 1: OAuth authorization
    try:
        result["step1"] = await authorize_search_access(page, username)
        logger.info(f"✓ Step 1 complete for {username}")
    except Exception as e:
        logger.error(f"Step 1 failed for {username}: {e}")
        raise

    # Navigate back to settings if needed (OAuth might have redirected elsewhere)
    if "/settings/user/astrolabe" not in page.url:
        await navigate_to_astrolabe_settings(page)

    # Check if Step 2 is already complete. The probe is best-effort: on failure
    # we fall through to the full Step 2 flow, but record why at debug level.
    try:
        step2_section = page.locator('h4:has-text("Step 2")')
        if await step2_section.count() > 0:
            step2_parent = step2_section.locator("..")
            complete_badge = step2_parent.get_by_text("Complete", exact=True)
            if await complete_badge.count() > 0 and await complete_badge.is_visible():
                logger.info(f"✓ Step 2 already complete for {username}")
                result["step2"] = True
                return result
    except Exception:
        logger.debug("Step 2 badge pre-check failed for %s", username, exc_info=True)

    # Also check for overall "Active" badge
    try:
        active_text = page.get_by_text("Active", exact=True)
        if await active_text.count() > 0 and await active_text.is_visible():
            logger.info(f"✓ Authorization already fully active for {username}")
            result["step2"] = True
            return result
    except Exception:
        logger.debug("Active badge pre-check failed for %s", username, exc_info=True)

    # Step 2: Generate app password and enter it
    app_password = await generate_app_password(page, username)
    result["app_password"] = app_password

    try:
        result["step2"] = await enable_background_sync_via_app_password(
            page, username, app_password
        )
        logger.info(f"✓ Step 2 complete for {username}")
    except Exception as e:
        logger.error(f"Step 2 failed for {username}: {e}")
        raise

    logger.info(f"✓ Full Astrolabe authorization complete for {username}")
    return result
async def wait_for_vector_sync(
    mcp_client, initial_indexed_count: int, timeout_seconds: int = 60
) -> tuple[bool, dict | None]:
    """Wait for vector sync to index new content.

    Polls the ``nc_get_vector_sync_status`` tool until the indexed count
    exceeds ``initial_indexed_count`` with nothing pending, or until
    ``timeout_seconds`` of wall-clock time has elapsed.

    Args:
        mcp_client: MCP client session
        initial_indexed_count: Initial indexed document count before creating content
        timeout_seconds: Maximum time to wait for sync

    Returns:
        Tuple of (success, status_data). ``status_data`` is the last parsed
        status payload, or None if no poll succeeded.
    """
    wait_interval = 2
    status_data = None

    # Measure against the event loop clock so that time spent inside
    # call_tool counts toward the timeout, not just the sleeps between polls.
    started = anyio.current_time()

    while (elapsed := anyio.current_time() - started) < timeout_seconds:
        sync_status = await mcp_client.call_tool("nc_get_vector_sync_status", {})
        if sync_status.isError:
            logger.warning(f"Vector sync status error: {sync_status}")
            return False, None

        status_data = json.loads(sync_status.content[0].text)
        indexed_count = status_data.get("indexed_count", 0)
        # Missing pending_count is treated as "still pending" (default 1)
        pending_count = status_data.get("pending_count", 1)

        logger.info(
            f"Sync status at {elapsed:.0f}s: indexed={indexed_count}, "
            f"pending={pending_count}, status={status_data.get('status')}"
        )

        if indexed_count > initial_indexed_count and pending_count == 0:
            logger.info(
                f"✓ Sync complete: {indexed_count} documents indexed "
                f"(was {initial_indexed_count})"
            )
            return True, status_data

        await anyio.sleep(wait_interval)

    return False, status_data


async def navigate_to_astrolabe_main(page: Page):
    """Navigate to Astrolabe main app page (Semantic Search section).

    Args:
        page: Playwright page instance (must be authenticated)
    """
    nextcloud_url = "http://localhost:8080"

    logger.info("Navigating to Astrolabe main app...")
    await page.goto(f"{nextcloud_url}/apps/astrolabe", wait_until="networkidle")

    # Wait for the app to load
    await anyio.sleep(1)

    logger.info("✓ Successfully loaded Astrolabe main app")


@pytest.mark.integration
@pytest.mark.oauth
@pytest.mark.timeout(
    300
)  # 5 minutes - this test involves OAuth, app password, and vector sync
async def test_astrolabe_plotly_visualization_with_basic_auth(
    browser,
    test_users_setup,
    configure_astrolabe_for_mcp_server,
):
    """Test Plotly 3D visualization in Astrolabe with multi-user BasicAuth mode.

    This test:
    1. Configures Astrolabe for the mcp-multi-user-basic service
    2. Provisions background sync access for alice via app password
    3. Creates a note with unique searchable content (as alice)
    4. Waits for vector sync to index the note
    5. Performs semantic search in Astrolabe UI
    6. Verifies the Plotly visualization renders and results are displayed
    """
    # Phase 1: Configure Astrolabe for mcp-multi-user-basic
    await configure_astrolabe_for_mcp_server(
        mcp_server_internal_url="http://mcp-multi-user-basic:8000",
        mcp_server_public_url="http://localhost:8003",
    )

    username = "alice"
    password = test_users_setup[username]["password"]
    note_id = None
    unique_term = None

    # Create MCP client with alice's credentials for the multi-user BasicAuth server
    credentials = base64.b64encode(f"{username}:{password}".encode()).decode("utf-8")
    auth_header = f"Basic {credentials}"

    context = await browser.new_context(ignore_https_errors=True)
    page = await context.new_page()

    try:
        # Phase 2: Complete full Astrolabe authorization (OAuth + app password)
        await login_to_nextcloud(page, username, password)
        auth_result = await complete_astrolabe_authorization(page, username, password)
        # Log only the outcome flags: the result dict carries the generated
        # app password, which must not end up in clear text in test logs.
        logger.info(
            f"Authorization result: step1={auth_result['step1']}, "
            f"step2={auth_result['step2']}, "
            f"app_password_generated={auth_result['app_password'] is not None}"
        )

        # Create MCP client session as alice - all MCP operations inside this block
        async for alice_mcp_client in create_mcp_client_session(
            url="http://localhost:8003/mcp",
            headers={"Authorization": auth_header},
            client_name="Alice BasicAuth MCP",
        ):
            # Phase 3: Get initial indexed count
            initial_sync = await alice_mcp_client.call_tool(
                "nc_get_vector_sync_status", {}
            )

            if initial_sync.isError:
                pytest.skip("Vector sync not enabled on mcp-multi-user-basic")

            initial_data = json.loads(initial_sync.content[0].text)
            initial_count = initial_data.get("indexed_count", 0)
            logger.info(f"Initial indexed count: {initial_count}")

            # Create note with unique searchable term
            unique_term = f"plotly_viz_test_{uuid.uuid4().hex[:8]}"
            note_response = await alice_mcp_client.call_tool(
                "nc_notes_create_note",
                {
                    "title": f"Visualization Test Note {unique_term}",
                    "content": f"""# Testing Plotly Visualization

This note contains the unique term: {unique_term}

It is used to test the 3D vector space visualization in the Astrolabe app.
The visualization should show this document as a point in PCA-reduced space.

## Key Features
- Semantic search with embeddings
- PCA dimension reduction to 3D
- Interactive Plotly scatter3d plot
""",
                    "category": "Test",
                },
            )

            if note_response.isError:
                pytest.fail(f"Failed to create test note: {note_response}")

            note_data = json.loads(note_response.content[0].text)
            note_id = note_data.get("id")
            logger.info(f"Created test note ID: {note_id}")

            # Phase 4: Wait for vector indexing
            sync_complete, status = await wait_for_vector_sync(
                alice_mcp_client, initial_count, timeout_seconds=90
            )
            assert sync_complete, f"Vector sync did not complete in time: {status}"

            # Phase 5: Navigate to Astrolabe and perform search
            await navigate_to_astrolabe_main(page)

            # Fill search query - find the Astrolabe search input specifically
            # The NcTextField component wraps the input in a div with class mcp-search-input
            search_input = page.locator(".mcp-search-input input")
            await search_input.wait_for(timeout=10000, state="visible")
            await search_input.fill(unique_term)
            logger.info(f"Entered search query: {unique_term}")

            # Trigger search by pressing Enter on the input field
            # This is wired to performSearch via @keyup.enter in the Vue component
            await search_input.press("Enter")
            logger.info("Pressed Enter to trigger search")

            # Wait for loading to complete - watch for loading indicator to disappear
            loading_indicator = page.locator(".mcp-loading")
            try:
                # If loading indicator appears, wait for it to disappear
                if await loading_indicator.count() > 0:
                    await loading_indicator.wait_for(state="hidden", timeout=30000)
                    logger.info("Loading completed")
            except Exception:
                # Loading might be too fast to catch
                pass

            # Brief wait for UI to settle
            await anyio.sleep(1)

            # Take diagnostic screenshot
            await page.screenshot(path="/tmp/astrolabe_search_after_click.png")
            logger.info(
                "Took diagnostic screenshot: /tmp/astrolabe_search_after_click.png"
            )

            # Wait for search results using text-based detection
            # This is more reliable than class-based selectors
            # The UI shows "N results" when search completes successfully
            results_text_pattern = page.get_by_text(re.compile(r"\d+ results?"))
            no_results_text = page.get_by_text("No results found")
            error_note = page.locator(".mcp-error")

            # Wait for one of: results count, no results message, or error
            try:
                # Poll for results or error states (don't rely on Nextcloud core CSS classes)
                found_state = False
                for attempt in range(60):  # 60 attempts, 500ms each = 30s total
                    if await error_note.count() > 0:
                        error_text = await error_note.text_content()
                        logger.error(f"Search error: {error_text}")
                        pytest.fail(f"Search failed with error: {error_text}")

                    if await no_results_text.count() > 0:
                        logger.warning(
                            "No results found - vector sync may not have completed"
                        )
                        await page.screenshot(path="/tmp/astrolabe_no_results.png")
                        pytest.fail(
                            f"Search returned no results for '{unique_term}'. "
                            "Check if vector sync completed for alice's content."
                        )

                    if await results_text_pattern.count() > 0:
                        results_text = await results_text_pattern.first.text_content()
                        logger.info(f"Found results: {results_text}")
                        found_state = True
                        break

                    if attempt % 10 == 0:
                        logger.info(
                            f"Waiting for results... (attempt {attempt + 1}/60)"
                        )

                    await anyio.sleep(0.5)

                if not found_state:
                    await page.screenshot(path="/tmp/astrolabe_search_timeout.png")
                    page_content = await page.content()
                    logger.error(f"Search state not resolved. Page URL: {page.url}")
                    logger.error(f"Page content snippet: {page_content[:2000]}")
                    raise AssertionError("Search did not complete within timeout")

            except AssertionError:
                raise  # Re-raise AssertionError as-is
            except Exception as e:
                # Take another screenshot and get page content for debugging
                await page.screenshot(path="/tmp/astrolabe_search_timeout.png")
                page_content = await page.content()
                logger.error(f"Search state not resolved. Page URL: {page.url}")
                logger.error(f"Page content snippet: {page_content[:2000]}")
                # Chain the cause so the underlying Playwright error is not lost
                raise AssertionError(f"Search did not complete: {e}") from e

            logger.info("Results loaded")

            # Phase 6: Verify visualization
            # Check Plotly container is visible
            viz_plot = page.locator("#viz-plot")
            await viz_plot.wait_for(timeout=15000, state="visible")
            logger.info("Plotly container is visible")

            # Verify Plotly has rendered content (SVG/canvas elements inside)
            has_viz_content = await page.evaluate(
                """
                () => {
                    const plot = document.getElementById('viz-plot');
                    if (!plot) return false;
                    // Plotly creates .plotly class, canvas, or svg elements
                    return plot.children.length > 0 ||
                           plot.querySelector('.plotly, canvas, svg, .main-svg') !== null;
                }
                """
            )
            assert has_viz_content, "Plotly visualization did not render any content"
            logger.info("✓ Plotly visualization rendered content")

            # Verify results are displayed
            result_items = page.locator(".mcp-result-item")
            result_count = await result_items.count()
            assert result_count > 0, "No search results displayed"
            logger.info(f"✓ Found {result_count} search result(s)")

            # Verify our note appears in results
            found_note = False
            for i in range(result_count):
                item = result_items.nth(i)
                title_elem = item.locator(".mcp-result-title")
                title_text = await title_elem.text_content()
                if title_text and unique_term in title_text:
                    found_note = True
                    logger.info(f"✓ Found test note in results: {title_text}")
                    break

            assert found_note, f"Created note with '{unique_term}' not found in results"

            # Optional: Take screenshot for verification
            await page.screenshot(path="/tmp/astrolabe_plotly_test_success.png")
            logger.info("✓ All Plotly visualization assertions passed")

            # Cleanup: delete the created note (inside the MCP client context)
            if note_id:
                try:
                    delete_response = await alice_mcp_client.call_tool(
                        "nc_notes_delete_note", {"note_id": note_id}
                    )
                    if not delete_response.isError:
                        logger.info(f"✓ Cleaned up test note {note_id}")
                        note_id = None  # Mark as cleaned
                    else:
                        logger.warning(
                            f"Failed to delete note {note_id}: {delete_response}"
                        )
                except Exception as e:
                    logger.warning(f"Cleanup failed for note {note_id}: {e}")

    finally:
        # Cleanup note if not already cleaned (create new client for cleanup)
        if note_id:
            try:
                async for cleanup_client in create_mcp_client_session(
                    url="http://localhost:8003/mcp",
                    headers={"Authorization": auth_header},
                    client_name="Cleanup MCP",
                ):
                    delete_response = await cleanup_client.call_tool(
                        "nc_notes_delete_note", {"note_id": note_id}
                    )
                    if not delete_response.isError:
                        logger.info(f"✓ Cleaned up test note {note_id} (finally)")
                    else:
                        logger.warning(
                            f"Failed to delete note {note_id}: {delete_response}"
                        )
            except Exception as e:
                logger.warning(f"Cleanup failed for note {note_id}: {e}")

        # Close browser context
        await context.close()
nextcloud_mcp_server.api.passwords import ( delete_app_password, get_app_password_status, provision_app_password, @@ -32,9 +32,9 @@ pytestmark = pytest.mark.unit @pytest.fixture(autouse=True) def clear_rate_limit(): """Clear rate limit state before each test.""" - management._rate_limit_attempts.clear() + passwords._rate_limit_attempts.clear() yield - management._rate_limit_attempts.clear() + passwords._rate_limit_attempts.clear() @pytest.fixture @@ -199,7 +199,7 @@ async def test_provision_app_password_success(temp_storage, mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -243,7 +243,7 @@ async def test_provision_app_password_nextcloud_validation_fails(mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -362,7 +362,7 @@ async def test_delete_app_password_success(temp_storage, mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -406,7 +406,7 @@ async def test_delete_app_password_not_found(temp_storage, mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -445,7 +445,7 @@ async def test_delete_app_password_invalid_credentials(mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -515,7 +515,7 @@ async def test_provision_app_password_rate_limiting(mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - 
"nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) @@ -574,7 +574,7 @@ async def test_rate_limiting_is_per_user(mocker): mock_client.__aexit__ = AsyncMock() mocker.patch( - "nextcloud_mcp_server.api.management.httpx.AsyncClient", + "nextcloud_mcp_server.api.passwords.httpx.AsyncClient", return_value=mock_client, ) diff --git a/tests/unit/test_management_pdf_preview_endpoint.py b/tests/unit/test_management_pdf_preview_endpoint.py new file mode 100644 index 0000000..b0e8007 --- /dev/null +++ b/tests/unit/test_management_pdf_preview_endpoint.py @@ -0,0 +1,716 @@ +""" +Unit tests for Management API PDF preview endpoint. + +Tests the /api/v1/pdf-preview endpoint focusing on: +- Parameter validation (file_path, page, scale) +- OAuth token validation +- PDF rendering with PyMuPDF +- Error handling (file not found, invalid page, etc.) +""" + +import base64 +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from starlette.applications import Starlette +from starlette.routing import Route +from starlette.testclient import TestClient + +from nextcloud_mcp_server.api.visualization import get_pdf_preview + +pytestmark = pytest.mark.unit + + +def create_test_app(): + """Create a test Starlette app with the PDF preview endpoint.""" + app = Starlette( + routes=[ + Route("/api/v1/pdf-preview", get_pdf_preview, methods=["GET"]), + ] + ) + # Set up OAuth context (required by endpoint) + app.state.oauth_context = {"config": {"nextcloud_host": "http://localhost:8080"}} + return app + + +def create_mock_pdf_bytes(): + """Create a minimal valid PDF for testing.""" + # Minimal PDF structure that PyMuPDF can parse + # This is a 1-page PDF with a blank page + pdf_content = b"""%PDF-1.4 +1 0 obj +<< /Type /Catalog /Pages 2 0 R >> +endobj +2 0 obj +<< /Type /Pages /Kids [3 0 R] /Count 1 >> +endobj +3 0 obj +<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> +endobj +xref +0 4 
+0000000000 65535 f +0000000009 00000 n +0000000058 00000 n +0000000115 00000 n +trailer +<< /Size 4 /Root 1 0 R >> +startxref +196 +%%EOF""" + return pdf_content + + +class TestPdfPreviewParameterValidation: + """Tests for parameter validation in PDF preview endpoint.""" + + def test_missing_file_path_returns_400(self): + """Test that missing file_path parameter returns 400.""" + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert "file_path" in data["error"].lower() + + def test_invalid_page_number_returns_400(self): + """Test that invalid page number (0 or negative) returns 400.""" + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + + # Test page=0 + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&page=0", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert "page" in data["error"].lower() + + # Test negative page + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&page=-1", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400 + + def test_invalid_scale_returns_400(self): + """Test that scale outside valid range returns 400.""" + with ( + patch( + 
"nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + + # Test scale too small + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&scale=0.1", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert "scale" in data["error"].lower() + + # Test scale too large + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&scale=10.0", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400 + + def test_non_numeric_page_returns_400(self): + """Test that non-numeric page parameter returns 400.""" + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&page=abc", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + + +class TestPdfPreviewAuthentication: + """Tests for authentication in PDF preview endpoint.""" + + def test_unauthorized_without_token_returns_401(self): + """Test that request without token returns 401.""" + with patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + side_effect=Exception("Invalid token"), + ): + app = create_test_app() + client = TestClient(app) + response = client.get("/api/v1/pdf-preview?file_path=/test.pdf") + + assert response.status_code == 401 + data = 
response.json() + assert data["success"] is False + + def test_unauthorized_with_invalid_token_returns_401(self): + """Test that request with invalid token returns 401.""" + with patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + side_effect=Exception("Token expired"), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf", + headers={"Authorization": "Bearer invalid-token"}, + ) + + assert response.status_code == 401 + data = response.json() + assert data["success"] is False + + +class TestPdfPreviewRendering: + """Tests for PDF rendering functionality.""" + + def test_successful_pdf_render(self): + """Test successful PDF page rendering.""" + pdf_bytes = create_mock_pdf_bytes() + + # Mock the WebDAV client + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock(return_value=(pdf_bytes, "application/pdf")) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&page=1&scale=1.0", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert "image" in data + assert data["page_number"] == 1 + assert data["total_pages"] == 1 + + # Verify image is valid base64 + try: + decoded = base64.b64decode(data["image"]) + 
# PNG magic bytes + assert decoded[:8] == b"\x89PNG\r\n\x1a\n" + except Exception as e: + pytest.fail(f"Image is not valid base64-encoded PNG: {e}") + + def test_page_out_of_range_returns_400(self): + """Test that requesting page beyond total pages returns 400.""" + pdf_bytes = create_mock_pdf_bytes() + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock(return_value=(pdf_bytes, "application/pdf")) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&page=999", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert "page" in data["error"].lower() + assert "999" in data["error"] + + def test_file_not_found_returns_404(self): + """Test that non-existent file returns 404.""" + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + side_effect=FileNotFoundError("File not found") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + 
return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/nonexistent.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 404 + data = response.json() + assert data["success"] is False + assert "not found" in data["error"].lower() + + def test_default_parameters(self): + """Test that default parameters (page=1, scale=2.0) are used.""" + pdf_bytes = create_mock_pdf_bytes() + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock(return_value=(pdf_bytes, "application/pdf")) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + # Only file_path, no page or scale + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["success"] is True + assert data["page_number"] == 1 # Default page + + +class TestPdfPreviewEdgeCases: + """Tests for edge cases in PDF preview endpoint.""" + + def test_url_encoded_file_path(self): + """Test that URL-encoded file paths are handled correctly.""" + pdf_bytes = create_mock_pdf_bytes() + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock(return_value=(pdf_bytes, 
"application/pdf")) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + # URL-encoded path with spaces + response = client.get( + "/api/v1/pdf-preview?file_path=/Documents/My%20File.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 200 + # Verify the path was passed correctly to WebDAV + mock_webdav.read_file.assert_called_once() + call_args = mock_webdav.read_file.call_args[0] + assert "My File.pdf" in call_args[0] + + def test_missing_nextcloud_host_config(self): + """Test handling when Nextcloud host is not configured.""" + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + # Override with empty config + app.state.oauth_context = {"config": {"nextcloud_host": ""}} + + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 500 + data = response.json() + assert data["success"] is False + + def test_corrupted_pdf_returns_400(self): + """Test that corrupted PDF data returns 400 with specific error.""" + mock_webdav = AsyncMock() + # Return invalid PDF bytes + mock_webdav.read_file = 
AsyncMock( + return_value=(b"not a valid pdf", "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/corrupted.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert ( + "corrupted" in data["error"].lower() + or "invalid" in data["error"].lower() + ) + + def test_boundary_scale_values(self): + """Test boundary scale values (min and max).""" + pdf_bytes = create_mock_pdf_bytes() + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock(return_value=(pdf_bytes, "application/pdf")) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + + # Test minimum valid scale (0.5) + response = client.get( + 
"/api/v1/pdf-preview?file_path=/test.pdf&scale=0.5", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 200 + + # Test maximum valid scale (5.0) + response = client.get( + "/api/v1/pdf-preview?file_path=/test.pdf&scale=5.0", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 200 + + +class TestPdfPreviewSecurityValidation: + """Tests for security validations in PDF preview endpoint.""" + + def test_path_traversal_returns_400(self): + """Test that path traversal attempts are blocked with 400.""" + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + ): + app = create_test_app() + client = TestClient(app) + + # Test various path traversal patterns + traversal_paths = [ + "/Documents/../../../etc/passwd", + "/../secret.pdf", + "/folder/..%2F..%2Fetc/passwd", # URL-encoded + "/test/../secret.pdf", + ] + + for path in traversal_paths: + response = client.get( + f"/api/v1/pdf-preview?file_path={path}", + headers={"Authorization": "Bearer test-token"}, + ) + assert response.status_code == 400, ( + f"Path traversal not blocked: {path}" + ) + data = response.json() + assert data["success"] is False + assert "invalid file path" in data["error"].lower() + + def test_file_size_limit_exceeded_returns_413(self): + """Test that files exceeding 50MB limit return 413.""" + # Create bytes larger than 50MB limit + large_pdf_bytes = b"x" * (51 * 1024 * 1024) # 51 MB + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(large_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + 
"nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/large.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 413 + data = response.json() + assert data["success"] is False + assert "size limit" in data["error"].lower() + + def test_corrupted_pdf_returns_400(self): + """Test that corrupted PDF returns 400 with specific error message.""" + # Invalid PDF content that PyMuPDF cannot parse + corrupted_pdf_bytes = b"not a valid PDF file content" + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(corrupted_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/corrupted.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False + assert ( + "corrupted" in data["error"].lower() + or "invalid" in data["error"].lower() + ) + + def 
test_empty_pdf_returns_400(self): + """Test that empty PDF file returns 400.""" + empty_pdf_bytes = b"" + + mock_webdav = AsyncMock() + mock_webdav.read_file = AsyncMock( + return_value=(empty_pdf_bytes, "application/pdf") + ) + + mock_nc_client = MagicMock() + mock_nc_client.webdav = mock_webdav + mock_nc_client.__aenter__ = AsyncMock(return_value=mock_nc_client) + mock_nc_client.__aexit__ = AsyncMock(return_value=None) + + with ( + patch( + "nextcloud_mcp_server.api.visualization.validate_token_and_get_user", + new_callable=AsyncMock, + return_value=("testuser", True), + ), + patch( + "nextcloud_mcp_server.api.visualization.extract_bearer_token", + return_value="test-token", + ), + patch( + "nextcloud_mcp_server.client.NextcloudClient.from_token", + return_value=mock_nc_client, + ), + ): + app = create_test_app() + client = TestClient(app) + response = client.get( + "/api/v1/pdf-preview?file_path=/empty.pdf", + headers={"Authorization": "Bearer test-token"}, + ) + + assert response.status_code == 400 + data = response.json() + assert data["success"] is False diff --git a/third_party/astrolabe/appinfo/routes.php b/third_party/astrolabe/appinfo/routes.php index 5f969c8..35d0c8d 100644 --- a/third_party/astrolabe/appinfo/routes.php +++ b/third_party/astrolabe/appinfo/routes.php @@ -72,6 +72,11 @@ return [ 'url' => '/api/chunk-context', 'verb' => 'GET', ], + [ + 'name' => 'api#pdfPreview', + 'url' => '/api/pdf-preview', + 'verb' => 'GET', + ], // Admin settings routes [ diff --git a/third_party/astrolabe/lib/Controller/ApiController.php b/third_party/astrolabe/lib/Controller/ApiController.php index a2139b9..91535a3 100644 --- a/third_party/astrolabe/lib/Controller/ApiController.php +++ b/third_party/astrolabe/lib/Controller/ApiController.php @@ -152,10 +152,11 @@ class ApiController extends Controller { $userId = $user->getUID(); // Create refresh callback that calls IdP directly - $refreshCallback = function (string $refreshToken) { + /** @return array{access_token: 
string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); - if (!$newTokenData) { + if ($newTokenData === null) { return null; } @@ -168,7 +169,7 @@ class ApiController extends Controller { // Get user's OAuth token for MCP server with automatic refresh $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); - if (!$accessToken) { + if ($accessToken === null) { return new JSONResponse([ 'success' => false, 'error' => 'MCP server authorization required. Please authorize the app first.' @@ -417,10 +418,11 @@ class ApiController extends Controller { $userId = $user->getUID(); // Create refresh callback - $refreshCallback = function (string $refreshToken) { + /** @return array{access_token: string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); - if (!$newTokenData) { + if ($newTokenData === null) { return null; } @@ -433,7 +435,7 @@ class ApiController extends Controller { // Get access token with automatic refresh $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); - if (!$accessToken) { + if ($accessToken === null) { return new JSONResponse([ 'success' => false, 'error' => 'MCP server authorization required' @@ -529,10 +531,11 @@ class ApiController extends Controller { $userId = $user->getUID(); // Create refresh callback - $refreshCallback = function (string $refreshToken) { + /** @return array{access_token: string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); - if (!$newTokenData) { + if ($newTokenData === null) { return null; } @@ -545,7 +548,7 @@ class ApiController extends Controller { // Get access token 
with automatic refresh $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); - if (!$accessToken) { + if ($accessToken === null) { return new JSONResponse([ 'success' => false, 'error' => 'MCP server authorization required' @@ -628,10 +631,11 @@ class ApiController extends Controller { $userId = $user->getUID(); // Create refresh callback - $refreshCallback = function (string $refreshToken) { + /** @return array{access_token: string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); - if (!$newTokenData) { + if ($newTokenData === null) { return null; } @@ -644,7 +648,7 @@ class ApiController extends Controller { // Get access token with automatic refresh $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); - if (!$accessToken) { + if ($accessToken === null) { return new JSONResponse([ 'success' => false, 'error' => 'MCP server authorization required' @@ -757,10 +761,11 @@ class ApiController extends Controller { $userId = $user->getUID(); // Create refresh callback - $refreshCallback = function (string $refreshToken) { + /** @return array{access_token: string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); - if (!$newTokenData) { + if ($newTokenData === null) { return null; } @@ -773,7 +778,7 @@ class ApiController extends Controller { // Get user's OAuth token for MCP server with automatic refresh $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); - if (!$accessToken) { + if ($accessToken === null) { return new JSONResponse([ 'success' => false, 'error' => 'MCP server authorization required.' 
@@ -788,4 +793,62 @@ class ApiController extends Controller { return new JSONResponse($result); } + + /** + * Get PDF page preview (server-side rendered). + * + * AJAX endpoint for PDF viewer in semantic search UI. + * Uses server-side PyMuPDF rendering to avoid CSP/worker issues. + * + * @param string $file_path WebDAV path to PDF file + * @param int $page Page number (1-indexed, default: 1) + * @param float $scale Zoom factor (default: 2.0) + * @return JSONResponse + */ + #[NoAdminRequired] + public function pdfPreview( + string $file_path, + int $page = 1, + float $scale = 2.0, + ): JSONResponse { + $user = $this->userSession->getUser(); + if (!$user) { + return new JSONResponse(['success' => false, 'error' => 'User not authenticated'], Http::STATUS_UNAUTHORIZED); + } + + $userId = $user->getUID(); + + // Create refresh callback + /** @return array{access_token: string, refresh_token: string, expires_in: int}|null */ + $refreshCallback = function (string $refreshToken): ?array { + $newTokenData = $this->tokenRefresher->refreshAccessToken($refreshToken); + + if ($newTokenData === null) { + return null; + } + + return [ + 'access_token' => $newTokenData['access_token'], + 'refresh_token' => $newTokenData['refresh_token'] ?? $refreshToken, + 'expires_in' => $newTokenData['expires_in'] ?? 3600, + ]; + }; + + // Get user's OAuth token for MCP server with automatic refresh + $accessToken = $this->tokenStorage->getAccessToken($userId, $refreshCallback); + if ($accessToken === null) { + return new JSONResponse([ + 'success' => false, + 'error' => 'MCP server authorization required.' 
+ ], Http::STATUS_UNAUTHORIZED); + } + + $result = $this->client->getPdfPreview($file_path, $page, $scale, $accessToken); + + if (isset($result['error'])) { + return new JSONResponse(['success' => false, 'error' => $result['error']], Http::STATUS_INTERNAL_SERVER_ERROR); + } + + return new JSONResponse($result); + } } diff --git a/third_party/astrolabe/lib/Controller/OAuthController.php b/third_party/astrolabe/lib/Controller/OauthController.php similarity index 99% rename from third_party/astrolabe/lib/Controller/OAuthController.php rename to third_party/astrolabe/lib/Controller/OauthController.php index aa21811..6172335 100644 --- a/third_party/astrolabe/lib/Controller/OAuthController.php +++ b/third_party/astrolabe/lib/Controller/OauthController.php @@ -32,7 +32,7 @@ use Psr\Log\LoggerInterface; * - Public clients: PKCE only * - Confidential clients: PKCE + client_secret (defense in depth) */ -class OAuthController extends Controller { +class OauthController extends Controller { private IConfig $config; private ISession $session; private IUserSession $userSession; diff --git a/third_party/astrolabe/lib/Service/McpServerClient.php b/third_party/astrolabe/lib/Service/McpServerClient.php index 4316ed9..73225b8 100644 --- a/third_party/astrolabe/lib/Service/McpServerClient.php +++ b/third_party/astrolabe/lib/Service/McpServerClient.php @@ -605,4 +605,62 @@ class McpServerClient { return ['error' => $e->getMessage()]; } } + + /** + * Get PDF page preview (server-side rendered). + * + * Renders a PDF page to PNG using PyMuPDF on the server. + * This avoids client-side PDF.js issues with CSP and ES private fields. + * + * Requires OAuth bearer token for authentication. 
+ * + * @param string $filePath WebDAV path to PDF file + * @param int $page Page number (1-indexed) + * @param float $scale Zoom factor (default: 2.0) + * @param string $token OAuth bearer token + * @return array{ + * success?: bool, + * image?: string, + * page_number?: int, + * total_pages?: int, + * error?: string + * } + */ + public function getPdfPreview( + string $filePath, + int $page, + float $scale, + string $token, + ): array { + try { + $response = $this->httpClient->get( + $this->baseUrl . '/api/v1/pdf-preview', + [ + 'headers' => [ + 'Authorization' => 'Bearer ' . $token + ], + 'query' => [ + 'file_path' => $filePath, + 'page' => $page, + 'scale' => $scale, + ] + ] + ); + /** @var array{success?: bool, image?: string, page_number?: int, total_pages?: int, error?: string} $data */ + $data = json_decode((string)$response->getBody(), true); + + if (json_last_error() !== JSON_ERROR_NONE) { + throw new \RuntimeException('Invalid JSON response from server'); + } + + return $data; + } catch (\Exception $e) { + $this->logger->error('Failed to get PDF preview', [ + 'error' => $e->getMessage(), + 'file_path' => $filePath, + 'page' => $page, + ]); + return ['error' => $e->getMessage()]; + } + } } diff --git a/third_party/astrolabe/lib/Settings/Personal.php b/third_party/astrolabe/lib/Settings/Personal.php index a93ca7f..f6f38fd 100644 --- a/third_party/astrolabe/lib/Settings/Personal.php +++ b/third_party/astrolabe/lib/Settings/Personal.php @@ -86,7 +86,9 @@ class Personal implements ISettings { if ($authMode === 'multi_user_basic' && $supportsAppPasswords) { // Check both credentials $hasOAuthToken = ($token !== null && !$this->tokenStorage->isExpired($token)); - $hasAppPassword = $this->tokenStorage->hasBackgroundSyncAccess($userId); + // In hybrid mode, check specifically for app password (not general background access) + // because MCP server needs the app password for background sync + $hasAppPassword = 
($this->tokenStorage->getBackgroundSyncPassword($userId) !== null); $backgroundSyncType = $this->tokenStorage->getBackgroundSyncType($userId); $backgroundSyncProvisionedAt = $this->tokenStorage->getBackgroundSyncProvisionedAt($userId); diff --git a/third_party/astrolabe/package-lock.json b/third_party/astrolabe/package-lock.json index 1a3fcd8..e212c6a 100644 --- a/third_party/astrolabe/package-lock.json +++ b/third_party/astrolabe/package-lock.json @@ -1,12 +1,12 @@ { "name": "astrolabe", - "version": "0.8.2", + "version": "0.8.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "astrolabe", - "version": "0.8.2", + "version": "0.8.3", "license": "AGPL-3.0-or-later", "dependencies": { "@nextcloud/axios": "^2.5.1", @@ -17,7 +17,7 @@ "@nextcloud/vue": "^9.3.3", "markdown-it": "^14.1.0", "pdfjs-dist": "^4.0.379", - "plotly.js-dist-min": "^2.35.3", + "plotly.js-dist-min": "^3.0.0", "vue": "^3.0.0", "vue-material-design-icons": "^5.3.1" }, @@ -7905,7 +7905,9 @@ } }, "node_modules/plotly.js-dist-min": { - "version": "2.35.3", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/plotly.js-dist-min/-/plotly.js-dist-min-3.3.1.tgz", + "integrity": "sha512-ZxKM9DlEoEF3wBzGRPGHt6gWTJrm5N81J9AgX9UBX/Qjc9L4lRxtPBPq+RmBJWoA71j1X5Z1ouuguLkdoo88tg==", "license": "MIT" }, "node_modules/possible-typed-array-names": { diff --git a/third_party/astrolabe/package.json b/third_party/astrolabe/package.json index c319e1f..c7ec153 100644 --- a/third_party/astrolabe/package.json +++ b/third_party/astrolabe/package.json @@ -25,8 +25,7 @@ "@nextcloud/router": "^3.0.1", "@nextcloud/vue": "^9.3.3", "markdown-it": "^14.1.0", - "pdfjs-dist": "^4.0.379", - "plotly.js-dist-min": "^2.35.3", + "plotly.js-dist-min": "^3.0.0", "vue": "^3.0.0", "vue-material-design-icons": "^5.3.1" }, diff --git a/third_party/astrolabe/psalm-baseline.xml b/third_party/astrolabe/psalm-baseline.xml index cf66cc8..626e059 100644 --- a/third_party/astrolabe/psalm-baseline.xml +++ 
b/third_party/astrolabe/psalm-baseline.xml @@ -13,13 +13,6 @@ - - - - - - - @@ -62,16 +55,6 @@ - - - - - - - - - - @@ -106,7 +89,7 @@ - + @@ -175,7 +158,7 @@ - + diff --git a/third_party/astrolabe/src/App.vue b/third_party/astrolabe/src/App.vue index 4996699..6886cd9 100644 --- a/third_party/astrolabe/src/App.vue +++ b/third_party/astrolabe/src/App.vue @@ -394,18 +394,6 @@ import MarkdownViewer from './components/MarkdownViewer.vue' import axios from '@nextcloud/axios' import { generateUrl } from '@nextcloud/router' import Plotly from 'plotly.js-dist-min' -import * as pdfjsLib from 'pdfjs-dist' - -// Set worker source with error handling -try { - pdfjsLib.GlobalWorkerOptions.workerSrc = new URL( - 'pdfjs-dist/build/pdf.worker.mjs', - import.meta.url, - ).toString() -} catch (e) { - console.warn('Failed to set PDF.js worker, will use fallback', e) - // PDF.js will use fake worker automatically -} export default { name: 'App', @@ -615,7 +603,20 @@ export default { } } catch (err) { console.error('Search error:', err) - this.error = this.t('astrolabe', 'Network error. Please try again.') + // Check if this is an HTTP error with a response + if (err.response && err.response.data && err.response.data.error) { + // Use the specific error message from the backend + this.error = err.response.data.error + } else if (err.response && err.response.status === 401) { + // Unauthorized - user needs to authorize the app + this.error = this.t('astrolabe', 'Authorization required. Please complete Step 1 in Settings → Astrolabe.') + } else if (err.response && err.response.status === 503) { + // Service unavailable - MCP server not reachable + this.error = this.t('astrolabe', 'Search service unavailable. Please try again later.') + } else { + // Actual network error or unknown error + this.error = this.t('astrolabe', 'Network error. 
Please try again.') + } this.results = [] } finally { this.loading = false @@ -637,7 +638,14 @@ export default { } } catch (err) { console.error('Status error:', err) - this.statusError = this.t('astrolabe', 'Network error. Please try again.') + // Extract error message from response if available + if (err.response && err.response.data && err.response.data.error) { + this.statusError = err.response.data.error + } else if (err.response && err.response.status === 401) { + this.statusError = this.t('astrolabe', 'Authorization required. Please complete Step 1 in Settings → Astrolabe.') + } else { + this.statusError = this.t('astrolabe', 'Network error. Please try again.') + } } finally { this.statusLoading = false } @@ -749,7 +757,7 @@ export default { colorscale: 'Viridis', showscale: true, colorbar: { - title: 'Relative Score', + title: { text: 'Relative Score' }, x: 1.02, xanchor: 'left', thickness: 20, @@ -784,13 +792,13 @@ export default { } const layout = { - title: `Vector Space (PCA 3D) - ${results.length} results`, + title: { text: `Vector Space (PCA 3D) - ${results.length} results` }, width, height, scene: { - xaxis: { title: 'PC1' }, - yaxis: { title: 'PC2' }, - zaxis: { title: 'PC3' }, + xaxis: { title: { text: 'PC1' } }, + yaxis: { title: { text: 'PC2' } }, + zaxis: { title: { text: 'PC3' } }, camera: { eye: { x: 1.5, y: 1.5, z: 1.5 }, }, diff --git a/third_party/astrolabe/src/components/PDFViewer.vue b/third_party/astrolabe/src/components/PDFViewer.vue index 60d6611..e4081a1 100644 --- a/third_party/astrolabe/src/components/PDFViewer.vue +++ b/third_party/astrolabe/src/components/PDFViewer.vue @@ -8,15 +8,28 @@

{{ error }}

-
- +
+ PDF page
@@ -206,19 +160,19 @@ onBeforeUnmount(() => { } } -.pdf-canvas-container { +.pdf-image-container { position: relative; border: 1px solid var(--color-border); box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); background: var(--color-main-background); max-width: 100%; overflow: auto; +} - canvas { - display: block; - max-width: 100%; - height: auto; - } +.pdf-page-image { + display: block; + max-width: 100%; + height: auto; } @media (max-width: 768px) { diff --git a/third_party/astrolabe/templates/index.php b/third_party/astrolabe/templates/index.php index a2ca69f..8059c60 100644 --- a/third_party/astrolabe/templates/index.php +++ b/third_party/astrolabe/templates/index.php @@ -2,10 +2,11 @@ declare(strict_types=1); +use OCA\Astrolabe\AppInfo\Application; use OCP\Util; -Util::addScript(OCA\Astrolabe\AppInfo\Application::APP_ID, OCA\Astrolabe\AppInfo\Application::APP_ID . '-main'); -Util::addStyle(OCA\Astrolabe\AppInfo\Application::APP_ID, OCA\Astrolabe\AppInfo\Application::APP_ID . '-main'); +Util::addScript(Application::APP_ID, Application::APP_ID . '-main'); +Util::addStyle(Application::APP_ID, Application::APP_ID . '-main'); ?>