From 97b48ca3dde175fae9841d85c872bb36322ac9d4 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Sun, 14 Dec 2025 20:50:39 +0100 Subject: [PATCH] feat(astrolabe): add 3D PCA visualization for semantic search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Plotly.js 3D scatter plot showing search results in PCA space - Create shared visualization.py module to avoid code duplication - Pass include_pca parameter through API chain to enable coordinates - Fix OAuth redirects to use /settings/user/astroglobe The visualization shows document embeddings projected to 3D via PCA, with the query point highlighted in red. Uses Viridis colorscale for score visualization, matching the existing vector-viz page. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- nextcloud_mcp_server/api/__init__.py | 6 + nextcloud_mcp_server/api/management.py | 549 ++++++++++++++++++ nextcloud_mcp_server/vector/visualization.py | 190 ++++++ .../lib/Controller/ApiController.php | 30 +- .../lib/Controller/OAuthController.php | 8 +- third_party/astroglobe/package-lock.json | 7 + third_party/astroglobe/package.json | 1 + third_party/astroglobe/src/App.vue | 191 +++++- 8 files changed, 970 insertions(+), 12 deletions(-) create mode 100644 nextcloud_mcp_server/api/__init__.py create mode 100644 nextcloud_mcp_server/api/management.py create mode 100644 nextcloud_mcp_server/vector/visualization.py diff --git a/nextcloud_mcp_server/api/__init__.py b/nextcloud_mcp_server/api/__init__.py new file mode 100644 index 0000000..0b11b6d --- /dev/null +++ b/nextcloud_mcp_server/api/__init__.py @@ -0,0 +1,6 @@ +"""Management API for Nextcloud MCP Server. + +Provides REST endpoints for the Nextcloud PHP app to query server status, +user sessions, and vector sync metrics. All endpoints use OAuth bearer token +authentication via the UnifiedTokenVerifier. 
+""" diff --git a/nextcloud_mcp_server/api/management.py b/nextcloud_mcp_server/api/management.py new file mode 100644 index 0000000..84d6a3b --- /dev/null +++ b/nextcloud_mcp_server/api/management.py @@ -0,0 +1,549 @@ +"""Management API endpoints for Nextcloud PHP app integration. + +ADR-018: Provides REST API endpoints for the Nextcloud PHP app to query: +- Server status and version +- User session information and background access status +- Vector sync metrics +- Vector search for visualization + +All endpoints use OAuth bearer token authentication via UnifiedTokenVerifier. +The PHP app obtains tokens through PKCE flow and uses them to access these endpoints. +""" + +import logging +import os +import time +from importlib.metadata import version +from typing import Any + +from starlette.requests import Request +from starlette.responses import JSONResponse + +logger = logging.getLogger(__name__) + + +# Get package version from metadata +__version__ = version("nextcloud-mcp-server") + +# Track server start time for uptime calculation +_server_start_time = time.time() + + +def extract_bearer_token(request: Request) -> str | None: + """Extract OAuth bearer token from Authorization header. + + Args: + request: Starlette request + + Returns: + Token string or None if no valid Authorization header + """ + auth_header = request.headers.get("Authorization") + if not auth_header: + return None + + # Parse "Bearer " + parts = auth_header.split() + if len(parts) != 2 or parts[0].lower() != "bearer": + return None + + return parts[1] + + +async def validate_token_and_get_user( + request: Request, +) -> tuple[str, dict[str, Any]]: + """Validate OAuth bearer token and extract user ID. 
+ + Args: + request: Starlette request with Authorization header + + Returns: + Tuple of (user_id, validated_token_data) + + Raises: + Exception: If token is invalid or missing + """ + token = extract_bearer_token(request) + if not token: + raise ValueError("Missing Authorization header") + + # Get token verifier from app state + # Note: This is set in app.py starlette_lifespan for OAuth mode + token_verifier = request.app.state.oauth_context["token_verifier"] + + # Validate token (handles both JWT and opaque tokens) + # verify_token returns AccessToken object or None + access_token = await token_verifier.verify_token(token) + + if not access_token: + raise ValueError("Token validation failed") + + # Extract user ID from AccessToken.resource field (set during verification) + user_id = access_token.resource + if not user_id: + raise ValueError("Token missing user identifier") + + # Return user_id and a dict with token info for compatibility + validated = { + "sub": user_id, + "client_id": access_token.client_id, + "scopes": access_token.scopes, + "expires_at": access_token.expires_at, + } + + return user_id, validated + + +async def get_server_status(request: Request) -> JSONResponse: + """GET /api/v1/status - Server status and version. + + Returns basic server information including version, auth mode, + vector sync status, and uptime. + + Public endpoint - no authentication required. 
+ """ + # Public endpoint - no authentication required + + # Get configuration + from nextcloud_mcp_server.config import get_settings + + settings = get_settings() + + # Calculate uptime + uptime_seconds = int(time.time() - _server_start_time) + + # Determine auth mode + nextcloud_username = os.getenv("NEXTCLOUD_USERNAME") + nextcloud_password = os.getenv("NEXTCLOUD_PASSWORD") + + if nextcloud_username and nextcloud_password: + auth_mode = "basic" + else: + auth_mode = "oauth" + + response_data = { + "version": __version__, + "auth_mode": auth_mode, + "vector_sync_enabled": settings.vector_sync_enabled, + "uptime_seconds": uptime_seconds, + "management_api_version": "1.0", + } + + # Include OIDC configuration if in OAuth mode + if auth_mode == "oauth": + # Provide IdP discovery information for NC PHP app + oidc_config = {} + + if settings.oidc_discovery_url: + oidc_config["discovery_url"] = settings.oidc_discovery_url + + if settings.oidc_issuer: + oidc_config["issuer"] = settings.oidc_issuer + + if oidc_config: + response_data["oidc"] = oidc_config + + return JSONResponse(response_data) + + +async def get_vector_sync_status(request: Request) -> JSONResponse: + """GET /api/v1/vector-sync/status - Vector sync metrics. + + Returns real-time indexing status and metrics. + + Requires: VECTOR_SYNC_ENABLED=true + + Public endpoint - no authentication required. 
+ """ + # Public endpoint - no authentication required + + from nextcloud_mcp_server.config import get_settings + + settings = get_settings() + if not settings.vector_sync_enabled: + return JSONResponse( + {"error": "Vector sync is disabled on this server"}, + status_code=404, + ) + + try: + # Get document receive stream from app state (set by starlette_lifespan in app.py) + document_receive_stream = getattr( + request.app.state, "document_receive_stream", None + ) + + if document_receive_stream is None: + logger.debug("document_receive_stream not available in app state") + return JSONResponse( + { + "status": "unknown", + "indexed_documents": 0, + "pending_documents": 0, + "message": "Vector sync stream not initialized", + } + ) + + # Get pending count from stream statistics + stream_stats = document_receive_stream.statistics() + pending_count = stream_stats.current_buffer_used + + # Get Qdrant client and query indexed count + indexed_count = 0 + try: + from qdrant_client.models import Filter + + from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter + from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client + + qdrant_client = await get_qdrant_client() + + # Count documents in collection, excluding placeholders + count_result = await qdrant_client.count( + collection_name=settings.get_collection_name(), + count_filter=Filter(must=[get_placeholder_filter()]), + ) + indexed_count = count_result.count + + except Exception as e: + logger.warning(f"Failed to query Qdrant for indexed count: {e}") + # Continue with indexed_count = 0 + + # Determine status + status = "syncing" if pending_count > 0 else "idle" + + return JSONResponse( + { + "status": status, + "indexed_documents": indexed_count, + "pending_documents": pending_count, + } + ) + + except Exception as e: + logger.error(f"Error getting vector sync status: {e}") + return JSONResponse( + {"error": "Internal error", "message": str(e)}, + status_code=500, + ) + + +async def 
get_user_session(request: Request) -> JSONResponse: + """GET /api/v1/users/{user_id}/session - User session details. + + Returns information about the user's MCP session including: + - Background access status (offline_access) + - IdP profile information + + Requires OAuth bearer token. The user_id in the path must match + the user_id in the token. + """ + try: + # Validate OAuth token and extract user + token_user_id, validated = await validate_token_and_get_user(request) + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/users/{{user_id}}/session: {e}") + return JSONResponse( + {"error": "Unauthorized", "message": str(e)}, + status_code=401, + ) + + # Get user_id from path + path_user_id = request.path_params.get("user_id") + + # Verify token user matches requested user + if token_user_id != path_user_id: + logger.warning( + f"User {token_user_id} attempted to access session for {path_user_id}" + ) + return JSONResponse( + { + "error": "Forbidden", + "message": "Cannot access another user's session", + }, + status_code=403, + ) + + # Check if offline access is enabled + enable_offline_access = os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() in ( + "true", + "1", + "yes", + ) + + if not enable_offline_access: + # Offline access disabled - return minimal session info + return JSONResponse( + { + "session_id": token_user_id, + "background_access_granted": False, + } + ) + + # Get refresh token storage from app state + storage = request.app.state.oauth_context.get("storage") + if not storage: + logger.error("Refresh token storage not available in app state") + return JSONResponse( + { + "session_id": token_user_id, + "background_access_granted": False, + "error": "Storage not configured", + } + ) + + try: + # Check if user has refresh token stored + refresh_token_data = await storage.get_refresh_token(token_user_id) + + if not refresh_token_data: + # No refresh token - user hasn't provisioned background access + return JSONResponse( + { + 
"session_id": token_user_id, + "background_access_granted": False, + } + ) + + # User has background access - get profile info + profile = await storage.get_user_profile(token_user_id) + + response_data = { + "session_id": token_user_id, + "background_access_granted": True, + "background_access_details": { + "granted_at": refresh_token_data.get("created_at"), + "scopes": refresh_token_data.get("scope", "").split(), + }, + } + + if profile: + response_data["idp_profile"] = profile + + return JSONResponse(response_data) + + except Exception as e: + logger.error(f"Error getting user session for {token_user_id}: {e}") + return JSONResponse( + {"error": "Internal error", "message": str(e)}, + status_code=500, + ) + + +async def revoke_user_access(request: Request) -> JSONResponse: + """POST /api/v1/users/{user_id}/revoke - Revoke user's background access. + + Deletes the user's stored refresh token, removing their offline access. + + Requires OAuth bearer token. The user_id in the path must match + the user_id in the token. 
+ """ + try: + # Validate OAuth token and extract user + token_user_id, validated = await validate_token_and_get_user(request) + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/users/{{user_id}}/revoke: {e}") + return JSONResponse( + {"error": "Unauthorized", "message": str(e)}, + status_code=401, + ) + + # Get user_id from path + path_user_id = request.path_params.get("user_id") + + # Verify token user matches requested user + if token_user_id != path_user_id: + logger.warning( + f"User {token_user_id} attempted to revoke access for {path_user_id}" + ) + return JSONResponse( + { + "error": "Forbidden", + "message": "Cannot revoke another user's access", + }, + status_code=403, + ) + + # Get refresh token storage from app state + storage = request.app.state.oauth_context.get("storage") + if not storage: + logger.error("Refresh token storage not available in app state") + return JSONResponse( + {"error": "Storage not configured"}, + status_code=500, + ) + + try: + # Delete refresh token + await storage.delete_refresh_token(token_user_id) + logger.info(f"Revoked background access for user: {token_user_id}") + + return JSONResponse( + { + "success": True, + "message": f"Background access revoked for {token_user_id}", + } + ) + + except Exception as e: + logger.error(f"Error revoking access for {token_user_id}: {e}") + return JSONResponse( + {"error": "Internal error", "message": str(e)}, + status_code=500, + ) + + +async def vector_search(request: Request) -> JSONResponse: + """POST /api/v1/vector-viz/search - Vector search for visualization. + + Executes semantic search and returns results with optional PCA coordinates + for 3D visualization.
+ + Request body: + { + "query": "search query", + "algorithm": "semantic|bm25|hybrid", // default: hybrid + "limit": 10, // max: 50 + "include_pca": true, // whether to include 3D coordinates + "doc_types": ["note", "file"] // optional filter by document types + } + + Requires OAuth bearer token for user filtering. + """ + from nextcloud_mcp_server.config import get_settings + + settings = get_settings() + if not settings.vector_sync_enabled: + return JSONResponse( + {"error": "Vector sync is disabled on this server"}, + status_code=404, + ) + + # Validate OAuth token and extract user + try: + user_id, _validated = await validate_token_and_get_user(request) + except Exception as e: + logger.warning(f"Unauthorized access to /api/v1/vector-viz/search: {e}") + return JSONResponse( + {"error": "Unauthorized", "message": str(e)}, + status_code=401, + ) + + try: + # Parse request body + body = await request.json() + query = body.get("query", "") + algorithm = body.get("algorithm", "hybrid") + limit = min(body.get("limit", 10), 50) # Enforce max limit + include_pca = body.get("include_pca", True) + doc_types = body.get("doc_types") # Optional list of document types + + if not query: + return JSONResponse( + {"error": "Missing required parameter: query"}, + status_code=400, + ) + + # Validate algorithm + valid_algorithms = {"semantic", "bm25", "hybrid"} + if algorithm not in valid_algorithms: + algorithm = "hybrid" + + # Execute search using the appropriate algorithm + from nextcloud_mcp_server.search import ( + BM25HybridSearchAlgorithm, + SemanticSearchAlgorithm, + ) + + # Select search algorithm + if algorithm == "semantic": + search_algo = SemanticSearchAlgorithm(score_threshold=0.0) + else: + # Both "hybrid" and "bm25" use the BM25HybridSearchAlgorithm + # which combines dense semantic and sparse BM25 vectors + search_algo = BM25HybridSearchAlgorithm(score_threshold=0.0, fusion="rrf") + + # Execute search for each doc_type if specified, otherwise search all +
all_results = [] + if doc_types and isinstance(doc_types, list): + # Search each doc_type separately and merge results + for doc_type in doc_types: + if doc_type: # Skip empty strings + results = await search_algo.search( + query=query, + user_id=user_id, + limit=limit, + doc_type=doc_type, + ) + all_results.extend(results) + # Sort merged results by score and limit + all_results.sort(key=lambda r: r.score, reverse=True) + all_results = all_results[:limit] + else: + # Search all document types + all_results = await search_algo.search( + query=query, + user_id=user_id, + limit=limit, + ) + + # Format results for PHP client + formatted_results = [] + for result in all_results: + formatted_results.append( + { + "id": result.id, + "doc_type": result.doc_type, + "title": result.title, + "excerpt": result.excerpt[:200] if result.excerpt else "", + "score": result.score, + "metadata": result.metadata, + } + ) + + response_data: dict[str, Any] = { + "results": formatted_results, + "algorithm_used": algorithm, + "total_documents": len(formatted_results), + } + + # Compute PCA coordinates for visualization using shared function + if include_pca and len(all_results) >= 2: + try: + from nextcloud_mcp_server.vector.visualization import ( + compute_pca_coordinates, + ) + + # Get query embedding from search algorithm or generate it + if search_algo.query_embedding is not None: + query_embedding = search_algo.query_embedding + else: + from nextcloud_mcp_server.embedding.service import ( + get_embedding_service, + ) + + embedding_service = get_embedding_service() + query_embedding = await embedding_service.embed(query) + + pca_data = await compute_pca_coordinates(all_results, query_embedding) + response_data["coordinates_3d"] = pca_data["coordinates_3d"] + response_data["query_coords"] = pca_data["query_coords"] + if "pca_variance" in pca_data: + response_data["pca_variance"] = pca_data["pca_variance"] + except Exception as e: + logger.warning(f"Failed to compute PCA coordinates: 
{e}") + response_data["coordinates_3d"] = [] + response_data["query_coords"] = [] + elif include_pca: + # Not enough results for PCA + response_data["coordinates_3d"] = [] + response_data["query_coords"] = [] + + return JSONResponse(response_data) + + except Exception as e: + logger.error(f"Error executing vector search: {e}") + return JSONResponse( + {"error": "Internal error", "message": str(e)}, + status_code=500, + ) diff --git a/nextcloud_mcp_server/vector/visualization.py b/nextcloud_mcp_server/vector/visualization.py new file mode 100644 index 0000000..5ffb4bd --- /dev/null +++ b/nextcloud_mcp_server/vector/visualization.py @@ -0,0 +1,190 @@ +"""Shared visualization utilities for PCA coordinate computation. + +Extracts the PCA coordinate computation logic used by both: +- viz_routes.py (session-based auth) +- management.py (OAuth bearer token auth) + +Both endpoints need to compute 3D PCA coordinates for search results, +so this module provides the shared implementation. +""" + +import logging +from typing import Any + +import anyio.to_thread +import numpy as np + +from nextcloud_mcp_server.config import get_settings +from nextcloud_mcp_server.vector.pca import PCA +from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client + +logger = logging.getLogger(__name__) + + +async def compute_pca_coordinates( + search_results: list[Any], + query_embedding: np.ndarray | list[float], +) -> dict[str, Any]: + """Compute PCA 3D coordinates for search results visualization. + + This is the shared implementation used by both viz_routes.py and + the management API. It retrieves vectors from Qdrant and applies + PCA dimensionality reduction. 
+ + Args: + search_results: List of SearchResult objects with point_id + query_embedding: The query embedding vector + + Returns: + Dict with: + - coordinates_3d: List of [x, y, z] for each result + - query_coords: [x, y, z] for the query point + - pca_variance: Dict with pc1, pc2, pc3 explained variance ratios + """ + settings = get_settings() + + # Collect point IDs from search results for batch retrieval + point_ids = [r.point_id for r in search_results if r.point_id] + + if len(point_ids) < 2: + return {"coordinates_3d": [], "query_coords": []} + + qdrant_client = await get_qdrant_client() + + # Batch retrieve vectors from Qdrant + points_response = await qdrant_client.retrieve( + collection_name=settings.get_collection_name(), + ids=point_ids, + with_vectors=["dense"], + with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"], + ) + + # Build chunk_vectors_map from batch response + chunk_vectors_map: dict[tuple[Any, Any, Any], Any] = {} + for point in points_response: + if point.vector is not None: + # Extract dense vector (handle both named and unnamed vectors) + if isinstance(point.vector, dict): + vector = point.vector.get("dense") + else: + vector = point.vector + + if vector is not None and point.payload: + doc_id = point.payload.get("doc_id") + chunk_start = point.payload.get("chunk_start_offset") + chunk_end = point.payload.get("chunk_end_offset") + chunk_key = (doc_id, chunk_start, chunk_end) + chunk_vectors_map[chunk_key] = vector + + if len(chunk_vectors_map) < 2: + return {"coordinates_3d": [], "query_coords": []} + + # Detect embedding dimension + embedding_dim = None + for vector in chunk_vectors_map.values(): + if vector is not None: + embedding_dim = len(vector) + break + + if embedding_dim is None: + return {"coordinates_3d": [], "query_coords": []} + + logger.info(f"Detected embedding dimension: {embedding_dim}") + + # Build chunk vectors array in search_results order (1:1 mapping) + chunk_vectors = [] + for result in search_results: 
+ chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset) + if chunk_key in chunk_vectors_map: + chunk_vectors.append(chunk_vectors_map[chunk_key]) + else: + # Chunk not found in vectors (shouldn't happen) + logger.warning( + f"Chunk {chunk_key} not found in fetched vectors, using zero vector" + ) + chunk_vectors.append(np.zeros(embedding_dim)) + + chunk_vectors = np.array(chunk_vectors) + + # Ensure query_embedding is a numpy array + if not isinstance(query_embedding, np.ndarray): + query_embedding = np.array(query_embedding) + + # Combine query vector with chunk vectors for PCA + # Query will be the last point in the array + all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])]) + + # Normalize vectors to unit length (L2 normalization) + # This is critical because Qdrant uses COSINE distance, which only measures + # vector direction (angle), not magnitude. PCA uses Euclidean distance which + # considers both direction and magnitude. By normalizing to unit length, + # Euclidean distances in PCA space will match cosine distances. + norms = np.linalg.norm(all_vectors, axis=1, keepdims=True) + + # Check for zero-norm vectors (can happen with empty/corrupted embeddings) + zero_norm_mask = norms[:, 0] < 1e-10 + if zero_norm_mask.any(): + zero_indices = np.where(zero_norm_mask)[0] + logger.warning( + f"Found {zero_norm_mask.sum()} zero-norm vectors at indices " + f"{zero_indices.tolist()}. Replacing with small epsilon to avoid " + "division by zero." 
+ ) + # Replace zero norms with small epsilon to avoid NaN + norms[zero_norm_mask] = 1e-10 + + all_vectors_normalized = all_vectors / norms + logger.info( + f"Normalized vectors: query_norm={norms[-1][0]:.3f}, " + f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]" + ) + + # Apply PCA dimensionality reduction (embedding_dim → 3D) + # Run in thread pool to avoid blocking the event loop (CPU-bound) + def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]: + pca = PCA(n_components=3) + coords = pca.fit_transform(vectors) + return coords, pca + + coords_3d, pca = await anyio.to_thread.run_sync( + lambda: _compute_pca(all_vectors_normalized) + ) + + # After fit, these attributes are guaranteed to be set + assert pca.explained_variance_ratio_ is not None + + # Check for NaN values in PCA output (numerical instability) + nan_mask = np.isnan(coords_3d) + if nan_mask.any(): + nan_rows = np.where(nan_mask.any(axis=1))[0] + logger.error( + f"Found NaN values in PCA output at {len(nan_rows)} points: " + f"{nan_rows.tolist()[:10]}. Replacing NaN with 0.0 to prevent " + "JSON serialization error."
+ ) + # Replace NaN with 0 to allow JSON serialization + coords_3d = np.nan_to_num(coords_3d, nan=0.0) + + # Split query coords from chunk coords + # Round to 2 decimal places for cleaner display + query_coords_3d = [round(float(x), 2) for x in coords_3d[-1]] # Last point is query + chunk_coords_3d = coords_3d[:-1] # All but last are chunks + + logger.info( + f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, " + f"PC2={pca.explained_variance_ratio_[1]:.3f}, " + f"PC3={pca.explained_variance_ratio_[2]:.3f}" + ) + + # Coordinates already match search_results order (1:1 mapping) + result_coords = [[round(float(x), 2) for x in coord] for coord in chunk_coords_3d] + + return { + "coordinates_3d": result_coords, + "query_coords": query_coords_3d, + "pca_variance": { + "pc1": float(pca.explained_variance_ratio_[0]), + "pc2": float(pca.explained_variance_ratio_[1]), + "pc3": float(pca.explained_variance_ratio_[2]), + }, + } diff --git a/third_party/astroglobe/lib/Controller/ApiController.php b/third_party/astroglobe/lib/Controller/ApiController.php index 97dd4fc..24efc18 100644 --- a/third_party/astroglobe/lib/Controller/ApiController.php +++ b/third_party/astroglobe/lib/Controller/ApiController.php @@ -61,7 +61,7 @@ class ApiController extends Controller { // Should not happen (NoAdminRequired ensures user is logged in) $this->logger->error('Revoke access called without authenticated user'); return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe']) ); } @@ -72,7 +72,7 @@ class ApiController extends Controller { if (!$token) { $this->logger->error("Cannot revoke access: No token found for user $userId"); return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', 
['section' => 'astroglobe']) ); } @@ -93,7 +93,7 @@ class ApiController extends Controller { // Redirect back to personal settings return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe']) ); } @@ -107,6 +107,7 @@ class ApiController extends Controller { * @param string $algorithm Search algorithm (semantic, bm25, hybrid) * @param int $limit Number of results (max 50) * @param string $doc_types Comma-separated document types (e.g., "note,file") + * @param string $include_pca Whether to include PCA coordinates for visualization * @return JSONResponse */ #[NoAdminRequired] @@ -114,7 +115,8 @@ class ApiController extends Controller { string $query = '', string $algorithm = 'hybrid', int $limit = 10, - string $doc_types = '' + string $doc_types = '', + string $include_pca = 'true' ): JSONResponse { if (empty($query)) { return new JSONResponse([ @@ -166,8 +168,11 @@ class ApiController extends Controller { } } + // Parse include_pca (string "true"/"false" from query params) + $includePcaBool = in_array(strtolower($include_pca), ['true', '1', 'yes'], true); + // Execute search via MCP server with OAuth token - $result = $this->client->search($query, $algorithm, $limit, false, $docTypesArray, $accessToken); + $result = $this->client->search($query, $algorithm, $limit, $includePcaBool, $docTypesArray, $accessToken); if (isset($result['error'])) { return new JSONResponse([ @@ -176,12 +181,23 @@ class ApiController extends Controller { ], Http::STATUS_INTERNAL_SERVER_ERROR); } - return new JSONResponse([ + $response = [ 'success' => true, 'results' => $result['results'] ?? [], 'algorithm_used' => $result['algorithm_used'] ?? $algorithm, 'total_documents' => $result['total_documents'] ?? 
0, - ]); + ]; + + // Include PCA visualization coordinates if requested and available + if ($includePcaBool) { + $response['coordinates_3d'] = $result['coordinates_3d'] ?? []; + $response['query_coords'] = $result['query_coords'] ?? []; + if (isset($result['pca_variance'])) { + $response['pca_variance'] = $result['pca_variance']; + } + } + + return new JSONResponse($response); } /** diff --git a/third_party/astroglobe/lib/Controller/OAuthController.php b/third_party/astroglobe/lib/Controller/OAuthController.php index c50f0f5..8b4a627 100644 --- a/third_party/astroglobe/lib/Controller/OAuthController.php +++ b/third_party/astroglobe/lib/Controller/OAuthController.php @@ -200,7 +200,7 @@ class OAuthController extends Controller { // Redirect back to personal settings return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe']) ); } catch (\Exception $e) { $this->logger->error('OAuth callback failed', [ @@ -215,7 +215,7 @@ class OAuthController extends Controller { // Redirect to settings with error return new RedirectResponse( $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', [ - 'section' => 'mcp', + 'section' => 'astroglobe', 'error' => urlencode($e->getMessage()) ]) ); @@ -234,7 +234,7 @@ class OAuthController extends Controller { $user = $this->userSession->getUser(); if (!$user) { return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe']) ); } @@ -250,7 +250,7 @@ class OAuthController extends Controller { } return new RedirectResponse( - $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp']) + $this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe']) ); } 
diff --git a/third_party/astroglobe/package-lock.json b/third_party/astroglobe/package-lock.json index 618867c..c7da5d4 100644 --- a/third_party/astroglobe/package-lock.json +++ b/third_party/astroglobe/package-lock.json @@ -13,6 +13,7 @@ "@nextcloud/l10n": "^3.1.0", "@nextcloud/router": "^3.0.1", "@nextcloud/vue": "^8.29.2", + "plotly.js-dist-min": "^2.35.3", "vue": "^2.7.16", "vue-material-design-icons": "^5.3.1" }, @@ -9969,6 +9970,12 @@ "pathe": "^2.0.3" } }, + "node_modules/plotly.js-dist-min": { + "version": "2.35.3", + "resolved": "https://registry.npmjs.org/plotly.js-dist-min/-/plotly.js-dist-min-2.35.3.tgz", + "integrity": "sha512-sz2HLP8gkysLx/BanM2PtJTtZ1PLPwdHwMWNri2YxLBy3IOeuDsVQtlmWa4hoK3j/fi4naaD3uZJqH5ozM3zGg==", + "license": "MIT" + }, "node_modules/possible-typed-array-names": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz", diff --git a/third_party/astroglobe/package.json b/third_party/astroglobe/package.json index 6250cb5..405c747 100644 --- a/third_party/astroglobe/package.json +++ b/third_party/astroglobe/package.json @@ -22,6 +22,7 @@ "@nextcloud/l10n": "^3.1.0", "@nextcloud/router": "^3.0.1", "@nextcloud/vue": "^8.29.2", + "plotly.js-dist-min": "^2.35.3", "vue": "^2.7.16", "vue-material-design-icons": "^5.3.1" }, diff --git a/third_party/astroglobe/src/App.vue b/third_party/astroglobe/src/App.vue index e180b81..9251df4 100644 --- a/third_party/astroglobe/src/App.vue +++ b/third_party/astroglobe/src/App.vue @@ -131,6 +131,22 @@ {{ algorithmUsed }} + +
+
+

{{ t('astroglobe', 'Vector Space Visualization') }}

+ + {{ t('astroglobe', 'Show query point') }} + +
+
+
+
+
+
0) { @@ -343,6 +367,15 @@ export default { if (response.data.success) { this.results = response.data.results || [] this.algorithmUsed = response.data.algorithm_used || this.algorithm + this.coordinates = response.data.coordinates_3d || [] + this.queryCoords = response.data.query_coords || [] + + // Render visualization after DOM updates + if (this.coordinates.length > 0) { + this.$nextTick(() => { + this.renderPlot() + }) + } } else { this.error = response.data.error || this.t('astroglobe', 'Search failed') this.results = [] @@ -382,7 +415,130 @@ export default { }, goToSettings() { - window.location.href = generateUrl('/settings/user/mcp') + window.location.href = generateUrl('/settings/user/astroglobe') + }, + + renderPlot() { + const container = document.getElementById('viz-plot-container') + if (!container) return + + const width = container.clientWidth + const height = container.clientHeight || 400 + + const coordinates = this.coordinates + const queryCoords = this.queryCoords + const results = this.results + + const scores = results.map(r => r.score) + + // Trace 1: Document results (always visible) + const documentTrace = { + x: coordinates.map(c => c[0]), + y: coordinates.map(c => c[1]), + z: coordinates.map(c => c[2]), + mode: 'markers', + type: 'scatter3d', + name: 'Documents', + visible: true, + customdata: results.map((r, i) => ({ + title: r.title, + raw_score: r.original_score || r.score, + relative_score: r.score, + x: coordinates[i][0], + y: coordinates[i][1], + z: coordinates[i][2], + })), + hovertemplate: + '%{customdata.title}
' + + 'Raw Score: %{customdata.raw_score:.3f} (%{customdata.relative_score:.0%} relative)
' + + '(x=%{customdata.x}, y=%{customdata.y}, z=%{customdata.z})' + + '', + marker: { + size: results.map(r => 4 + (Math.pow(r.score, 2) * 10)), + opacity: results.map(r => 0.3 + (r.score * 0.7)), + color: scores, + colorscale: 'Viridis', + showscale: true, + colorbar: { + title: 'Relative Score', + x: 1.02, + xanchor: 'left', + thickness: 20, + len: 0.8, + }, + cmin: 0, + cmax: 1, + }, + } + + // Trace 2: Query point (visibility controlled by toggle) + const queryTrace = { + x: [queryCoords[0]], + y: [queryCoords[1]], + z: [queryCoords[2]], + mode: 'markers', + type: 'scatter3d', + name: 'Query', + visible: this.showQueryPoint, + hovertemplate: + 'Search Query
' + + `(x=${queryCoords[0]}, y=${queryCoords[1]}, z=${queryCoords[2]})` + + '', + marker: { + size: 10, + color: '#ef5350', // Subdued red (Material Design Red 400) + line: { + color: '#c62828', // Darker red border (Material Design Red 800) + width: 1, + }, + }, + } + + const layout = { + title: `Vector Space (PCA 3D) - ${results.length} results`, + width, + height, + scene: { + xaxis: { title: 'PC1' }, + yaxis: { title: 'PC2' }, + zaxis: { title: 'PC3' }, + camera: { + eye: { x: 1.5, y: 1.5, z: 1.5 }, + }, + domain: { + x: [0, 1], + y: [0, 1], + }, + }, + hovermode: 'closest', + autosize: true, + showlegend: false, + margin: { l: 0, r: 100, t: 40, b: 0 }, + } + + const traces = [documentTrace, queryTrace] + + const config = { + responsive: true, + displayModeBar: true, + } + + Plotly.newPlot('viz-plot', traces, layout, config) + }, + + updatePlot() { + // Toggle query point visibility without recreating the plot + if (this.coordinates.length > 0 && this.queryCoords.length > 0 && this.results.length > 0) { + const plotDiv = document.getElementById('viz-plot') + + if (plotDiv && plotDiv.data && plotDiv.data.length >= 2) { + // Trace index 1 is the query point + Plotly.restyle('viz-plot', { visible: this.showQueryPoint }, [1]) + } else { + // Plot doesn't exist yet, render it + this.renderPlot() + } + } }, }, } @@ -478,6 +634,39 @@ export default { margin: 16px 0; } +// Visualization +.mcp-viz-container { + background: var(--color-background-hover); + border-radius: var(--border-radius-large); + padding: 16px; + margin-bottom: 24px; +} + +.mcp-viz-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 12px; + + h3 { + margin: 0; + font-size: 16px; + font-weight: 600; + } +} + +.mcp-viz-plot-container { + width: 100%; + height: 400px; + background: var(--color-main-background); + border-radius: var(--border-radius); +} + +#viz-plot { + width: 100%; + height: 100%; +} + // Results .mcp-results { margin-top: 24px;