feat(astrolabe): add 3D PCA visualization for semantic search
- Add Plotly.js 3D scatter plot showing search results in PCA space
- Create shared visualization.py module to avoid code duplication
- Pass include_pca parameter through API chain to enable coordinates
- Fix OAuth redirects to use /settings/user/astroglobe

The visualization shows document embeddings projected to 3D via PCA, with the query point highlighted in red. Uses Viridis colorscale for score visualization, matching the existing vector-viz page.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,6 @@
|
||||
"""Management API for Nextcloud MCP Server.
|
||||
|
||||
Provides REST endpoints for the Nextcloud PHP app to query server status,
|
||||
user sessions, and vector sync metrics. All endpoints use OAuth bearer token
|
||||
authentication via the UnifiedTokenVerifier.
|
||||
"""
|
||||
@@ -0,0 +1,549 @@
|
||||
"""Management API endpoints for Nextcloud PHP app integration.
|
||||
|
||||
ADR-018: Provides REST API endpoints for the Nextcloud PHP app to query:
|
||||
- Server status and version
|
||||
- User session information and background access status
|
||||
- Vector sync metrics
|
||||
- Vector search for visualization
|
||||
|
||||
All endpoints use OAuth bearer token authentication via UnifiedTokenVerifier.
|
||||
The PHP app obtains tokens through PKCE flow and uses them to access these endpoints.
|
||||
"""
|
||||
|
||||
import logging
import os
import time
from importlib.metadata import PackageNotFoundError, version
from typing import Any

from starlette.requests import Request
from starlette.responses import JSONResponse
|
||||
|
||||
# Module-level logger shared by every endpoint in this file
logger = logging.getLogger(__name__)


# Get package version from installed distribution metadata.
# The lookup raises PackageNotFoundError when the distribution is not
# installed (e.g. running from a bare source checkout); fall back to a
# placeholder so the management API can still be imported and served.
try:
    __version__ = version("nextcloud-mcp-server")
except PackageNotFoundError:
    __version__ = "unknown"

# Track server start time (module import time) for uptime calculation
_server_start_time = time.time()
|
||||
|
||||
|
||||
def extract_bearer_token(request: Request) -> str | None:
    """Pull the bearer credential out of the request's Authorization header.

    The header must consist of exactly two whitespace-separated parts, the
    first being the scheme ``bearer`` (compared case-insensitively).

    Args:
        request: Starlette request

    Returns:
        The token part of the header, or None when the header is absent,
        empty, or not of the form ``Bearer <token>``.
    """
    header_value = request.headers.get("Authorization")
    if not header_value:
        return None

    # Expect exactly "<scheme> <credential>"
    pieces = header_value.split()
    if len(pieces) == 2 and pieces[0].lower() == "bearer":
        return pieces[1]
    return None
|
||||
|
||||
|
||||
async def validate_token_and_get_user(
    request: Request,
) -> tuple[str, dict[str, Any]]:
    """Authenticate the request's bearer token and identify its user.

    Args:
        request: Starlette request with Authorization header

    Returns:
        Tuple of (user_id, token_info) where token_info is a dict with the
        keys ``sub``, ``client_id``, ``scopes`` and ``expires_at`` copied
        from the verified access token.

    Raises:
        ValueError: If no bearer token can be extracted from the request,
            if the verifier returns a falsy result for the token, or if
            the verified token has an empty ``resource`` field.
    """
    bearer = extract_bearer_token(request)
    if not bearer:
        raise ValueError("Missing Authorization header")

    # The verifier lives in the app-state OAuth context
    # (per the original note: set in app.py starlette_lifespan for OAuth mode)
    verifier = request.app.state.oauth_context["token_verifier"]

    # verify_token yields an AccessToken-like object, or a falsy value on failure
    verified = await verifier.verify_token(bearer)
    if not verified:
        raise ValueError("Token validation failed")

    # The user identifier is read from the token's ``resource`` field
    user_id = verified.resource
    if not user_id:
        raise ValueError("Token missing user identifier")

    # Plain-dict view of the token, returned alongside the user id
    return user_id, {
        "sub": user_id,
        "client_id": verified.client_id,
        "scopes": verified.scopes,
        "expires_at": verified.expires_at,
    }
|
||||
|
||||
|
||||
async def get_server_status(request: Request) -> JSONResponse:
    """GET /api/v1/status - Server status and version.

    Reports the package version, the authentication mode, whether vector
    sync is enabled, the uptime in whole seconds and the management API
    version. In OAuth mode, any configured OIDC discovery URL / issuer is
    included under the ``oidc`` key.

    Public endpoint - no authentication required.
    """
    from nextcloud_mcp_server.config import get_settings

    settings = get_settings()

    # Basic auth is reported only when both credentials are present in the
    # environment; otherwise the server is considered to run in OAuth mode.
    has_basic_credentials = bool(
        os.getenv("NEXTCLOUD_USERNAME") and os.getenv("NEXTCLOUD_PASSWORD")
    )
    auth_mode = "basic" if has_basic_credentials else "oauth"

    payload = {
        "version": __version__,
        "auth_mode": auth_mode,
        "vector_sync_enabled": settings.vector_sync_enabled,
        # Seconds elapsed since this module was imported
        "uptime_seconds": int(time.time() - _server_start_time),
        "management_api_version": "1.0",
    }

    if auth_mode == "oauth":
        # IdP discovery information for the Nextcloud PHP app; only the
        # settings that are actually populated are exposed.
        oidc_sources = {
            "discovery_url": settings.oidc_discovery_url,
            "issuer": settings.oidc_issuer,
        }
        oidc_config = {key: value for key, value in oidc_sources.items() if value}
        if oidc_config:
            payload["oidc"] = oidc_config

    return JSONResponse(payload)
|
||||
|
||||
|
||||
async def get_vector_sync_status(request: Request) -> JSONResponse:
    """GET /api/v1/vector-sync/status - Vector sync metrics.

    Reports how many documents are counted in the vector collection and how
    many are currently buffered in the document receive stream.

    Responses:
        404: vector sync is disabled in settings.
        200, status "unknown": no ``document_receive_stream`` on app state.
        200, status "syncing" / "idle": buffer holds pending items / is empty.
        500: any unexpected failure outside the Qdrant count query.

    Requires: VECTOR_SYNC_ENABLED=true

    Public endpoint - no authentication required.
    """
    from nextcloud_mcp_server.config import get_settings

    settings = get_settings()
    if not settings.vector_sync_enabled:
        disabled_payload = {"error": "Vector sync is disabled on this server"}
        return JSONResponse(disabled_payload, status_code=404)

    try:
        # Per the original note, the stream is attached to app state by
        # starlette_lifespan in app.py
        stream = getattr(request.app.state, "document_receive_stream", None)
        if stream is None:
            logger.debug("document_receive_stream not available in app state")
            return JSONResponse(
                {
                    "status": "unknown",
                    "indexed_documents": 0,
                    "pending_documents": 0,
                    "message": "Vector sync stream not initialized",
                }
            )

        # Items still sitting in the stream buffer count as pending
        pending_count = stream.statistics().current_buffer_used

        # Best effort: a failing Qdrant query is logged and reported as 0
        indexed_count = 0
        try:
            from qdrant_client.models import Filter

            from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
            from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

            client = await get_qdrant_client()
            # Count with the placeholder filter applied (intended to leave
            # placeholder points out of the total)
            tally = await client.count(
                collection_name=settings.get_collection_name(),
                count_filter=Filter(must=[get_placeholder_filter()]),
            )
            indexed_count = tally.count
        except Exception as e:
            logger.warning(f"Failed to query Qdrant for indexed count: {e}")

        if pending_count > 0:
            sync_state = "syncing"
        else:
            sync_state = "idle"

        return JSONResponse(
            {
                "status": sync_state,
                "indexed_documents": indexed_count,
                "pending_documents": pending_count,
            }
        )

    except Exception as e:
        logger.error(f"Error getting vector sync status: {e}")
        failure_payload = {"error": "Internal error", "message": str(e)}
        return JSONResponse(failure_payload, status_code=500)
|
||||
|
||||
|
||||
async def get_user_session(request: Request) -> JSONResponse:
    """GET /api/v1/users/{user_id}/session - User session details.

    Returns information about the user's MCP session including:
    - Background access status (offline_access)
    - IdP profile information

    Requires OAuth bearer token. The user_id in the path must match
    the user_id in the token.

    Responses:
        401: the bearer token is missing or fails validation.
        403: the path user_id differs from the token's user.
        200: session info; ``background_access_granted`` is False when
            offline access is disabled (ENABLE_OFFLINE_ACCESS), when no
            storage is configured, or when no refresh token is stored.
        500: the storage lookup raised.
    """
    try:
        # Validate OAuth token and extract user (token details are not needed here)
        token_user_id, _ = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/users/{{user_id}}/session: {e}")
        return JSONResponse(
            {"error": "Unauthorized", "message": str(e)},
            status_code=401,
        )

    # Verify token user matches the user requested in the path
    path_user_id = request.path_params.get("user_id")
    if token_user_id != path_user_id:
        logger.warning(
            f"User {token_user_id} attempted to access session for {path_user_id}"
        )
        return JSONResponse(
            {
                "error": "Forbidden",
                "message": "Cannot access another user's session",
            },
            status_code=403,
        )

    # Check if offline access is enabled
    enable_offline_access = os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() in (
        "true",
        "1",
        "yes",
    )

    if not enable_offline_access:
        # Offline access disabled - return minimal session info
        return JSONResponse(
            {
                "session_id": token_user_id,
                "background_access_granted": False,
            }
        )

    # Get refresh token storage from app state
    storage = request.app.state.oauth_context.get("storage")
    if not storage:
        logger.error("Refresh token storage not available in app state")
        return JSONResponse(
            {
                "session_id": token_user_id,
                "background_access_granted": False,
                "error": "Storage not configured",
            }
        )

    try:
        # Check if user has refresh token stored
        refresh_token_data = await storage.get_refresh_token(token_user_id)

        if not refresh_token_data:
            # No refresh token - user hasn't provisioned background access
            return JSONResponse(
                {
                    "session_id": token_user_id,
                    "background_access_granted": False,
                }
            )

        # User has background access - get profile info
        profile = await storage.get_user_profile(token_user_id)

        # ``.get("scope", "")`` only covers a missing key; a record that
        # stores an explicit None scope would otherwise fail on ``.split()``
        # and turn the whole request into a 500.
        granted_scopes = (refresh_token_data.get("scope") or "").split()

        response_data = {
            "session_id": token_user_id,
            "background_access_granted": True,
            "background_access_details": {
                "granted_at": refresh_token_data.get("created_at"),
                "scopes": granted_scopes,
            },
        }

        if profile:
            response_data["idp_profile"] = profile

        return JSONResponse(response_data)

    except Exception as e:
        logger.error(f"Error getting user session for {token_user_id}: {e}")
        return JSONResponse(
            {"error": "Internal error", "message": str(e)},
            status_code=500,
        )
|
||||
|
||||
|
||||
async def revoke_user_access(request: Request) -> JSONResponse:
    """POST /api/v1/users/{user_id}/revoke - Revoke user's background access.

    Asks the refresh-token storage to delete the token stored for the
    authenticated user.

    Requires OAuth bearer token. The user_id in the path must match
    the user_id in the token.

    Responses:
        401: the bearer token is missing or fails validation.
        403: the path user_id differs from the token's user.
        500: no storage is configured, or the deletion raised.
        200: ``{"success": True, ...}`` once the deletion call returned.
    """
    # Step 1: authenticate the caller
    try:
        token_user_id, _token_info = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/users/{{user_id}}/revoke: {e}")
        unauthorized = {"error": "Unauthorized", "message": str(e)}
        return JSONResponse(unauthorized, status_code=401)

    # Step 2: callers may only revoke their own access
    path_user_id = request.path_params.get("user_id")
    if token_user_id != path_user_id:
        logger.warning(
            f"User {token_user_id} attempted to revoke access for {path_user_id}"
        )
        forbidden = {
            "error": "Forbidden",
            "message": "Cannot revoke another user's access",
        }
        return JSONResponse(forbidden, status_code=403)

    # Step 3: locate the refresh token storage in the app-state OAuth context
    storage = request.app.state.oauth_context.get("storage")
    if not storage:
        logger.error("Refresh token storage not available in app state")
        return JSONResponse({"error": "Storage not configured"}, status_code=500)

    # Step 4: delete the stored refresh token
    try:
        await storage.delete_refresh_token(token_user_id)
        logger.info(f"Revoked background access for user: {token_user_id}")
        confirmation = {
            "success": True,
            "message": f"Background access revoked for {token_user_id}",
        }
        return JSONResponse(confirmation)
    except Exception as e:
        logger.error(f"Error revoking access for {token_user_id}: {e}")
        failure = {"error": "Internal error", "message": str(e)}
        return JSONResponse(failure, status_code=500)
|
||||
|
||||
|
||||
async def vector_search(request: Request) -> JSONResponse:
    """POST /api/v1/vector-viz/search - Vector search for visualization.

    Executes semantic search and returns results with optional PCA coordinates
    for 3D visualization.

    Request body:
    {
        "query": "search query",
        "algorithm": "semantic|bm25|hybrid",  // default: hybrid
        "limit": 10,  // clamped to 1..50
        "include_pca": true,  // whether to include 3D coordinates
        "doc_types": ["note", "file"]  // optional filter by document types
    }

    Responses:
        404: vector sync is disabled in settings.
        401: the bearer token is missing or fails validation.
        400: the body is not a JSON object, ``query`` is missing/empty or not
            a string, or ``limit`` is not an integer.
        200: ``results``, ``algorithm_used``, ``total_documents`` and, when
            ``include_pca`` is truthy, ``coordinates_3d`` / ``query_coords``
            (empty lists when PCA is unavailable) plus ``pca_variance``
            when it was computed.
        500: any other failure.

    Requires OAuth bearer token for user filtering.
    """
    from nextcloud_mcp_server.config import get_settings

    settings = get_settings()
    if not settings.vector_sync_enabled:
        return JSONResponse(
            {"error": "Vector sync is disabled on this server"},
            status_code=404,
        )

    # Validate OAuth token and extract user
    try:
        user_id, _validated = await validate_token_and_get_user(request)
    except Exception as e:
        logger.warning(f"Unauthorized access to /api/v1/vector-viz/search: {e}")
        return JSONResponse(
            {"error": "Unauthorized", "message": str(e)},
            status_code=401,
        )

    try:
        # Parse request body. Malformed JSON is a client error, not a
        # server fault (JSON decode errors are ValueError subclasses).
        try:
            body = await request.json()
        except ValueError:
            return JSONResponse(
                {"error": "Request body must be valid JSON"},
                status_code=400,
            )
        if not isinstance(body, dict):
            return JSONResponse(
                {"error": "Request body must be a JSON object"},
                status_code=400,
            )

        query = body.get("query", "")
        algorithm = body.get("algorithm", "hybrid")
        raw_limit = body.get("limit", 10)
        include_pca = body.get("include_pca", True)
        doc_types = body.get("doc_types")  # Optional list of document types

        if not query or not isinstance(query, str):
            return JSONResponse(
                {"error": "Missing required parameter: query"},
                status_code=400,
            )

        # bool is an int subclass, so it is rejected explicitly
        if isinstance(raw_limit, bool) or not isinstance(raw_limit, int):
            return JSONResponse(
                {"error": "Parameter 'limit' must be an integer"},
                status_code=400,
            )
        limit = max(1, min(raw_limit, 50))  # Enforce 1..50

        # Unknown (or non-string) algorithms silently fall back to hybrid
        valid_algorithms = {"semantic", "bm25", "hybrid"}
        if not isinstance(algorithm, str) or algorithm not in valid_algorithms:
            algorithm = "hybrid"

        # Execute search using the appropriate algorithm
        from nextcloud_mcp_server.search import (
            BM25HybridSearchAlgorithm,
            SemanticSearchAlgorithm,
        )

        if algorithm == "semantic":
            search_algo = SemanticSearchAlgorithm(score_threshold=0.0)
        else:
            # Both "hybrid" and "bm25" use the BM25HybridSearchAlgorithm
            # which combines dense semantic and sparse BM25 vectors
            search_algo = BM25HybridSearchAlgorithm(score_threshold=0.0, fusion="rrf")

        # Execute search for each doc_type if specified, otherwise search all
        all_results = []
        if doc_types and isinstance(doc_types, list):
            # Search each doc_type separately and merge results
            for doc_type in doc_types:
                if not doc_type:  # Skip empty strings
                    continue
                results = await search_algo.search(
                    query=query,
                    user_id=user_id,
                    limit=limit,
                    doc_type=doc_type,
                )
                all_results.extend(results)
            # Sort merged results by score and limit
            all_results.sort(key=lambda r: r.score, reverse=True)
            all_results = all_results[:limit]
        else:
            # Search all document types
            all_results = await search_algo.search(
                query=query,
                user_id=user_id,
                limit=limit,
            )

        # Format results for PHP client (excerpt capped at 200 characters)
        formatted_results = [
            {
                "id": result.id,
                "doc_type": result.doc_type,
                "title": result.title,
                "excerpt": result.excerpt[:200] if result.excerpt else "",
                "score": result.score,
                "metadata": result.metadata,
            }
            for result in all_results
        ]

        response_data: dict[str, Any] = {
            "results": formatted_results,
            "algorithm_used": algorithm,
            "total_documents": len(formatted_results),
        }

        # Compute PCA coordinates for visualization using shared function
        if include_pca and len(all_results) >= 2:
            try:
                from nextcloud_mcp_server.vector.visualization import (
                    compute_pca_coordinates,
                )

                # Get query embedding from search algorithm or generate it
                if search_algo.query_embedding is not None:
                    query_embedding = search_algo.query_embedding
                else:
                    from nextcloud_mcp_server.embedding.service import (
                        get_embedding_service,
                    )

                    embedding_service = get_embedding_service()
                    query_embedding = await embedding_service.embed(query)

                pca_data = await compute_pca_coordinates(all_results, query_embedding)
                response_data["coordinates_3d"] = pca_data["coordinates_3d"]
                response_data["query_coords"] = pca_data["query_coords"]
                if "pca_variance" in pca_data:
                    response_data["pca_variance"] = pca_data["pca_variance"]
            except Exception as e:
                # PCA is best effort: the search results are still returned
                logger.warning(f"Failed to compute PCA coordinates: {e}")
                response_data["coordinates_3d"] = []
                response_data["query_coords"] = []
        elif include_pca:
            # Not enough results for PCA
            response_data["coordinates_3d"] = []
            response_data["query_coords"] = []

        return JSONResponse(response_data)

    except Exception as e:
        logger.error(f"Error executing vector search: {e}")
        return JSONResponse(
            {"error": "Internal error", "message": str(e)},
            status_code=500,
        )
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Shared visualization utilities for PCA coordinate computation.
|
||||
|
||||
Extracts the PCA coordinate computation logic used by both:
|
||||
- viz_routes.py (session-based auth)
|
||||
- management.py (OAuth bearer token auth)
|
||||
|
||||
Both endpoints need to compute 3D PCA coordinates for search results,
|
||||
so this module provides the shared implementation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import anyio.to_thread
|
||||
import numpy as np
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.pca import PCA
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def compute_pca_coordinates(
    search_results: list[Any],
    query_embedding: np.ndarray | list[float],
) -> dict[str, Any]:
    """Compute PCA 3D coordinates for search results visualization.

    This is the shared implementation used by both viz_routes.py and
    the management API. It retrieves the "dense" vectors of the result
    points from Qdrant, appends the query embedding, L2-normalizes every
    row and projects them to 3 components with PCA.

    Args:
        search_results: List of SearchResult objects; the code reads
            ``point_id``, ``id``, ``chunk_start_offset`` and
            ``chunk_end_offset`` from each.
        query_embedding: The query embedding vector

    Returns:
        Dict with:
        - coordinates_3d: List of [x, y, z] for each result, in
          ``search_results`` order, rounded to 2 decimals
        - query_coords: [x, y, z] for the query point
        - pca_variance: Dict with pc1, pc2, pc3 explained variance ratios

        When fewer than two points/vectors are available, only
        ``coordinates_3d`` and ``query_coords`` are returned, both empty.

    Raises:
        RuntimeError: If the fitted PCA exposes no explained variance ratios.
    """
    settings = get_settings()

    # Collect point IDs from search results for batch retrieval
    point_ids = [r.point_id for r in search_results if r.point_id]
    if len(point_ids) < 2:
        return {"coordinates_3d": [], "query_coords": []}

    qdrant_client = await get_qdrant_client()

    # Batch retrieve vectors from Qdrant
    points_response = await qdrant_client.retrieve(
        collection_name=settings.get_collection_name(),
        ids=point_ids,
        with_vectors=["dense"],
        with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
    )

    # Map (doc_id, chunk_start, chunk_end) -> dense vector
    chunk_vectors_map: dict[tuple[Any, Any, Any], Any] = {}
    for point in points_response:
        if point.vector is None:
            continue

        # Extract dense vector (handle both named and unnamed vectors)
        if isinstance(point.vector, dict):
            vector = point.vector.get("dense")
        else:
            vector = point.vector

        if vector is None or not point.payload:
            continue

        chunk_key = (
            point.payload.get("doc_id"),
            point.payload.get("chunk_start_offset"),
            point.payload.get("chunk_end_offset"),
        )
        chunk_vectors_map[chunk_key] = vector

    if len(chunk_vectors_map) < 2:
        return {"coordinates_3d": [], "query_coords": []}

    # Detect embedding dimension from any stored vector; the map is
    # non-empty here and never holds None values.
    embedding_dim = len(next(iter(chunk_vectors_map.values())))
    logger.info(f"Detected embedding dimension: {embedding_dim}")

    # Build chunk vectors array in search_results order (1:1 mapping)
    chunk_rows = []
    for result in search_results:
        chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
        if chunk_key in chunk_vectors_map:
            chunk_rows.append(chunk_vectors_map[chunk_key])
        else:
            # Chunk not found in vectors (shouldn't happen); keep the slot
            # so coordinates stay aligned with search_results.
            logger.warning(
                f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
            )
            chunk_rows.append(np.zeros(embedding_dim))

    chunk_matrix = np.array(chunk_rows)

    # Ensure query_embedding is a numpy array
    query_vector = np.asarray(query_embedding)

    # Combine query vector with chunk vectors for PCA
    # Query will be the last point in the array
    all_vectors = np.vstack([chunk_matrix, np.array([query_vector])])

    # Normalize vectors to unit length (L2 normalization)
    # This is critical because Qdrant uses COSINE distance, which only measures
    # vector direction (angle), not magnitude. PCA uses Euclidean distance which
    # considers both direction and magnitude. By normalizing to unit length,
    # Euclidean distances in PCA space will match cosine distances.
    norms = np.linalg.norm(all_vectors, axis=1, keepdims=True)

    # Check for zero-norm vectors (can happen with empty/corrupted embeddings)
    zero_norm_mask = norms[:, 0] < 1e-10
    if zero_norm_mask.any():
        zero_indices = np.where(zero_norm_mask)[0]
        logger.warning(
            f"Found {zero_norm_mask.sum()} zero-norm vectors at indices "
            f"{zero_indices.tolist()}. Replacing with small epsilon to avoid "
            "division by zero."
        )
        # Replace zero norms with small epsilon to avoid NaN
        norms[zero_norm_mask] = 1e-10

    all_vectors_normalized = all_vectors / norms
    logger.info(
        f"Normalized vectors: query_norm={norms[-1][0]:.3f}, "
        f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]"
    )

    # Apply PCA dimensionality reduction (embedding_dim → 3D)
    # Run in thread pool to avoid blocking the event loop (CPU-bound)
    def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
        pca = PCA(n_components=3)
        coords = pca.fit_transform(vectors)
        return coords, pca

    coords_3d, pca = await anyio.to_thread.run_sync(
        _compute_pca, all_vectors_normalized
    )

    # Explicit check instead of assert: asserts are stripped under -O
    if pca.explained_variance_ratio_ is None:
        raise RuntimeError("PCA did not produce explained variance ratios")

    # Check for NaN values in PCA output (numerical instability)
    nan_mask = np.isnan(coords_3d)
    if nan_mask.any():
        nan_rows = np.where(nan_mask.any(axis=1))[0]
        logger.error(
            f"Found NaN values in PCA output at {len(nan_rows)} points: "
            f"{nan_rows.tolist()[:10]}. Replacing NaN with 0.0 to prevent "
            "JSON serialization error."
        )
        # Replace NaN with 0 to allow JSON serialization
        coords_3d = np.nan_to_num(coords_3d, nan=0.0)

    # The variance ratios go into the same JSON payload as the coordinates,
    # so they get the same NaN scrubbing (e.g. 0/0 when total variance is 0).
    variance_ratio = np.asarray(pca.explained_variance_ratio_, dtype=float)
    if np.isnan(variance_ratio).any():
        logger.warning(
            "Found NaN values in PCA explained variance ratios. "
            "Replacing NaN with 0.0 to prevent JSON serialization error."
        )
        variance_ratio = np.nan_to_num(variance_ratio, nan=0.0)

    # Split query coords from chunk coords
    # Round to 2 decimal places for cleaner display
    query_coords_3d = [round(float(x), 2) for x in coords_3d[-1]]  # Last point is query
    chunk_coords_3d = coords_3d[:-1]  # All but last are chunks

    logger.info(
        f"PCA explained variance: PC1={variance_ratio[0]:.3f}, "
        f"PC2={variance_ratio[1]:.3f}, "
        f"PC3={variance_ratio[2]:.3f}"
    )

    # Coordinates already match search_results order (1:1 mapping)
    result_coords = [[round(float(x), 2) for x in coord] for coord in chunk_coords_3d]

    return {
        "coordinates_3d": result_coords,
        "query_coords": query_coords_3d,
        "pca_variance": {
            "pc1": float(variance_ratio[0]),
            "pc2": float(variance_ratio[1]),
            "pc3": float(variance_ratio[2]),
        },
    }
|
||||
+23
-7
@@ -61,7 +61,7 @@ class ApiController extends Controller {
|
||||
// Should not happen (NoAdminRequired ensures user is logged in)
|
||||
$this->logger->error('Revoke access called without authenticated user');
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
}
|
||||
|
||||
@@ -72,7 +72,7 @@ class ApiController extends Controller {
|
||||
if (!$token) {
|
||||
$this->logger->error("Cannot revoke access: No token found for user $userId");
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ class ApiController extends Controller {
|
||||
|
||||
// Redirect back to personal settings
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
}
|
||||
|
||||
@@ -107,6 +107,7 @@ class ApiController extends Controller {
|
||||
* @param string $algorithm Search algorithm (semantic, bm25, hybrid)
|
||||
* @param int $limit Number of results (max 50)
|
||||
* @param string $doc_types Comma-separated document types (e.g., "note,file")
|
||||
* @param string $include_pca Whether to include PCA coordinates for visualization
|
||||
* @return JSONResponse
|
||||
*/
|
||||
#[NoAdminRequired]
|
||||
@@ -114,7 +115,8 @@ class ApiController extends Controller {
|
||||
string $query = '',
|
||||
string $algorithm = 'hybrid',
|
||||
int $limit = 10,
|
||||
string $doc_types = ''
|
||||
string $doc_types = '',
|
||||
string $include_pca = 'true'
|
||||
): JSONResponse {
|
||||
if (empty($query)) {
|
||||
return new JSONResponse([
|
||||
@@ -166,8 +168,11 @@ class ApiController extends Controller {
|
||||
}
|
||||
}
|
||||
|
||||
// Parse include_pca (string "true"/"false" from query params)
|
||||
$includePcaBool = in_array(strtolower($include_pca), ['true', '1', 'yes'], true);
|
||||
|
||||
// Execute search via MCP server with OAuth token
|
||||
$result = $this->client->search($query, $algorithm, $limit, false, $docTypesArray, $accessToken);
|
||||
$result = $this->client->search($query, $algorithm, $limit, $includePcaBool, $docTypesArray, $accessToken);
|
||||
|
||||
if (isset($result['error'])) {
|
||||
return new JSONResponse([
|
||||
@@ -176,12 +181,23 @@ class ApiController extends Controller {
|
||||
], Http::STATUS_INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
|
||||
return new JSONResponse([
|
||||
$response = [
|
||||
'success' => true,
|
||||
'results' => $result['results'] ?? [],
|
||||
'algorithm_used' => $result['algorithm_used'] ?? $algorithm,
|
||||
'total_documents' => $result['total_documents'] ?? 0,
|
||||
]);
|
||||
];
|
||||
|
||||
// Include PCA visualization coordinates if requested and available
|
||||
if ($includePcaBool) {
|
||||
$response['coordinates_3d'] = $result['coordinates_3d'] ?? [];
|
||||
$response['query_coords'] = $result['query_coords'] ?? [];
|
||||
if (isset($result['pca_variance'])) {
|
||||
$response['pca_variance'] = $result['pca_variance'];
|
||||
}
|
||||
}
|
||||
|
||||
return new JSONResponse($response);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -200,7 +200,7 @@ class OAuthController extends Controller {
|
||||
|
||||
// Redirect back to personal settings
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
} catch (\Exception $e) {
|
||||
$this->logger->error('OAuth callback failed', [
|
||||
@@ -215,7 +215,7 @@ class OAuthController extends Controller {
|
||||
// Redirect to settings with error
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', [
|
||||
'section' => 'mcp',
|
||||
'section' => 'astroglobe',
|
||||
'error' => urlencode($e->getMessage())
|
||||
])
|
||||
);
|
||||
@@ -234,7 +234,7 @@ class OAuthController extends Controller {
|
||||
$user = $this->userSession->getUser();
|
||||
if (!$user) {
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
}
|
||||
|
||||
@@ -250,7 +250,7 @@ class OAuthController extends Controller {
|
||||
}
|
||||
|
||||
return new RedirectResponse(
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'mcp'])
|
||||
$this->urlGenerator->linkToRoute('settings.PersonalSettings.index', ['section' => 'astroglobe'])
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
+7
@@ -13,6 +13,7 @@
|
||||
"@nextcloud/l10n": "^3.1.0",
|
||||
"@nextcloud/router": "^3.0.1",
|
||||
"@nextcloud/vue": "^8.29.2",
|
||||
"plotly.js-dist-min": "^2.35.3",
|
||||
"vue": "^2.7.16",
|
||||
"vue-material-design-icons": "^5.3.1"
|
||||
},
|
||||
@@ -9969,6 +9970,12 @@
|
||||
"pathe": "^2.0.3"
|
||||
}
|
||||
},
|
||||
"node_modules/plotly.js-dist-min": {
|
||||
"version": "2.35.3",
|
||||
"resolved": "https://registry.npmjs.org/plotly.js-dist-min/-/plotly.js-dist-min-2.35.3.tgz",
|
||||
"integrity": "sha512-sz2HLP8gkysLx/BanM2PtJTtZ1PLPwdHwMWNri2YxLBy3IOeuDsVQtlmWa4hoK3j/fi4naaD3uZJqH5ozM3zGg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/possible-typed-array-names": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz",
|
||||
|
||||
Vendored
+1
@@ -22,6 +22,7 @@
|
||||
"@nextcloud/l10n": "^3.1.0",
|
||||
"@nextcloud/router": "^3.0.1",
|
||||
"@nextcloud/vue": "^8.29.2",
|
||||
"plotly.js-dist-min": "^2.35.3",
|
||||
"vue": "^2.7.16",
|
||||
"vue-material-design-icons": "^5.3.1"
|
||||
},
|
||||
|
||||
Vendored
+190
-1
@@ -131,6 +131,22 @@
|
||||
<span class="mcp-algorithm-badge">{{ algorithmUsed }}</span>
|
||||
</div>
|
||||
|
||||
<!-- 3D Visualization -->
|
||||
<div v-if="coordinates.length > 0" class="mcp-viz-container">
|
||||
<div class="mcp-viz-header">
|
||||
<h3>{{ t('astroglobe', 'Vector Space Visualization') }}</h3>
|
||||
<NcCheckboxRadioSwitch
|
||||
:checked.sync="showQueryPoint"
|
||||
type="switch"
|
||||
@update:checked="updatePlot">
|
||||
{{ t('astroglobe', 'Show query point') }}
|
||||
</NcCheckboxRadioSwitch>
|
||||
</div>
|
||||
<div id="viz-plot-container" class="mcp-viz-plot-container">
|
||||
<div id="viz-plot" ref="vizPlot" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mcp-results-list">
|
||||
<div
|
||||
v-for="result in results"
|
||||
@@ -248,6 +264,7 @@ import Refresh from 'vue-material-design-icons/Refresh.vue'
|
||||
|
||||
import axios from '@nextcloud/axios'
|
||||
import { generateUrl } from '@nextcloud/router'
|
||||
import Plotly from 'plotly.js-dist-min'
|
||||
|
||||
// App name for translations
|
||||
const APP_NAME = 'astroglobe'
|
||||
@@ -287,6 +304,10 @@ export default {
|
||||
results: [],
|
||||
algorithmUsed: '',
|
||||
searched: false,
|
||||
// Visualization state
|
||||
coordinates: [],
|
||||
queryCoords: [],
|
||||
showQueryPoint: true,
|
||||
// Vector status state
|
||||
vectorStatus: null,
|
||||
statusLoading: false,
|
||||
@@ -325,6 +346,8 @@ export default {
|
||||
this.loading = true
|
||||
this.error = null
|
||||
this.searched = true
|
||||
this.coordinates = []
|
||||
this.queryCoords = []
|
||||
|
||||
try {
|
||||
const url = generateUrl('/apps/astroglobe/api/search')
|
||||
@@ -332,6 +355,7 @@ export default {
|
||||
query: queryText,
|
||||
algorithm: this.algorithm,
|
||||
limit: parseInt(this.limit) || 20,
|
||||
include_pca: true,
|
||||
}
|
||||
|
||||
if (this.selectedDocTypes.length > 0) {
|
||||
@@ -343,6 +367,15 @@ export default {
|
||||
if (response.data.success) {
|
||||
this.results = response.data.results || []
|
||||
this.algorithmUsed = response.data.algorithm_used || this.algorithm
|
||||
this.coordinates = response.data.coordinates_3d || []
|
||||
this.queryCoords = response.data.query_coords || []
|
||||
|
||||
// Render visualization after DOM updates
|
||||
if (this.coordinates.length > 0) {
|
||||
this.$nextTick(() => {
|
||||
this.renderPlot()
|
||||
})
|
||||
}
|
||||
} else {
|
||||
this.error = response.data.error || this.t('astroglobe', 'Search failed')
|
||||
this.results = []
|
||||
@@ -382,7 +415,130 @@ export default {
|
||||
},
|
||||
|
||||
goToSettings() {
|
||||
window.location.href = generateUrl('/settings/user/mcp')
|
||||
window.location.href = generateUrl('/settings/user/astroglobe')
|
||||
},
|
||||
|
||||
renderPlot() {
|
||||
const container = document.getElementById('viz-plot-container')
|
||||
if (!container) return
|
||||
|
||||
const width = container.clientWidth
|
||||
const height = container.clientHeight || 400
|
||||
|
||||
const coordinates = this.coordinates
|
||||
const queryCoords = this.queryCoords
|
||||
const results = this.results
|
||||
|
||||
const scores = results.map(r => r.score)
|
||||
|
||||
// Trace 1: Document results (always visible)
|
||||
const documentTrace = {
|
||||
x: coordinates.map(c => c[0]),
|
||||
y: coordinates.map(c => c[1]),
|
||||
z: coordinates.map(c => c[2]),
|
||||
mode: 'markers',
|
||||
type: 'scatter3d',
|
||||
name: 'Documents',
|
||||
visible: true,
|
||||
customdata: results.map((r, i) => ({
|
||||
title: r.title,
|
||||
raw_score: r.original_score || r.score,
|
||||
relative_score: r.score,
|
||||
x: coordinates[i][0],
|
||||
y: coordinates[i][1],
|
||||
z: coordinates[i][2],
|
||||
})),
|
||||
hovertemplate:
|
||||
'<b>%{customdata.title}</b><br>'
|
||||
+ 'Raw Score: %{customdata.raw_score:.3f} (%{customdata.relative_score:.0%} relative)<br>'
|
||||
+ '(x=%{customdata.x}, y=%{customdata.y}, z=%{customdata.z})'
|
||||
+ '<extra></extra>',
|
||||
marker: {
|
||||
size: results.map(r => 4 + (Math.pow(r.score, 2) * 10)),
|
||||
opacity: results.map(r => 0.3 + (r.score * 0.7)),
|
||||
color: scores,
|
||||
colorscale: 'Viridis',
|
||||
showscale: true,
|
||||
colorbar: {
|
||||
title: 'Relative Score',
|
||||
x: 1.02,
|
||||
xanchor: 'left',
|
||||
thickness: 20,
|
||||
len: 0.8,
|
||||
},
|
||||
cmin: 0,
|
||||
cmax: 1,
|
||||
},
|
||||
}
|
||||
|
||||
// Trace 2: Query point (visibility controlled by toggle)
|
||||
const queryTrace = {
|
||||
x: [queryCoords[0]],
|
||||
y: [queryCoords[1]],
|
||||
z: [queryCoords[2]],
|
||||
mode: 'markers',
|
||||
type: 'scatter3d',
|
||||
name: 'Query',
|
||||
visible: this.showQueryPoint,
|
||||
hovertemplate:
|
||||
'<b>Search Query</b><br>'
|
||||
+ `(x=${queryCoords[0]}, y=${queryCoords[1]}, z=${queryCoords[2]})`
|
||||
+ '<extra></extra>',
|
||||
marker: {
|
||||
size: 10,
|
||||
color: '#ef5350', // Subdued red (Material Design Red 400)
|
||||
line: {
|
||||
color: '#c62828', // Darker red border (Material Design Red 800)
|
||||
width: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
const layout = {
|
||||
title: `Vector Space (PCA 3D) - ${results.length} results`,
|
||||
width,
|
||||
height,
|
||||
scene: {
|
||||
xaxis: { title: 'PC1' },
|
||||
yaxis: { title: 'PC2' },
|
||||
zaxis: { title: 'PC3' },
|
||||
camera: {
|
||||
eye: { x: 1.5, y: 1.5, z: 1.5 },
|
||||
},
|
||||
domain: {
|
||||
x: [0, 1],
|
||||
y: [0, 1],
|
||||
},
|
||||
},
|
||||
hovermode: 'closest',
|
||||
autosize: true,
|
||||
showlegend: false,
|
||||
margin: { l: 0, r: 100, t: 40, b: 0 },
|
||||
}
|
||||
|
||||
const traces = [documentTrace, queryTrace]
|
||||
|
||||
const config = {
|
||||
responsive: true,
|
||||
displayModeBar: true,
|
||||
}
|
||||
|
||||
Plotly.newPlot('viz-plot', traces, layout, config)
|
||||
},
|
||||
|
||||
updatePlot() {
|
||||
// Toggle query point visibility without recreating the plot
|
||||
if (this.coordinates.length > 0 && this.queryCoords.length > 0 && this.results.length > 0) {
|
||||
const plotDiv = document.getElementById('viz-plot')
|
||||
|
||||
if (plotDiv && plotDiv.data && plotDiv.data.length >= 2) {
|
||||
// Trace index 1 is the query point
|
||||
Plotly.restyle('viz-plot', { visible: this.showQueryPoint }, [1])
|
||||
} else {
|
||||
// Plot doesn't exist yet, render it
|
||||
this.renderPlot()
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -478,6 +634,39 @@ export default {
|
||||
margin: 16px 0;
|
||||
}
|
||||
|
||||
// Visualization
|
||||
// Panel wrapping the visualization header and the plot area.
.mcp-viz-container {
|
||||
background: var(--color-background-hover);
|
||||
border-radius: var(--border-radius-large);
|
||||
padding: 16px;
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
|
||||
// Header row: heading on the left, "Show query point" switch on the right.
.mcp-viz-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 12px;
|
||||
|
||||
// Heading inside the header row; margin reset to 0.
h3 {
|
||||
margin: 0;
|
||||
font-size: 16px;
|
||||
font-weight: 600;
|
||||
}
|
||||
}
|
||||
|
||||
// Sizing box for the plot: renderPlot() reads this element's clientWidth and
// clientHeight (falling back to 400 when the height is 0) for the layout size.
.mcp-viz-plot-container {
|
||||
width: 100%;
|
||||
height: 400px;
|
||||
background: var(--color-main-background);
|
||||
border-radius: var(--border-radius);
|
||||
}
|
||||
|
||||
// Plot target element (passed by id to Plotly.newPlot / Plotly.restyle);
// fills its .mcp-viz-plot-container parent.
#viz-plot {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
// Results
|
||||
.mcp-results {
|
||||
margin-top: 24px;
|
||||
|
||||
Reference in New Issue
Block a user