From 73783b85d5bb86f0e69279cf6c0b6321ec0d7341 Mon Sep 17 00:00:00 2001
From: Chris Coutinho
Date: Sun, 14 Dec 2025 23:56:01 +0100
Subject: [PATCH] feat(astrolabe): use proper icons and thumbnails in unified search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Improve search result display to match Nextcloud's native search providers
by using mimetype-specific icons and preview thumbnails.

**File Results:**
- Use preview thumbnails for images/PDFs (core.Preview API)
- Use mimetype-specific icon classes (icon-pdf, icon-text, icon-image, etc.)
- Detect folders and use icon-folder appropriately

**Other Document Types:**
- Notes: icon-notes
- Deck Cards: icon-deck
- Calendar: icon-calendar
- News: icon-rss
- Contacts: icon-contacts

**API Changes:**
- Management API now includes mime_type in search results
- SemanticSearchProvider uses IMimeTypeDetector and IPreview services

This makes Astroglobe search results visually consistent with Files, Notes,
and other native providers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5
---
Review notes: patch text re-wrapped onto real lines; unified_search request
validation hardened and its docstring completed; search result id cast to
string on the PHP side. The blob ids on the "index" lines predate these
revisions.

 nextcloud_mcp_server/api/management.py        | 243 ++++++++++++++++++
 .../lib/Search/SemanticSearchProvider.php     |  60 +++--
 2 files changed, 289 insertions(+), 14 deletions(-)

diff --git a/nextcloud_mcp_server/api/management.py b/nextcloud_mcp_server/api/management.py
index 84d6a3b..46187a6 100644
--- a/nextcloud_mcp_server/api/management.py
+++ b/nextcloud_mcp_server/api/management.py
@@ -393,6 +393,249 @@ async def revoke_user_access(request: Request) -> JSONResponse:
         )
 
 
+async def unified_search(request: Request) -> JSONResponse:
+    """POST /api/v1/search - Search endpoint for Nextcloud Unified Search.
+
+    Optimized search endpoint for the Nextcloud Unified Search provider
+    and other PHP app integrations. Returns results with metadata needed
+    for navigation to source documents.
+
+    Request body:
+    {
+        "query": "search query",
+        "algorithm": "semantic|bm25|hybrid",  // default: hybrid
+        "fusion": "rrf|dbsf",  // default: rrf
+        "score_threshold": 0.0,  // clamped to [0.0, 1.0]
+        "limit": 20,  // max: 100
+        "offset": 0,  // pagination offset
+        "include_pca": false,  // optional PCA coordinates
+        "include_chunks": true,  // include text snippets
+        "doc_types": ["note", "file"]  // optional document type filter
+    }
+
+    Response:
+    {
+        "results": [{
+            "id": "doc123",
+            "doc_type": "note",
+            "title": "Document Title",
+            "excerpt": "Matching text snippet...",
+            "score": 0.85,
+            "path": "/path/to/file.txt",  // for files
+            "mime_type": "text/plain",  // for files
+            "board_id": 1,  // for deck cards
+            "card_id": 42
+        }],
+        "total_found": 150,
+        "algorithm_used": "hybrid"
+    }
+
+    Requires OAuth bearer token for user filtering.
+
+    A body that is not a JSON object, or a non-numeric limit, offset or
+    score_threshold, is rejected with 400 instead of surfacing as a 500.
+    """
+    from nextcloud_mcp_server.config import get_settings
+
+    settings = get_settings()
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"error": "Vector sync is disabled on this server"},
+            status_code=404,
+        )
+
+    # Validate OAuth token and extract user
+    try:
+        user_id, _validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/search: {e}")
+        return JSONResponse(
+            {"error": "Unauthorized", "message": str(e)},
+            status_code=401,
+        )
+
+    try:
+        # Parse request body; malformed input is a client error (400)
+        try:
+            body = await request.json()
+            if not isinstance(body, dict):
+                raise ValueError("request body must be a JSON object")
+            score_threshold = float(body.get("score_threshold", 0.0))
+            limit = int(body.get("limit", 20))
+            offset = int(body.get("offset", 0))
+        except (TypeError, ValueError, OverflowError) as e:
+            return JSONResponse(
+                {"error": "Invalid request", "message": str(e)},
+                status_code=400,
+            )
+
+        query = body.get("query", "")
+        algorithm = body.get("algorithm", "hybrid")
+        fusion = body.get("fusion", "rrf")
+        limit = max(0, min(limit, 100))  # Enforce max limit, never negative
+        offset = max(0, offset)  # A negative offset would mis-slice below
+        include_pca = body.get("include_pca", False)
+        include_chunks = body.get("include_chunks", True)
+        doc_types = body.get("doc_types")  # Optional filter
+
+        if not isinstance(query, str) or not query.strip():
+            return JSONResponse({"results": [], "total_found": 0})
+
+        # Validate algorithm (tuple lookup: unhashable JSON values must not raise)
+        valid_algorithms = ("semantic", "bm25", "hybrid")
+        if algorithm not in valid_algorithms:
+            algorithm = "hybrid"
+
+        # Validate fusion method
+        valid_fusions = ("rrf", "dbsf")
+        if fusion not in valid_fusions:
+            fusion = "rrf"
+
+        # Validate score threshold
+        score_threshold = max(0.0, min(1.0, score_threshold))
+
+        # Execute search using the appropriate algorithm
+        from nextcloud_mcp_server.search import (
+            BM25HybridSearchAlgorithm,
+            SemanticSearchAlgorithm,
+        )
+
+        # Select search algorithm
+        if algorithm == "semantic":
+            search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+        else:
+            search_algo = BM25HybridSearchAlgorithm(
+                score_threshold=score_threshold, fusion=fusion
+            )
+
+        # Request extra results to handle offset
+        search_limit = limit + offset
+
+        # Execute search
+        all_results = []
+        if doc_types and isinstance(doc_types, list):
+            for doc_type in doc_types:
+                if doc_type:
+                    results = await search_algo.search(
+                        query=query,
+                        user_id=user_id,
+                        limit=search_limit,
+                        doc_type=doc_type,
+                    )
+                    all_results.extend(results)
+            all_results.sort(key=lambda r: r.score, reverse=True)
+        else:
+            all_results = await search_algo.search(
+                query=query,
+                user_id=user_id,
+                limit=search_limit,
+            )
+
+        # Deduplicate results by document (multiple chunks may come from same doc)
+        # Keep highest-scoring chunk per document
+        doc_map: dict[str, Any] = {}  # key: "doc_type:id" -> best result
+        for result in all_results:
+            # Build document key from type and ID
+            doc_id = result.id
+            if result.metadata:
+                # Use note_id if present (for notes), otherwise use result.id
+                doc_id = result.metadata.get("note_id", result.id)
+            doc_key = f"{result.doc_type}:{doc_id}"
+
+            # Keep only the highest-scoring chunk per document
+            if doc_key not in doc_map or result.score > doc_map[doc_key].score:
+                doc_map[doc_key] = result
+
+        # Convert back to list and sort by score
+        deduplicated_results = sorted(
+            doc_map.values(), key=lambda r: r.score, reverse=True
+        )
+
+        # Calculate total and apply pagination (on deduplicated results)
+        total_found = len(deduplicated_results)
+        paginated_results = deduplicated_results[offset : offset + limit]
+
+        # Format results for Unified Search
+        formatted_results = []
+        for result in paginated_results:
+            # Get document ID (prefer note_id for notes)
+            doc_id = result.id
+            if result.metadata and "note_id" in result.metadata:
+                doc_id = result.metadata["note_id"]
+
+            result_data: dict[str, Any] = {
+                "id": doc_id,
+                "doc_type": result.doc_type,
+                "title": result.title,
+                "score": result.score,
+            }
+
+            # Include excerpt/chunk if requested (full content, no truncation)
+            if include_chunks and result.excerpt:
+                result_data["excerpt"] = result.excerpt
+
+            # Include navigation metadata from result.metadata
+            if result.metadata:
+                # File path and mimetype for files
+                if "path" in result.metadata:
+                    result_data["path"] = result.metadata["path"]
+                if "mime_type" in result.metadata:
+                    result_data["mime_type"] = result.metadata["mime_type"]
+
+                # Deck card navigation
+                if "board_id" in result.metadata:
+                    result_data["board_id"] = result.metadata["board_id"]
+                if "card_id" in result.metadata:
+                    result_data["card_id"] = result.metadata["card_id"]
+
+                # Calendar event metadata
+                if "calendar_id" in result.metadata:
+                    result_data["calendar_id"] = result.metadata["calendar_id"]
+                if "event_uid" in result.metadata:
+                    result_data["event_uid"] = result.metadata["event_uid"]
+
+            formatted_results.append(result_data)
+
+        response_data: dict[str, Any] = {
+            "results": formatted_results,
+            "total_found": total_found,
+            "algorithm_used": algorithm,
+        }
+
+        # Optional PCA coordinates
+        if include_pca and len(paginated_results) >= 2:
+            try:
+                from nextcloud_mcp_server.vector.visualization import (
+                    compute_pca_coordinates,
+                )
+
+                if search_algo.query_embedding is not None:
+                    query_embedding = search_algo.query_embedding
+                else:
+                    from nextcloud_mcp_server.embedding.service import (
+                        get_embedding_service,
+                    )
+
+                    embedding_service = get_embedding_service()
+                    query_embedding = await embedding_service.embed(query)
+
+                pca_data = await compute_pca_coordinates(
+                    paginated_results, query_embedding
+                )
+                response_data["pca_data"] = pca_data
+            except Exception as e:
+                logger.warning(f"Failed to compute PCA for unified search: {e}")
+
+        return JSONResponse(response_data)
+
+    except Exception as e:
+        logger.exception(f"Error in unified search: {e}")
+        return JSONResponse(
+            {"error": "Internal error", "message": str(e)},
+            status_code=500,
+        )
+
+
 async def vector_search(request: Request) -> JSONResponse:
     """POST /api/v1/vector-viz/search - Vector search for visualization.
 
diff --git a/third_party/astroglobe/lib/Search/SemanticSearchProvider.php b/third_party/astroglobe/lib/Search/SemanticSearchProvider.php
index c19eb25..8256196 100644
--- a/third_party/astroglobe/lib/Search/SemanticSearchProvider.php
+++ b/third_party/astroglobe/lib/Search/SemanticSearchProvider.php
@@ -8,8 +8,11 @@ use OCA\Astroglobe\AppInfo\Application;
 use OCA\Astroglobe\Service\McpServerClient;
 use OCA\Astroglobe\Service\McpTokenStorage;
 use OCA\Astroglobe\Settings\Admin as AdminSettings;
+use OCP\Files\FileInfo;
+use OCP\Files\IMimeTypeDetector;
 use OCP\IConfig;
 use OCP\IL10N;
+use OCP\IPreview;
 use OCP\IURLGenerator;
 use OCP\IUser;
 use OCP\Search\IProvider;
@@ -35,6 +38,8 @@ class SemanticSearchProvider implements IProvider {
 		private IConfig $config,
 		private IL10N $l10n,
 		private IURLGenerator $urlGenerator,
+		private IMimeTypeDetector $mimeTypeDetector,
+		private IPreview $previewManager,
 		private LoggerInterface $logger,
 	) {
 	}
@@ -174,12 +179,14 @@ class SemanticSearchProvider implements IProvider {
 			$title = $result['title'] ?? $this->l10n->t('Untitled');
 			$excerpt = $result['excerpt'] ?? '';
 			$score = $result['score'] ?? 0;
+			$id = isset($result['id']) ? (string)$result['id'] : null;
+			$mimeType = $result['mime_type'] ?? null;
 
 			// Build resource URL based on document type
 			$resourceUrl = $this->buildResourceUrl($result);
 
-			// Build thumbnail URL based on document type
-			$thumbnailUrl = $this->buildThumbnailUrl($docType);
+			// Get icon and thumbnail based on document type
+			[$thumbnailUrl, $iconClass] = $this->getIconAndThumbnail($docType, $id, $mimeType);
 
 			// Subline shows full excerpt if available, otherwise document type and score
 			if (!empty($excerpt)) {
@@ -199,7 +206,7 @@ class SemanticSearchProvider implements IProvider {
 				$title,
 				$subline,
 				$resourceUrl,
-				'', // icon class (empty, using thumbnail)
+				$iconClass,
 				false // not rounded
 			);
 		}
@@ -239,19 +246,44 @@ class SemanticSearchProvider implements IProvider {
 	}
 
 	/**
-	 * Get thumbnail URL for document type.
+	 * Get icon and thumbnail for document type.
+	 *
+	 * Returns [thumbnailUrl, iconClass] tuple.
+	 * For files, uses mimetype-specific icons and preview thumbnails when available.
+	 * For other document types, uses appropriate icon classes.
+	 *
+	 * @return array{string, string} [thumbnailUrl, iconClass]
 	 */
-	private function buildThumbnailUrl(string $docType): string {
-		// Use app icons for different document types
-		return match ($docType) {
-			'note' => $this->urlGenerator->imagePath('notes', 'app.svg'),
-			'file' => $this->urlGenerator->imagePath('files', 'app.svg'),
-			'deck_card' => $this->urlGenerator->imagePath('deck', 'app.svg'),
-			'calendar', 'calendar_event' => $this->urlGenerator->imagePath('calendar', 'app.svg'),
-			'news_item' => $this->urlGenerator->imagePath('news', 'app.svg'),
-			'contact' => $this->urlGenerator->imagePath('contacts', 'app.svg'),
-			default => $this->urlGenerator->imagePath(Application::APP_ID, 'app.svg'),
+	private function getIconAndThumbnail(string $docType, ?string $id, ?string $mimeType): array {
+		if ($docType === 'file' && $id !== null && $mimeType !== null) {
+			// For files, check if preview is supported
+			$thumbnailUrl = '';
+			if ($this->previewManager->isMimeSupported($mimeType)) {
+				$thumbnailUrl = $this->urlGenerator->linkToRouteAbsolute(
+					'core.Preview.getPreviewByFileId',
+					['x' => 32, 'y' => 32, 'fileId' => $id]
+				);
+			}
+
+			// Get mimetype-specific icon class
+			$iconClass = $mimeType === FileInfo::MIMETYPE_FOLDER
+				? 'icon-folder'
+				: $this->mimeTypeDetector->mimeTypeIcon($mimeType);
+
+			return [$thumbnailUrl, $iconClass];
+		}
+
+		// For non-file document types, use icon classes
+		$iconClass = match ($docType) {
+			'note' => 'icon-notes',
+			'deck_card' => 'icon-deck',
+			'calendar', 'calendar_event' => 'icon-calendar',
+			'news_item' => 'icon-rss',
+			'contact' => 'icon-contacts',
+			default => 'icon-file',
 		};
+
+		return ['', $iconClass];
 	}
 
 	/**