From 3464b21845dd810b06d646ea708491593ab94221 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Mon, 17 Nov 2025 06:32:30 +0100 Subject: [PATCH] fix: Relax SearchResult validation to support DBSF fusion scores > 1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix false-positive validation error where DBSF (Distribution-Based Score Fusion) correctly produces scores > 1.0 but SearchResult validation incorrectly rejected them. **Root Cause**: SearchResult.__post_init__() enforced scores in [0.0, 1.0] range, but DBSF sums normalized scores from multiple retrieval systems (dense semantic + sparse BM25), resulting in scores like 1.55 when both systems strongly agree a document is relevant. **Changes**: - Relaxed validation to allow any score ≥ 0.0 (algorithms.py:147-157) - Updated SearchResult and SemanticSearchResult documentation to explain score ranges for RRF ([0.0, 1.0]) vs DBSF (unbounded) - Added comprehensive test coverage for both fusion methods - Added DBSF fusion option to vector visualization UI - Updated viz routes and vizApp() to support fusion parameter selection **Testing**: All 157 unit tests pass, type checking passes, ruff passes Fixes error: "Configuration error: Score must be between 0.0 and 1.0, got 1.1528953" 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../auth/templates/vector_viz.html | 323 +++++++++++++++ nextcloud_mcp_server/auth/userinfo_routes.py | 53 +++ nextcloud_mcp_server/auth/viz_routes.py | 386 +++++++----------- nextcloud_mcp_server/models/semantic.py | 15 +- nextcloud_mcp_server/search/algorithms.py | 21 +- nextcloud_mcp_server/search/bm25_hybrid.py | 39 +- tests/unit/search/__init__.py | 1 + tests/unit/search/test_search_result.py | 135 ++++++ 8 files changed, 709 insertions(+), 264 deletions(-) create mode 100644 nextcloud_mcp_server/auth/templates/vector_viz.html create mode 100644 tests/unit/search/__init__.py create mode 100644 tests/unit/search/test_search_result.py diff --git a/nextcloud_mcp_server/auth/templates/vector_viz.html b/nextcloud_mcp_server/auth/templates/vector_viz.html new file mode 100644 index 0000000..7756b74 --- /dev/null +++ b/nextcloud_mcp_server/auth/templates/vector_viz.html @@ -0,0 +1,323 @@ + + +
+
+

Vector Visualization

+
+ Testing search algorithms on your indexed documents. User: {{ username }} +
+ +
+
+ +
+ + +
+ +
+
+ + +
+ +
+ + +
+ +
+ +
+ +
+ +
+
+ + +
+

Advanced Options

+ +
+
+ + + + Hold Ctrl/Cmd to select multiple + +
+ +
+
+ + +
+ +
+ + +
+
+
+ + +
+

+ BM25 Hybrid Search: Combines dense semantic vectors with sparse BM25 keyword vectors. +

+

+ RRF: Reciprocal Rank Fusion - Rank-based fusion producing scores in [0.0, 1.0] +

+

+ DBSF: Distribution-Based Score Fusion - Sums normalized scores (can exceed 1.0) +

+
+
+
+
+
+ +
+
+
+ Executing search and computing PCA projection... +
+
+
+
+ +
+

Search Results ()

+ +
+ Loading results... +
+ +
+ No results found. Try a different query or adjust your search parameters. +
+ + +
+
diff --git a/nextcloud_mcp_server/auth/userinfo_routes.py b/nextcloud_mcp_server/auth/userinfo_routes.py index 3566148..335f995 100644 --- a/nextcloud_mcp_server/auth/userinfo_routes.py +++ b/nextcloud_mcp_server/auth/userinfo_routes.py @@ -677,12 +677,15 @@ async def user_info_html(request: Request) -> HTMLResponse: return {{ query: '', algorithm: 'bm25_hybrid', + fusion: 'rrf', // Default fusion method for BM25 Hybrid showAdvanced: false, docTypes: [''], // Default to "All Types" limit: 50, scoreThreshold: 0.0, loading: false, results: [], + expandedChunks: {{}}, // Track which chunks are expanded (result_id -> chunk data) + chunkLoading: {{}}, // Track loading state per result async executeSearch() {{ this.loading = true; @@ -696,6 +699,11 @@ async def user_info_html(request: Request) -> HTMLResponse: score_threshold: this.scoreThreshold, }}); + // Add fusion parameter for BM25 Hybrid + if (this.algorithm === 'bm25_hybrid') {{ + params.append('fusion', this.fusion); + }} + // Add doc_types parameter (filter out empty string for "All Types") const selectedTypes = this.docTypes.filter(t => t !== ''); if (selectedTypes.length > 0) {{ @@ -778,6 +786,51 @@ async def user_info_html(request: Request) -> HTMLResponse: default: return `${{baseUrl}}`; }} + }}, + + hasChunkPosition(result) {{ + // Check if result has position metadata + return result.chunk_start_offset != null && result.chunk_end_offset != null; + }}, + + isChunkExpanded(resultKey) {{ + return this.expandedChunks[resultKey] !== undefined; + }}, + + async toggleChunk(result) {{ + const resultKey = `${{result.doc_type}}_${{result.id}}`; + + // If already expanded, collapse + if (this.isChunkExpanded(resultKey)) {{ + delete this.expandedChunks[resultKey]; + return; + }} + + // Otherwise, fetch and expand + this.chunkLoading[resultKey] = true; + + try {{ + const params = new URLSearchParams({{ + doc_type: result.doc_type, + doc_id: result.id, + start: result.chunk_start_offset, + end: result.chunk_end_offset, + context: 500 // 500 chars before/after + }}); + + const response = await fetch(`/app/chunk-context?${{params}}`); + const data = await response.json(); + + if (data.success) {{ + this.expandedChunks[resultKey] = data; + }} else {{ + alert('Failed to load chunk: ' + data.error); + }} + }} catch (error) {{ + alert('Error loading chunk: ' + error.message); + }} finally {{ + delete this.chunkLoading[resultKey]; + }} }} }} }} diff --git a/nextcloud_mcp_server/auth/viz_routes.py b/nextcloud_mcp_server/auth/viz_routes.py index a92a7e7..98925d0 100644 --- a/nextcloud_mcp_server/auth/viz_routes.py +++ b/nextcloud_mcp_server/auth/viz_routes.py @@ -12,8 +12,10 @@ All processing happens server-side following ADR-012: import logging import time +from pathlib import Path import numpy as np +from jinja2 import Environment, FileSystemLoader from starlette.authentication import requires from starlette.requests import Request from starlette.responses import HTMLResponse, JSONResponse @@ -28,6 +30,10 @@ from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client logger = logging.getLogger(__name__) +# Setup Jinja2 environment for templates +_template_dir = Path(__file__).parent / "templates" +_jinja_env = Environment(loader=FileSystemLoader(_template_dir)) + @requires("authenticated", redirect="oauth_login") async def vector_visualization_html(request: Request) -> HTMLResponse: @@ -63,252 +69,9 @@ async def vector_visualization_html(request: Request) -> HTMLResponse: else "unknown" ) - html_content = f""" - - -
-
-

Vector Visualization

-
- Testing search algorithms on your indexed documents. User: {username} -
- -
-
- -
- - -
- -
-
- - -
- -
- -
- -
- -
-
- - -
-

Advanced Options

- -
-
- - - - Hold Ctrl/Cmd to select multiple - -
- -
-
- - -
- -
- - -
-
-
- - -
-

- BM25 Hybrid Search: Uses Qdrant's native Reciprocal Rank Fusion (RRF) - to automatically combine dense semantic vectors with sparse BM25 keyword vectors. - No manual weight tuning required. -

-
-
-
-
-
- -
-
-
- Executing search and computing PCA projection... -
-
-
-
- -
-

Search Results ()

- -
- Loading results... -
- -
- No results found. Try a different query or adjust your search parameters. -
- - -
-
- """ - + # Load and render template + template = _jinja_env.get_template("vector_viz.html") + html_content = template.render(username=username) return HTMLResponse(content=html_content) @@ -352,6 +115,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse: algorithm = request.query_params.get("algorithm", "bm25_hybrid") limit = int(request.query_params.get("limit", "50")) score_threshold = float(request.query_params.get("score_threshold", "0.0")) + fusion = request.query_params.get("fusion", "rrf") # Default to RRF # Parse doc_types (comma-separated list, None = all types) doc_types_param = request.query_params.get("doc_types", "") @@ -359,7 +123,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse: logger.info( f"Viz search: user={username}, query='{query}', " - f"algorithm={algorithm}, limit={limit}, doc_types={doc_types}" + f"algorithm={algorithm}, fusion={fusion}, limit={limit}, doc_types={doc_types}" ) try: @@ -377,7 +141,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse: if algorithm == "semantic": search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold) elif algorithm == "bm25_hybrid": - search_algo = BM25HybridSearchAlgorithm(score_threshold=score_threshold) + search_algo = BM25HybridSearchAlgorithm( + score_threshold=score_threshold, fusion=fusion + ) else: return JSONResponse( {"success": False, "error": f"Unknown algorithm: {algorithm}"}, @@ -552,6 +318,8 @@ async def vector_visualization_search(request: Request) -> JSONResponse: "title": r.title, "excerpt": r.excerpt, "score": r.score, + "chunk_start_offset": r.chunk_start_offset, + "chunk_end_offset": r.chunk_end_offset, } for r in search_results ] @@ -594,3 +362,125 @@ async def vector_visualization_search(request: Request) -> JSONResponse: {"success": False, "error": str(e)}, status_code=500, ) + + +@requires("authenticated", redirect="oauth_login") +async def chunk_context_endpoint(request: Request) -> JSONResponse: + """Fetch chunk text with surrounding context for visualization. + + This endpoint retrieves the matched chunk along with surrounding text + to provide context for the search result. Used by the viz pane to + display chunks inline. + + Query parameters: + doc_type: Document type (e.g., "note") + doc_id: Document ID + start: Chunk start offset (character position) + end: Chunk end offset (character position) + context: Characters of context before/after (default: 500) + + Returns: + JSON with chunk_text, before_context, after_context, and flags + """ + try: + # Get query parameters + doc_type = request.query_params.get("doc_type") + doc_id = request.query_params.get("doc_id") + start_str = request.query_params.get("start") + end_str = request.query_params.get("end") + context_chars = int(request.query_params.get("context", "500")) + + # Validate required parameters + if not all([doc_type, doc_id, start_str, end_str]): + return JSONResponse( + { + "success": False, + "error": "Missing required parameters: doc_type, doc_id, start, end", + }, + status_code=400, + ) + + start = int(start_str) + end = int(end_str) + + # Currently only support notes + if doc_type != "note": + return JSONResponse( + {"success": False, "error": f"Unsupported doc_type: {doc_type}"}, + status_code=400, + ) + + # Get authenticated HTTP client and fetch note + from nextcloud_mcp_server.auth.userinfo_routes import ( + _get_authenticated_client_for_userinfo, + ) + from nextcloud_mcp_server.client.notes import NotesClient + + # Get username from request auth + username = ( + request.user.display_name + if hasattr(request.user, "display_name") + else "unknown" + ) + + # Create notes client with authenticated HTTP client + http_client = await _get_authenticated_client_for_userinfo(request) + notes_client = NotesClient(http_client, username) + + # Fetch full note content + note = await notes_client.get_note(int(doc_id)) + full_content = f"{note['title']}\n\n{note['content']}" + + # Validate offsets + if start < 0 or end > len(full_content) or start >= end: + return JSONResponse( + { + "success": False, + "error": f"Invalid offsets: start={start}, end={end}, content_length={len(full_content)}", + }, + status_code=400, + ) + + # Extract chunk + chunk_text = full_content[start:end] + + # Extract context before and after + before_start = max(0, start - context_chars) + before_context = full_content[before_start:start] + + after_end = min(len(full_content), end + context_chars) + after_context = full_content[end:after_end] + + # Determine if there's more content + has_more_before = before_start > 0 + has_more_after = after_end < len(full_content) + + logger.info( + f"Fetched chunk context for {doc_type}_{doc_id}: " + f"chunk_len={len(chunk_text)}, before_len={len(before_context)}, " + f"after_len={len(after_context)}" + ) + + return JSONResponse( + { + "success": True, + "chunk_text": chunk_text, + "before_context": before_context, + "after_context": after_context, + "has_more_before": has_more_before, + "has_more_after": has_more_after, + } + ) + + except ValueError as e: + logger.error(f"Invalid parameter format: {e}") + return JSONResponse( + {"success": False, "error": f"Invalid parameter format: {e}"}, + status_code=400, + ) + except Exception as e: + logger.error(f"Chunk context error: {e}", exc_info=True) + return JSONResponse( + {"success": False, "error": str(e)}, + status_code=500, + ) diff --git a/nextcloud_mcp_server/models/semantic.py b/nextcloud_mcp_server/models/semantic.py index b8233f0..2586195 100644 --- a/nextcloud_mcp_server/models/semantic.py +++ b/nextcloud_mcp_server/models/semantic.py @@ -19,9 +19,22 @@ class SemanticSearchResult(BaseModel): default="", description="Document category (notes) or location (calendar)" ) excerpt: str = Field(description="Excerpt from matching chunk") - score: float = Field(description="Semantic similarity score (0-1)") + score: float = Field( + description=( + "Relevance score (≥ 0.0, higher is better). " + "Score range depends on fusion method: " + "RRF produces scores in [0.0, 1.0], " + "DBSF can exceed 1.0 (sum of normalized scores from multiple systems)" + ) + ) chunk_index: int = Field(description="Index of matching chunk in document") total_chunks: int = Field(description="Total number of chunks in document") + chunk_start_offset: Optional[int] = Field( + default=None, description="Character position where chunk starts in document" + ) + chunk_end_offset: Optional[int] = Field( + default=None, description="Character position where chunk ends in document" + ) class SemanticSearchResponse(BaseResponse): diff --git a/nextcloud_mcp_server/search/algorithms.py b/nextcloud_mcp_server/search/algorithms.py index 49bec40..c859960 100644 --- a/nextcloud_mcp_server/search/algorithms.py +++ b/nextcloud_mcp_server/search/algorithms.py @@ -127,8 +127,12 @@ class SearchResult: doc_type: Document type (note, file, calendar, contact, etc.) title: Document title excerpt: Content excerpt showing match context - score: Relevance score (0.0-1.0, higher is better) + score: Relevance score (≥ 0.0, higher is better) + - RRF fusion: scores in [0.0, 1.0] + - DBSF fusion: scores can exceed 1.0 (sum of normalized scores) metadata: Additional algorithm-specific metadata + chunk_start_offset: Character position where chunk starts (None if not available) + chunk_end_offset: Character position where chunk ends (None if not available) """ id: int @@ -137,11 +141,20 @@ class SearchResult: excerpt: str score: float metadata: dict[str, Any] | None = None + chunk_start_offset: int | None = None + chunk_end_offset: int | None = None def __post_init__(self): - """Validate score is in valid range.""" - if not 0.0 <= self.score <= 1.0: - raise ValueError(f"Score must be between 0.0 and 1.0, got {self.score}") + """Validate score is non-negative. + + Note: Different fusion methods produce different score ranges: + - RRF (Reciprocal Rank Fusion): Bounded to [0.0, 1.0] + - DBSF (Distribution-Based Score Fusion): Unbounded (can exceed 1.0) + DBSF sums normalized scores from multiple systems, so scores can be + 1.5, 2.0, etc. when multiple systems agree a document is highly relevant. + """ + if self.score < 0.0: + raise ValueError(f"Score must be non-negative, got {self.score}") class SearchAlgorithm(ABC): diff --git a/nextcloud_mcp_server/search/bm25_hybrid.py b/nextcloud_mcp_server/search/bm25_hybrid.py index d8d5975..bdd3446 100644 --- a/nextcloud_mcp_server/search/bm25_hybrid.py +++ b/nextcloud_mcp_server/search/bm25_hybrid.py @@ -28,15 +28,27 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm): eliminating the need for application-layer result merging. """ - def __init__(self, score_threshold: float = 0.0): + def __init__(self, score_threshold: float = 0.0, fusion: str = "rrf"): """ Initialize BM25 hybrid search algorithm. Args: - score_threshold: Minimum RRF score (0-1, default: 0.0 to allow RRF scoring) - Note: RRF produces normalized scores, so threshold is typically lower + score_threshold: Minimum fusion score (0-1, default: 0.0 to allow fusion scoring) + Note: Both RRF and DBSF produce normalized scores + fusion: Fusion algorithm to use: "rrf" (Reciprocal Rank Fusion, default) + or "dbsf" (Distribution-Based Score Fusion) + + Raises: + ValueError: If fusion is not "rrf" or "dbsf" """ + if fusion not in ("rrf", "dbsf"): + raise ValueError( + f"Invalid fusion algorithm '{fusion}'. Must be 'rrf' or 'dbsf'" + ) + self.score_threshold = score_threshold + self.fusion = models.Fusion.RRF if fusion == "rrf" else models.Fusion.DBSF + self.fusion_name = fusion @property def name(self) -> str: @@ -78,7 +90,8 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm): logger.info( f"BM25 hybrid search: query='{query}', user={user_id}, " - f"limit={limit}, score_threshold={score_threshold}, doc_type={doc_type}" + f"limit={limit}, score_threshold={score_threshold}, doc_type={doc_type}, " + f"fusion={self.fusion_name}" ) # Generate dense embedding for semantic search @@ -139,8 +152,8 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm): filter=query_filter, ), ], - # RRF fusion query (no additional query needed, just fusion) - query=models.FusionQuery(fusion=models.Fusion.RRF), + # Fusion query (RRF or DBSF based on initialization) + query=models.FusionQuery(fusion=self.fusion), limit=limit * 2, # Get extra for deduplication score_threshold=score_threshold, with_payload=True, @@ -152,14 +165,16 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm): raise logger.info( - f"Qdrant RRF fusion returned {len(search_response.points)} results " + f"Qdrant {self.fusion_name.upper()} fusion returned {len(search_response.points)} results " f"(before deduplication)" ) if search_response.points: - # Log top 3 RRF scores to help with threshold tuning + # Log top 3 fusion scores to help with threshold tuning top_scores = [p.score for p in search_response.points[:3]] - logger.debug(f"Top 3 RRF fusion scores: {top_scores}") + logger.debug( + f"Top 3 {self.fusion_name.upper()} fusion scores: {top_scores}" + ) # Deduplicate by (doc_id, doc_type) - multiple chunks per document seen_docs = set() @@ -183,12 +198,14 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm): doc_type=doc_type, title=result.payload.get("title", "Untitled"), excerpt=result.payload.get("excerpt", ""), - score=result.score, # RRF fusion score + score=result.score, # Fusion score (RRF or DBSF) metadata={ "chunk_index": result.payload.get("chunk_index"), "total_chunks": result.payload.get("total_chunks"), - "search_method": "bm25_hybrid_rrf", + "search_method": f"bm25_hybrid_{self.fusion_name}", }, + chunk_start_offset=result.payload.get("chunk_start_offset"), + chunk_end_offset=result.payload.get("chunk_end_offset"), ) ) diff --git a/tests/unit/search/__init__.py b/tests/unit/search/__init__.py new file mode 100644 index 0000000..3ac51e6 --- /dev/null +++ b/tests/unit/search/__init__.py @@ -0,0 +1 @@ +"""Unit tests for search algorithms.""" diff --git a/tests/unit/search/test_search_result.py b/tests/unit/search/test_search_result.py new file mode 100644 index 0000000..c9dbf0b --- /dev/null +++ b/tests/unit/search/test_search_result.py @@ -0,0 +1,135 @@ +"""Unit tests for SearchResult validation.""" + +import pytest + +from nextcloud_mcp_server.search.algorithms import SearchResult + + +@pytest.mark.unit +def test_search_result_rrf_score_in_range(): + """Test SearchResult accepts RRF scores in [0.0, 1.0] range.""" + result = SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="Test excerpt", + score=0.85, + ) + + assert result.score == 0.85 + + +@pytest.mark.unit +def test_search_result_rrf_score_at_lower_bound(): + """Test SearchResult accepts RRF score at lower bound (0.0).""" + result = SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="Test excerpt", + score=0.0, + ) + + assert result.score == 0.0 + + +@pytest.mark.unit +def test_search_result_rrf_score_at_upper_bound(): + """Test SearchResult accepts RRF score at upper bound (1.0).""" + result = SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="Test excerpt", + score=1.0, + ) + + assert result.score == 1.0 + + +@pytest.mark.unit +def test_search_result_dbsf_score_above_one(): + """Test SearchResult accepts DBSF scores > 1.0. + + DBSF (Distribution-Based Score Fusion) sums normalized scores from multiple + systems (dense semantic + sparse BM25), so scores can exceed 1.0 when both + systems strongly agree a document is relevant. + """ + # Typical DBSF score when both systems agree + result = SearchResult( + id=1, + doc_type="note", + title="Highly Relevant Note", + excerpt="Contains keywords and is semantically similar", + score=1.55, + ) + + assert result.score == 1.55 + + +@pytest.mark.unit +def test_search_result_dbsf_score_edge_case(): + """Test SearchResult accepts DBSF maximum theoretical score (2.0). + + Maximum DBSF score with 2 systems: 1.0 (dense) + 1.0 (sparse) = 2.0 + """ + result = SearchResult( + id=1, + doc_type="note", + title="Perfect Match", + excerpt="Perfect semantic and keyword match", + score=2.0, + ) + + assert result.score == 2.0 + + +@pytest.mark.unit +def test_search_result_negative_score_raises_error(): + """Test SearchResult rejects negative scores.""" + with pytest.raises(ValueError) as exc_info: + SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="Test excerpt", + score=-0.1, + ) + + assert "Score must be non-negative" in str(exc_info.value) + assert "got -0.1" in str(exc_info.value) + + +@pytest.mark.unit +def test_search_result_with_metadata(): + """Test SearchResult with optional metadata field.""" + result = SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="Test excerpt", + score=1.25, + metadata={"fusion_method": "dbsf", "dense_score": 0.8, "sparse_score": 0.45}, + ) + + assert result.score == 1.25 + assert result.metadata["fusion_method"] == "dbsf" + assert result.metadata["dense_score"] == 0.8 + assert result.metadata["sparse_score"] == 0.45 + + +@pytest.mark.unit +def test_search_result_with_chunk_offsets(): + """Test SearchResult with chunk offset information.""" + result = SearchResult( + id=1, + doc_type="note", + title="Test Note", + excerpt="matching chunk text", + score=0.9, + chunk_start_offset=100, + chunk_end_offset=500, + ) + + assert result.chunk_start_offset == 100 + assert result.chunk_end_offset == 500