fix: Handle named vectors in visualization and semantic search

- viz_routes.py: Extract "dense" vector from named vector dict
- semantic.py: Specify using="dense" for BM25 hybrid collections
- Fixes "X must be 2D array" error in hybrid search
- Fixes "Dense vector is not found" error in semantic search

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 08:16:35 +01:00
parent fc6a2f14e4
commit 944b6dcf5a
2 changed files with 15 additions and 3 deletions
@@ -489,7 +489,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
                ]
            ),
            limit=len(doc_ids) * 2,  # Account for multiple chunks per doc
-            with_vectors=True,
+            with_vectors=["dense"],  # Only fetch dense vectors for visualization
            with_payload=["doc_id"],  # Need doc_id to map vectors to results
        )

@@ -505,8 +505,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
                }
            )

-        # Extract vectors
-        vectors = np.array([p.vector for p in points if p.vector is not None])
+        # Extract dense vectors (handle both named and unnamed vectors)
+        def extract_dense_vector(point):
+            if point.vector is None:
+                return None
+            # If named vectors (dict), extract "dense"
+            if isinstance(point.vector, dict):
+                return point.vector.get("dense")
+            # If unnamed vector (array), use directly
+            return point.vector
+
+        vectors = np.array(
+            [v for v in (extract_dense_vector(p) for p in points) if v is not None]
+        )

        if len(vectors) < 2:
            # Not enough points for PCA
@@ -101,6 +101,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
            search_response = await qdrant_client.query_points(
                collection_name=settings.get_collection_name(),
                query=query_embedding,
+                using="dense",  # Use named dense vector (BM25 hybrid collections)
                query_filter=Filter(must=filter_conditions),
                limit=limit * 2,  # Get extra for deduplication
                score_threshold=score_threshold,