Merge branch 'feature/bm25'

Resolves conflict in viz_routes.py by combining:
- Named vector extraction from feature/bm25
- Performance timing from master
This commit is contained in:
Chris Coutinho
2025-11-16 08:18:39 +01:00
2 changed files with 15 additions and 3 deletions
+14 -3
View File
@@ -468,7 +468,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
]
),
limit=len(doc_ids) * 2, # Account for multiple chunks per doc
with_vectors=True,
with_vectors=["dense"], # Only fetch dense vectors for visualization
with_payload=["doc_id"], # Need doc_id to map vectors to results
)
@@ -484,8 +484,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
}
)
# Extract vectors
vectors = np.array([p.vector for p in points if p.vector is not None])
# Extract dense vectors (handle both named and unnamed vectors)
def extract_dense_vector(point):
    """Return the dense vector carried by ``point``, or ``None``.

    ``point.vector`` is handled in three shapes:

    * ``None`` -- nothing stored; ``None`` is returned.
    * ``dict`` -- named vectors; the ``"dense"`` entry is returned
      (``None`` when that key is absent).
    * anything else -- treated as an unnamed vector and returned as is.
    """
    stored = point.vector
    if isinstance(stored, dict):
        # Named-vector layout: only the "dense" entry is wanted here.
        return stored.get("dense")
    # Falls through for both the missing case (None) and the
    # unnamed-vector case, which is passed back unchanged.
    return stored
vectors = np.array(
[v for v in (extract_dense_vector(p) for p in points) if v is not None]
)
vector_fetch_duration = time.perf_counter() - vector_fetch_start
if len(vectors) < 2:
+1
View File
@@ -101,6 +101,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
search_response = await qdrant_client.query_points(
collection_name=settings.get_collection_name(),
query=query_embedding,
using="dense", # Use named dense vector (BM25 hybrid collections)
query_filter=Filter(must=filter_conditions),
limit=limit * 2, # Get extra for deduplication
score_threshold=score_threshold,