feat: Add multi-document Protocol with cross-app search support
Implements NextcloudClientProtocol for multi-document-type search, following the user requirement that document types are not 1:1 with apps (e.g., the Notes app specializes in markdown, while Files/WebDAV handles multiple file types).

Key Changes:
- NextcloudClientProtocol: Generic protocol with app-specific client properties
- get_indexed_doc_types(): Query Qdrant for actually-indexed document types
- Document dispatch: All algorithms check Qdrant before attempting access
- Cross-type deduplication: Use (doc_id, doc_type) tuples in hybrid RRF

Search Algorithm Updates:
- Semantic: Added _verify_document_access() with dispatch to appropriate client
  - Deduplication by (doc_id, doc_type) tuple
  - Only "note" verification implemented, others return None with info log
- Keyword: Added _fetch_documents() dispatch method
  - Queries Qdrant for available types before fetching
  - Supports cross-app search when doc_type=None
- Fuzzy: Same pattern as keyword search
- Hybrid: Already uses (doc_id, doc_type) for deduplication (no changes needed)

Future-Proof Design:
- File/calendar verification stubs in place
- Clear logging when unsupported types found
- Easy to extend when processor indexes new document types

Currently Supported:
- "note" documents fully implemented and tested
- Other types gracefully handled (logged but skipped)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -10,15 +10,22 @@ All algorithms share the same interface and can be used interchangeably by both
|
||||
MCP tools and the visualization pane.
|
||||
"""
|
||||
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.search.algorithms import (
|
||||
NextcloudClientProtocol,
|
||||
SearchAlgorithm,
|
||||
SearchResult,
|
||||
get_indexed_doc_types,
|
||||
)
|
||||
from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
|
||||
from nextcloud_mcp_server.search.hybrid import HybridSearchAlgorithm
|
||||
from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
|
||||
from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
|
||||
|
||||
__all__ = [
|
||||
"NextcloudClientProtocol",
|
||||
"SearchAlgorithm",
|
||||
"SearchResult",
|
||||
"get_indexed_doc_types",
|
||||
"SemanticSearchAlgorithm",
|
||||
"KeywordSearchAlgorithm",
|
||||
"FuzzySearchAlgorithm",
|
||||
|
||||
@@ -2,7 +2,120 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
class NextcloudClientProtocol(Protocol):
    """Structural interface a Nextcloud client must expose for search.

    Search algorithms only rely on the attributes declared here: the
    ``username`` plus one sub-client per Nextcloud app (Notes, Files/WebDAV,
    Calendar, ...). The sub-clients perform the actual API calls.

    Document types (e.g., "note", "file", "calendar") are deliberately NOT
    mapped 1:1 to apps: the Notes app specializes in markdown files, while
    Files/WebDAV handles many file types. Dispatch therefore happens per
    document type, not per app.

    Algorithms consult Qdrant for the document types that are actually
    indexed before touching a sub-client, which keeps cross-app search
    graceful when a type is not available.
    """

    username: str

    # One read-only accessor per app-specific sub-client. They are typed as
    # ``Any`` because the protocol does not constrain the sub-client API.

    @property
    def notes(self) -> Any:
        """Sub-client for note documents (Notes app)."""

    @property
    def webdav(self) -> Any:
        """Sub-client for file documents (WebDAV)."""

    @property
    def calendar(self) -> Any:
        """Sub-client for event/task documents (Calendar app)."""

    @property
    def contacts(self) -> Any:
        """Sub-client for contact card documents (Contacts app)."""

    @property
    def deck(self) -> Any:
        """Sub-client for deck card documents (Deck app)."""

    @property
    def cookbook(self) -> Any:
        """Sub-client for recipe documents (Cookbook app)."""

    @property
    def tables(self) -> Any:
        """Sub-client for table row documents (Tables app)."""
|
||||
|
||||
|
||||
async def get_indexed_doc_types(user_id: str) -> set[str]:
    """Query Qdrant for the document types actually indexed for a user.

    Search algorithms call this before fetching or verifying documents so
    they only dispatch to document types that exist in the index, which
    allows graceful cross-app search.

    The lookup is a single scroll page of at most 1000 points, i.e. a
    sample: a type whose points all fall outside that page is not reported.

    Args:
        user_id: User ID to filter by (matched against the ``user_id``
            payload field).

    Returns:
        Set of document type strings (e.g., {"note", "file", "calendar"}).
        An empty set is returned when nothing is indexed or when the
        Qdrant query fails (the failure is logged as a warning).

    Example:
        >>> types = await get_indexed_doc_types("alice")
        >>> if "note" in types:
        ...     # Search notes
    """
    import logging

    from qdrant_client.models import FieldCondition, Filter, MatchValue

    from nextcloud_mcp_server.config import get_settings
    from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

    logger = logging.getLogger(__name__)
    settings = get_settings()

    qdrant_client = await get_qdrant_client()
    collection = settings.qdrant_collection

    # Use scroll to sample documents and extract doc_types.
    # NOTE(review): recent Qdrant releases appear to offer a facet API that
    # could replace this sampling - confirm against the deployed version.
    try:
        scroll_results, _next_offset = await qdrant_client.scroll(
            collection_name=collection,
            scroll_filter=Filter(
                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
            ),
            limit=1000,  # Sample size to discover types
            with_payload=["doc_type"],
            with_vectors=False,  # Don't need vectors for type discovery
        )

        doc_types: set[str] = set()
        for point in scroll_results:
            # payload is optional on Qdrant records; a point without one must
            # not abort discovery for every other point.
            doc_type = (point.payload or {}).get("doc_type")
            if doc_type:
                doc_types.add(doc_type)

        logger.debug(
            "Found indexed document types for user %s: %s", user_id, doc_types
        )
        return doc_types

    except Exception as e:
        # Deliberately best-effort: callers treat "no types" as "nothing to
        # search" rather than failing the whole search request.
        logger.warning("Failed to query Qdrant for doc_types: %s", e)
        return set()
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -3,8 +3,12 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.search.algorithms import (
|
||||
NextcloudClientProtocol,
|
||||
SearchAlgorithm,
|
||||
SearchResult,
|
||||
get_indexed_doc_types,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -38,7 +42,7 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
|
||||
user_id: str,
|
||||
limit: int = 10,
|
||||
doc_type: str | None = None,
|
||||
nextcloud_client: NextcloudClient | None = None,
|
||||
nextcloud_client: NextcloudClientProtocol | None = None,
|
||||
**kwargs: Any,
|
||||
) -> list[SearchResult]:
|
||||
"""Execute fuzzy search using character overlap.
|
||||
@@ -67,22 +71,39 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
|
||||
f"limit={limit}, threshold={threshold}, doc_type={doc_type}"
|
||||
)
|
||||
|
||||
# Currently only supports notes
|
||||
if doc_type and doc_type != "note":
|
||||
logger.warning(f"Fuzzy search not yet implemented for doc_type={doc_type}")
|
||||
return []
|
||||
# Get available document types from Qdrant
|
||||
indexed_types = await get_indexed_doc_types(user_id)
|
||||
logger.debug(f"Indexed document types for user: {indexed_types}")
|
||||
|
||||
# Fetch all notes for the user
|
||||
notes = await nextcloud_client.notes.get_notes()
|
||||
logger.debug(f"Fetched {len(notes)} notes for fuzzy search")
|
||||
# Determine which types to search
|
||||
if doc_type:
|
||||
# Search specific type if requested
|
||||
search_types = [doc_type] if doc_type in indexed_types else []
|
||||
if not search_types:
|
||||
logger.info(f"Doc type '{doc_type}' not indexed for user {user_id}")
|
||||
return []
|
||||
else:
|
||||
# Search all indexed types
|
||||
search_types = list(indexed_types)
|
||||
|
||||
# Score and filter notes
|
||||
scored_notes = []
|
||||
# Fetch documents for each type and score them
|
||||
all_documents = []
|
||||
for dtype in search_types:
|
||||
documents = await self._fetch_documents(nextcloud_client, dtype)
|
||||
for doc in documents:
|
||||
doc["_doc_type"] = dtype # Tag with type
|
||||
all_documents.extend(documents)
|
||||
|
||||
logger.debug(f"Fetched {len(all_documents)} total documents for fuzzy search")
|
||||
|
||||
# Score and filter documents
|
||||
scored_results = []
|
||||
query_lower = query.lower()
|
||||
|
||||
for note in notes:
|
||||
title = note.get("title", "")
|
||||
content = note.get("content", "")
|
||||
for doc in all_documents:
|
||||
dtype = doc.get("_doc_type", "note")
|
||||
title = doc.get("title", "")
|
||||
content = doc.get("content", "")
|
||||
|
||||
# Check title match
|
||||
title_score = self._calculate_char_overlap(query_lower, title.lower())
|
||||
@@ -100,16 +121,16 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
|
||||
else:
|
||||
excerpt = self._extract_excerpt(content, max_length=200)
|
||||
|
||||
scored_notes.append(
|
||||
scored_results.append(
|
||||
SearchResult(
|
||||
id=note["id"],
|
||||
doc_type="note",
|
||||
id=doc["id"],
|
||||
doc_type=dtype,
|
||||
title=title or "Untitled",
|
||||
excerpt=excerpt,
|
||||
score=best_score,
|
||||
metadata={
|
||||
"category": note.get("category", ""),
|
||||
"modified": note.get("modified"),
|
||||
"category": doc.get("category", ""),
|
||||
"modified": doc.get("modified"),
|
||||
"match_location": "title"
|
||||
if title_score >= content_score
|
||||
else "content",
|
||||
@@ -118,8 +139,8 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
|
||||
)
|
||||
|
||||
# Sort by score (descending) and limit
|
||||
scored_notes.sort(key=lambda x: x.score, reverse=True)
|
||||
results = scored_notes[:limit]
|
||||
scored_results.sort(key=lambda x: x.score, reverse=True)
|
||||
results = scored_results[:limit]
|
||||
|
||||
logger.info(f"Fuzzy search returned {len(results)} matching notes")
|
||||
if results:
|
||||
@@ -131,6 +152,32 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
|
||||
|
||||
return results
|
||||
|
||||
async def _fetch_documents(
|
||||
self, nextcloud_client: NextcloudClientProtocol, doc_type: str
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Fetch documents of a specific type from Nextcloud.
|
||||
|
||||
Args:
|
||||
nextcloud_client: Client for API access
|
||||
doc_type: Document type to fetch ("note", "file", "calendar", etc.)
|
||||
|
||||
Returns:
|
||||
List of document dictionaries with at minimum: id, title, content
|
||||
"""
|
||||
if doc_type == "note":
|
||||
return await nextcloud_client.notes.get_notes()
|
||||
elif doc_type == "file":
|
||||
# Future: fetch files when indexed
|
||||
logger.info("File documents not yet supported for fuzzy search")
|
||||
return []
|
||||
elif doc_type == "calendar":
|
||||
# Future: fetch calendar events when indexed
|
||||
logger.info("Calendar documents not yet supported for fuzzy search")
|
||||
return []
|
||||
else:
|
||||
logger.warning(f"Unknown document type '{doc_type}' for fuzzy search")
|
||||
return []
|
||||
|
||||
def _calculate_char_overlap(self, query: str, text: str) -> float:
|
||||
"""Calculate character overlap ratio between query and text.
|
||||
|
||||
|
||||
@@ -5,8 +5,11 @@ import logging
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.search.algorithms import (
|
||||
NextcloudClientProtocol,
|
||||
SearchAlgorithm,
|
||||
SearchResult,
|
||||
)
|
||||
from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
|
||||
from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
|
||||
from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
|
||||
@@ -82,7 +85,7 @@ class HybridSearchAlgorithm(SearchAlgorithm):
|
||||
user_id: str,
|
||||
limit: int = 10,
|
||||
doc_type: str | None = None,
|
||||
nextcloud_client: NextcloudClient | None = None,
|
||||
nextcloud_client: NextcloudClientProtocol | None = None,
|
||||
**kwargs: Any,
|
||||
) -> list[SearchResult]:
|
||||
"""Execute hybrid search using RRF to combine algorithms.
|
||||
|
||||
@@ -3,8 +3,12 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.search.algorithms import (
|
||||
NextcloudClientProtocol,
|
||||
SearchAlgorithm,
|
||||
SearchResult,
|
||||
get_indexed_doc_types,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -32,7 +36,7 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
|
||||
user_id: str,
|
||||
limit: int = 10,
|
||||
doc_type: str | None = None,
|
||||
nextcloud_client: NextcloudClient | None = None,
|
||||
nextcloud_client: NextcloudClientProtocol | None = None,
|
||||
**kwargs: Any,
|
||||
) -> list[SearchResult]:
|
||||
"""Execute keyword search using token matching.
|
||||
@@ -63,52 +67,66 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
|
||||
query_tokens = self._process_query(query)
|
||||
logger.debug(f"Query tokens: {query_tokens}")
|
||||
|
||||
# Currently only supports notes
|
||||
# TODO: Extend to other document types (files, calendar, etc.)
|
||||
if doc_type and doc_type != "note":
|
||||
logger.warning(
|
||||
f"Keyword search not yet implemented for doc_type={doc_type}"
|
||||
)
|
||||
return []
|
||||
# Get available document types from Qdrant
|
||||
indexed_types = await get_indexed_doc_types(user_id)
|
||||
logger.debug(f"Indexed document types for user: {indexed_types}")
|
||||
|
||||
# Fetch all notes for the user
|
||||
notes = await nextcloud_client.notes.get_notes()
|
||||
logger.debug(f"Fetched {len(notes)} notes for keyword search")
|
||||
# Determine which types to search
|
||||
if doc_type:
|
||||
# Search specific type if requested
|
||||
search_types = [doc_type] if doc_type in indexed_types else []
|
||||
if not search_types:
|
||||
logger.info(f"Doc type '{doc_type}' not indexed for user {user_id}")
|
||||
return []
|
||||
else:
|
||||
# Search all indexed types
|
||||
search_types = list(indexed_types)
|
||||
|
||||
# Score and filter notes
|
||||
scored_notes = []
|
||||
for note in notes:
|
||||
# Fetch documents for each type and score them
|
||||
all_documents = []
|
||||
for dtype in search_types:
|
||||
documents = await self._fetch_documents(nextcloud_client, dtype)
|
||||
for doc in documents:
|
||||
doc["_doc_type"] = dtype # Tag with type
|
||||
all_documents.extend(documents)
|
||||
|
||||
logger.debug(f"Fetched {len(all_documents)} total documents for keyword search")
|
||||
|
||||
# Score and filter documents
|
||||
scored_results = []
|
||||
for doc in all_documents:
|
||||
dtype = doc.get("_doc_type", "note")
|
||||
score = self._calculate_score(
|
||||
query_tokens,
|
||||
note.get("title", ""),
|
||||
note.get("content", ""),
|
||||
doc.get("title", ""),
|
||||
doc.get("content", ""),
|
||||
)
|
||||
|
||||
if score > 0: # Only include matches
|
||||
# Extract excerpt with context
|
||||
excerpt = self._extract_excerpt(
|
||||
note.get("content", ""),
|
||||
doc.get("content", ""),
|
||||
query_tokens,
|
||||
max_length=200,
|
||||
)
|
||||
|
||||
scored_notes.append(
|
||||
scored_results.append(
|
||||
SearchResult(
|
||||
id=note["id"],
|
||||
doc_type="note",
|
||||
title=note.get("title", "Untitled"),
|
||||
id=doc["id"],
|
||||
doc_type=dtype,
|
||||
title=doc.get("title", "Untitled"),
|
||||
excerpt=excerpt,
|
||||
score=score,
|
||||
metadata={
|
||||
"category": note.get("category", ""),
|
||||
"modified": note.get("modified"),
|
||||
"category": doc.get("category", ""),
|
||||
"modified": doc.get("modified"),
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
# Sort by score (descending) and limit
|
||||
scored_notes.sort(key=lambda x: x.score, reverse=True)
|
||||
results = scored_notes[:limit]
|
||||
scored_results.sort(key=lambda x: x.score, reverse=True)
|
||||
results = scored_results[:limit]
|
||||
|
||||
logger.info(f"Keyword search returned {len(results)} matching notes")
|
||||
if results:
|
||||
@@ -120,6 +138,32 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
|
||||
|
||||
return results
|
||||
|
||||
async def _fetch_documents(
|
||||
self, nextcloud_client: NextcloudClientProtocol, doc_type: str
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Fetch documents of a specific type from Nextcloud.
|
||||
|
||||
Args:
|
||||
nextcloud_client: Client for API access
|
||||
doc_type: Document type to fetch ("note", "file", "calendar", etc.)
|
||||
|
||||
Returns:
|
||||
List of document dictionaries with at minimum: id, title, content
|
||||
"""
|
||||
if doc_type == "note":
|
||||
return await nextcloud_client.notes.get_notes()
|
||||
elif doc_type == "file":
|
||||
# Future: fetch files when indexed
|
||||
logger.info("File documents not yet supported for keyword search")
|
||||
return []
|
||||
elif doc_type == "calendar":
|
||||
# Future: fetch calendar events when indexed
|
||||
logger.info("Calendar documents not yet supported for keyword search")
|
||||
return []
|
||||
else:
|
||||
logger.warning(f"Unknown document type '{doc_type}' for keyword search")
|
||||
return []
|
||||
|
||||
def _process_query(self, query: str) -> list[str]:
|
||||
"""Tokenize and normalize query.
|
||||
|
||||
|
||||
@@ -6,11 +6,14 @@ from typing import Any
|
||||
from httpx import HTTPStatusError
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_embedding_service
|
||||
from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.search.algorithms import (
|
||||
NextcloudClientProtocol,
|
||||
SearchAlgorithm,
|
||||
SearchResult,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -45,7 +48,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
user_id: str,
|
||||
limit: int = 10,
|
||||
doc_type: str | None = None,
|
||||
nextcloud_client: NextcloudClient | None = None,
|
||||
nextcloud_client: NextcloudClientProtocol | None = None,
|
||||
**kwargs: Any,
|
||||
) -> list[SearchResult]:
|
||||
"""Execute semantic search using vector similarity.
|
||||
@@ -144,9 +147,13 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
self,
|
||||
points: list[Any],
|
||||
limit: int,
|
||||
nextcloud_client: NextcloudClient | None,
|
||||
nextcloud_client: NextcloudClientProtocol | None,
|
||||
) -> list[SearchResult]:
|
||||
"""Deduplicate results by doc_id and verify access.
|
||||
"""Deduplicate results by (doc_id, doc_type) and verify access.
|
||||
|
||||
Supports multiple document types with dispatch to appropriate client methods.
|
||||
Deduplication is now by (doc_id, doc_type) tuple to handle cases where
|
||||
the same ID might exist across different document types.
|
||||
|
||||
Args:
|
||||
points: Qdrant search results
|
||||
@@ -156,58 +163,32 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
Returns:
|
||||
List of SearchResult objects
|
||||
"""
|
||||
seen_doc_ids = set()
|
||||
seen_docs = set() # Track (doc_id, doc_type) tuples
|
||||
results = []
|
||||
|
||||
for result in points:
|
||||
doc_id = int(result.payload["doc_id"])
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
doc_key = (doc_id, doc_type)
|
||||
|
||||
# Skip if we've already seen this document
|
||||
if doc_id in seen_doc_ids:
|
||||
if doc_key in seen_docs:
|
||||
continue
|
||||
|
||||
seen_doc_ids.add(doc_id)
|
||||
seen_docs.add(doc_key)
|
||||
|
||||
# Verify access via Nextcloud API if client provided
|
||||
# Currently only supports notes
|
||||
if nextcloud_client and doc_type == "note":
|
||||
try:
|
||||
note = await nextcloud_client.notes.get_note(doc_id)
|
||||
# Dispatch to appropriate client based on doc_type
|
||||
verified_result = None
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
id=doc_id,
|
||||
doc_type="note",
|
||||
title=result.payload["title"],
|
||||
excerpt=result.payload["excerpt"],
|
||||
score=result.score,
|
||||
metadata={
|
||||
"category": note.get("category", ""),
|
||||
"chunk_index": result.payload["chunk_index"],
|
||||
"total_chunks": result.payload["total_chunks"],
|
||||
},
|
||||
)
|
||||
)
|
||||
if nextcloud_client:
|
||||
verified_result = await self._verify_document_access(
|
||||
nextcloud_client, doc_id, doc_type, result
|
||||
)
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code in (403, 404):
|
||||
# User lost access or document deleted
|
||||
logger.debug(
|
||||
f"Skipping note {doc_id}: {e.response.status_code}"
|
||||
)
|
||||
continue
|
||||
else:
|
||||
# Log other errors but continue processing
|
||||
logger.warning(
|
||||
f"Error verifying access to note {doc_id}: "
|
||||
f"{e.response.status_code}"
|
||||
)
|
||||
continue
|
||||
else:
|
||||
if verified_result:
|
||||
results.append(verified_result)
|
||||
elif not nextcloud_client:
|
||||
# No access verification, return result directly
|
||||
results.append(
|
||||
SearchResult(
|
||||
@@ -223,7 +204,72 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
)
|
||||
)
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
return results
|
||||
|
||||
async def _verify_document_access(
    self,
    nextcloud_client: NextcloudClientProtocol,
    doc_id: int,
    doc_type: str,
    qdrant_result: Any,
) -> SearchResult | None:
    """Confirm via the Nextcloud API that the user can still read a document.

    Only "note" documents are checked against the API; every other type is
    logged and rejected.

    Args:
        nextcloud_client: Client for API access
        doc_id: Document ID
        doc_type: Document type ("note", "file", "calendar", etc.)
        qdrant_result: Original Qdrant search result; its ``payload`` and
            ``score`` populate the returned SearchResult.

    Returns:
        SearchResult if access verified, None if the type is unsupported or
        the API answered with an HTTP error status.
    """
    # Types without a verification implementation are dropped up front.
    if doc_type == "file":
        # Future: verify file access when files are indexed
        logger.info(
            f"File {doc_id} found in search but file verification not yet implemented"
        )
        return None
    if doc_type == "calendar":
        # Future: verify calendar access when calendar events are indexed
        logger.info(
            f"Calendar event {doc_id} found in search but calendar verification not yet implemented"
        )
        return None
    if doc_type != "note":
        logger.warning(f"Unknown document type '{doc_type}' for doc_id {doc_id}")
        return None

    # Fetching the note is the access check: an HTTP error means "skip".
    try:
        note = await nextcloud_client.notes.get_note(doc_id)
    except HTTPStatusError as e:
        status = e.response.status_code
        if status in (403, 404):
            # User lost access or document deleted
            logger.debug(f"Skipping {doc_type} {doc_id}: {status}")
        else:
            # Log other errors but continue processing
            logger.warning(
                f"Error verifying access to {doc_type} {doc_id}: {status}"
            )
        return None

    payload = qdrant_result.payload
    return SearchResult(
        id=doc_id,
        doc_type="note",
        title=payload["title"],
        excerpt=payload["excerpt"],
        score=qdrant_result.score,
        metadata={
            "category": note.get("category", ""),
            "chunk_index": payload["chunk_index"],
            "total_chunks": payload["total_chunks"],
        },
    )
|
||||
|
||||
Reference in New Issue
Block a user