feat(news): add Nextcloud News app integration
Add full integration for the Nextcloud News (RSS/Atom reader) app:
- Add NewsClient with complete CRUD operations for folders, feeds, and items
- Add 8 read-only MCP tools for listing/getting folders, feeds, items
- Add Pydantic models for News entities with camelCase alias support
- Add vector sync support for starred + unread items
- Add HTML to Markdown converter using markdownify for better embeddings
- Add Docker post-install hook to enable News app
- Add 25 unit tests for NewsClient API methods

Vector sync indexes starred and unread items, providing a balanced approach that captures important (starred) and current (unread) content without indexing the entire article history.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -60,6 +60,7 @@ from nextcloud_mcp_server.server import (
|
||||
configure_contacts_tools,
|
||||
configure_cookbook_tools,
|
||||
configure_deck_tools,
|
||||
configure_news_tools,
|
||||
configure_notes_tools,
|
||||
configure_semantic_tools,
|
||||
configure_sharing_tools,
|
||||
@@ -514,7 +515,7 @@ async def load_oauth_client_credentials(
|
||||
# and the authorization server will limit them to these allowed scopes.
|
||||
#
|
||||
# The PRM endpoint advertises the same scopes dynamically via @require_scopes decorators.
|
||||
dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write"
|
||||
dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write news:read news:write"
|
||||
|
||||
# Add offline_access scope if refresh tokens are enabled
|
||||
enable_offline_access = os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() in (
|
||||
@@ -1046,6 +1047,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
|
||||
"contacts": configure_contacts_tools,
|
||||
"cookbook": configure_cookbook_tools,
|
||||
"deck": configure_deck_tools,
|
||||
"news": configure_news_tools,
|
||||
}
|
||||
|
||||
# If no specific apps are specified, enable all
|
||||
|
||||
@@ -18,6 +18,7 @@ from .contacts import ContactsClient
|
||||
from .cookbook import CookbookClient
|
||||
from .deck import DeckClient
|
||||
from .groups import GroupsClient
|
||||
from .news import NewsClient
|
||||
from .notes import NotesClient
|
||||
from .sharing import SharingClient
|
||||
from .tables import TablesClient
|
||||
@@ -81,6 +82,7 @@ class NextcloudClient:
|
||||
self.contacts = ContactsClient(self._client, username)
|
||||
self.cookbook = CookbookClient(self._client, username)
|
||||
self.deck = DeckClient(self._client, username)
|
||||
self.news = NewsClient(self._client, username)
|
||||
self.users = UsersClient(self._client, username)
|
||||
self.groups = GroupsClient(self._client, username)
|
||||
self.sharing = SharingClient(self._client, username)
|
||||
|
||||
@@ -0,0 +1,394 @@
|
||||
"""Client for Nextcloud News app operations."""
|
||||
|
||||
import logging
|
||||
from enum import IntEnum
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseNextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NewsItemType(IntEnum):
    """Selector values sent as the ``type`` query parameter of item listings."""

    FEED = 0  # items belonging to one feed
    FOLDER = 1  # items of every feed inside one folder
    STARRED = 2  # starred items only
    ALL = 3  # no restriction
|
||||
|
||||
|
||||
class NewsClient(BaseNextcloudClient):
    """Async client for the Nextcloud News REST API (``v1-3``).

    Each method sends exactly one request through ``self._make_request``
    (provided by ``BaseNextcloudClient``) and returns either decoded JSON
    data or ``None`` for operations without a meaningful response body.
    """

    app_name = "news"
    API_BASE = "/apps/news/api/v1-3"

    # --- Folders ---

    async def get_folders(self) -> list[dict[str, Any]]:
        """Return all folders.

        Returns:
            The ``folders`` list from the response, or ``[]`` if the key is
            absent.
        """
        response = await self._make_request("GET", f"{self.API_BASE}/folders")
        return response.json().get("folders", [])

    async def create_folder(self, name: str) -> dict[str, Any]:
        """Create a folder.

        Args:
            name: Folder name

        Returns:
            The first entry of the returned ``folders`` list, or ``{}`` when
            the response carries no folder.

        Raises:
            HTTPStatusError: 409 if folder name already exists,
                422 if name is empty
        """
        response = await self._make_request(
            "POST", f"{self.API_BASE}/folders", json={"name": name}
        )
        created = response.json().get("folders", [])
        if not created:
            return {}
        return created[0]

    async def rename_folder(self, folder_id: int, name: str) -> None:
        """Rename a folder.

        Args:
            folder_id: Folder ID
            name: New folder name

        Raises:
            HTTPStatusError: 404 if folder not found, 409 if name exists
        """
        await self._make_request(
            "PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name}
        )

    async def delete_folder(self, folder_id: int) -> None:
        """Delete a folder together with its feeds and items.

        Args:
            folder_id: Folder ID

        Raises:
            HTTPStatusError: 404 if folder not found
        """
        await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}")

    async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None:
        """Mark the items of a folder as read.

        Args:
            folder_id: Folder ID
            newest_item_id: ID of newest item to mark read (prevents marking
                items the user hasn't seen yet)

        Raises:
            HTTPStatusError: 404 if folder not found
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/folders/{folder_id}/read",
            json={"newestItemId": newest_item_id},
        )

    # --- Feeds ---

    async def get_feeds(self) -> dict[str, Any]:
        """Return all feeds plus account-level metadata.

        Returns:
            The decoded response body, a dict with keys:
                - feeds: List of feed objects
                - starredCount: Number of starred items
                - newestItemId: ID of newest item (omitted if no items)
        """
        response = await self._make_request("GET", f"{self.API_BASE}/feeds")
        return response.json()

    async def create_feed(
        self, url: str, folder_id: int | None = None
    ) -> dict[str, Any]:
        """Subscribe to a feed.

        Args:
            url: Feed URL
            folder_id: Optional folder ID (None for root)

        Returns:
            The first entry of the returned ``feeds`` list, or ``{}`` when
            the response carries no feed.

        Raises:
            HTTPStatusError: 409 if feed already exists, 422 if URL is invalid
        """
        payload: dict[str, Any] = {"url": url}
        # ``folderId`` is only sent when the caller picked a folder.
        if folder_id is not None:
            payload["folderId"] = folder_id
        response = await self._make_request(
            "POST", f"{self.API_BASE}/feeds", json=payload
        )
        created = response.json().get("feeds", [])
        if not created:
            return {}
        return created[0]

    async def delete_feed(self, feed_id: int) -> None:
        """Unsubscribe from a feed (deletes all items).

        Args:
            feed_id: Feed ID

        Raises:
            HTTPStatusError: 404 if feed not found
        """
        await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}")

    async def move_feed(self, feed_id: int, folder_id: int | None) -> None:
        """Move a feed into another folder.

        Args:
            feed_id: Feed ID
            folder_id: Target folder ID (None for root); sent as-is, so
                ``None`` is serialized as JSON ``null``.

        Raises:
            HTTPStatusError: 404 if feed not found
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/feeds/{feed_id}/move",
            json={"folderId": folder_id},
        )

    async def rename_feed(self, feed_id: int, title: str) -> None:
        """Rename a feed.

        Args:
            feed_id: Feed ID
            title: New feed title

        Raises:
            HTTPStatusError: 404 if feed not found
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/feeds/{feed_id}/rename",
            json={"feedTitle": title},
        )

    async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None:
        """Mark the items of a feed as read.

        Args:
            feed_id: Feed ID
            newest_item_id: ID of newest item to mark read

        Raises:
            HTTPStatusError: 404 if feed not found
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/feeds/{feed_id}/read",
            json={"newestItemId": newest_item_id},
        )

    # --- Items ---

    async def get_items(
        self,
        batch_size: int = 50,
        offset: int = 0,
        type_: int = NewsItemType.ALL,
        id_: int = 0,
        get_read: bool = True,
        oldest_first: bool = False,
    ) -> list[dict[str, Any]]:
        """Fetch items (articles) with filtering.

        Args:
            batch_size: Number of items to return (-1 for all)
            offset: Item ID to start after (for pagination)
            type_: Item type filter (NewsItemType)
            id_: Feed/folder ID (ignored for STARRED/ALL types)
            get_read: Include read items
            oldest_first: Sort oldest first instead of newest

        Returns:
            The ``items`` list from the response, or ``[]`` if the key is
            absent.
        """
        params: dict[str, Any] = {
            "batchSize": batch_size,
            "offset": offset,
            # int() guarantees the numeric value goes on the wire: before
            # Python 3.11, str() of an IntEnum member is "NewsItemType.ALL".
            "type": int(type_),
            "id": id_,
            # Booleans are sent as the lowercase strings "true"/"false".
            "getRead": str(get_read).lower(),
            "oldestFirst": str(oldest_first).lower(),
        }
        response = await self._make_request(
            "GET", f"{self.API_BASE}/items", params=params
        )
        return response.json().get("items", [])

    async def get_item(self, item_id: int) -> dict[str, Any]:
        """Look up a single item by ID.

        Note: per the original author, the News API has no single-item
        endpoint, so this downloads every item (``batch_size=-1``, read
        items included) and scans for a matching ``id``. Prefer
        ``get_items`` scoped to a feed when the feed is known.

        Args:
            item_id: Item ID

        Returns:
            Item data

        Raises:
            ValueError: If item not found
        """
        items = await self.get_items(batch_size=-1, get_read=True)
        match = next((item for item in items if item.get("id") == item_id), None)
        if match is None:
            raise ValueError(f"Item {item_id} not found")
        return match

    async def get_updated_items(
        self,
        last_modified: int,
        type_: int = NewsItemType.ALL,
        id_: int = 0,
    ) -> list[dict[str, Any]]:
        """Fetch items modified since a timestamp (for delta sync).

        Args:
            last_modified: Unix timestamp (seconds or microseconds)
            type_: Item type filter
            id_: Feed/folder ID

        Returns:
            The ``items`` list from the response (per the original docs this
            includes deleted items), or ``[]`` if the key is absent.
        """
        params: dict[str, Any] = {
            "lastModified": last_modified,
            # Same IntEnum-to-int coercion as in get_items.
            "type": int(type_),
            "id": id_,
        }
        response = await self._make_request(
            "GET", f"{self.API_BASE}/items/updated", params=params
        )
        return response.json().get("items", [])

    async def mark_item_read(self, item_id: int) -> None:
        """Mark a single item as read.

        Args:
            item_id: Item ID

        Raises:
            HTTPStatusError: 404 if item not found
        """
        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read")

    async def mark_item_unread(self, item_id: int) -> None:
        """Mark a single item as unread.

        Args:
            item_id: Item ID

        Raises:
            HTTPStatusError: 404 if item not found
        """
        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread")

    async def star_item(self, item_id: int) -> None:
        """Star (favorite) a single item.

        Args:
            item_id: Item ID

        Raises:
            HTTPStatusError: 404 if item not found
        """
        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star")

    async def unstar_item(self, item_id: int) -> None:
        """Unstar a single item.

        Args:
            item_id: Item ID

        Raises:
            HTTPStatusError: 404 if item not found
        """
        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar")

    async def mark_items_read(self, item_ids: list[int]) -> None:
        """Mark multiple items as read.

        Args:
            item_ids: List of item IDs
        """
        await self._make_request(
            "POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids}
        )

    async def mark_items_unread(self, item_ids: list[int]) -> None:
        """Mark multiple items as unread.

        Args:
            item_ids: List of item IDs
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/items/unread/multiple",
            json={"itemIds": item_ids},
        )

    async def star_items(self, item_ids: list[int]) -> None:
        """Star multiple items.

        Args:
            item_ids: List of item IDs
        """
        await self._make_request(
            "POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids}
        )

    async def unstar_items(self, item_ids: list[int]) -> None:
        """Unstar multiple items.

        Args:
            item_ids: List of item IDs
        """
        await self._make_request(
            "POST",
            f"{self.API_BASE}/items/unstar/multiple",
            json={"itemIds": item_ids},
        )

    async def mark_all_read(self, newest_item_id: int) -> None:
        """Mark all items as read.

        Args:
            newest_item_id: ID of newest item to mark read
        """
        await self._make_request(
            "POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id}
        )

    # --- Status ---

    async def get_status(self) -> dict[str, Any]:
        """Return the News app status document.

        Returns:
            The decoded response body (version and warnings).
        """
        response = await self._make_request("GET", f"{self.API_BASE}/status")
        return response.json()

    async def get_version(self) -> str:
        """Return the News app version.

        Returns:
            Version string (e.g., "25.0.0"), or ``""`` if the response has
            no ``version`` key.
        """
        response = await self._make_request("GET", f"{self.API_BASE}/version")
        return response.json().get("version", "")
|
||||
@@ -0,0 +1,170 @@
|
||||
"""Pydantic models for Nextcloud News app responses."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from .base import BaseResponse
|
||||
|
||||
|
||||
class NewsFolder(BaseModel):
    """Model for a News folder."""

    # Allow population by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)

    # Both fields are required (no default).
    id: int = Field(..., description="Folder ID")
    name: str = Field(..., description="Folder name")
|
||||
|
||||
|
||||
class NewsFeed(BaseModel):
    """Model for a News feed (RSS/Atom subscription)."""

    # Allow population by snake_case field name as well as by camelCase alias.
    model_config = ConfigDict(populate_by_name=True)

    # --- identity (required) ---
    id: int = Field(..., description="Feed ID")
    url: str = Field(..., description="Feed URL")
    title: str = Field(..., description="Feed title")

    # --- presentation / placement ---
    favicon_link: str | None = Field(
        default=None, alias="faviconLink", description="Favicon URL"
    )
    link: str | None = Field(default=None, description="Website link")
    added: int = Field(..., description="Unix timestamp when feed was added")
    folder_id: int | None = Field(
        default=None, alias="folderId", description="Parent folder ID"
    )
    unread_count: int = Field(
        default=0, alias="unreadCount", description="Number of unread items"
    )
    ordering: int = Field(
        default=0, description="Feed ordering (0=default, 1=oldest, 2=newest)"
    )
    pinned: bool = Field(default=False, description="Whether feed is pinned to top")

    # --- update health ---
    update_error_count: int = Field(
        default=0, alias="updateErrorCount", description="Consecutive update failures"
    )
    last_update_error: str | None = Field(
        default=None, alias="lastUpdateError", description="Last update error message"
    )

    @property
    def has_errors(self) -> bool:
        """Return True when at least one update failure has been counted."""
        failures = self.update_error_count
        return failures > 0
|
||||
|
||||
|
||||
class NewsItem(BaseModel):
    """Model for a News item (article) with full content."""

    # Allow population by snake_case field name as well as by camelCase alias.
    model_config = ConfigDict(populate_by_name=True)

    # --- identity ---
    id: int = Field(..., description="Item ID")
    guid: str = Field(..., description="Globally unique identifier")
    guid_hash: str = Field(..., alias="guidHash", description="MD5 hash of GUID")

    # --- article content ---
    url: str | None = Field(default=None, description="Article URL")
    title: str = Field(..., description="Article title")
    author: str | None = Field(default=None, description="Article author")
    pub_date: int | None = Field(
        default=None, alias="pubDate", description="Publication timestamp"
    )
    body: str | None = Field(default=None, description="Article content (HTML)")

    # --- attachments / media ---
    enclosure_mime: str | None = Field(
        default=None, alias="enclosureMime", description="Enclosure MIME type"
    )
    enclosure_link: str | None = Field(
        default=None, alias="enclosureLink", description="Enclosure URL"
    )
    media_thumbnail: str | None = Field(
        default=None, alias="mediaThumbnail", description="Media thumbnail URL"
    )
    media_description: str | None = Field(
        default=None, alias="mediaDescription", description="Media description"
    )

    # --- state ---
    feed_id: int = Field(..., alias="feedId", description="Parent feed ID")
    unread: bool = Field(default=True, description="Whether item is unread")
    starred: bool = Field(default=False, description="Whether item is starred")
    rtl: bool = Field(default=False, description="Right-to-left text")
    last_modified: int = Field(
        ..., alias="lastModified", description="Last modification timestamp"
    )

    # --- change detection ---
    fingerprint: str | None = Field(
        default=None, description="Content fingerprint for deduplication"
    )
    content_hash: str | None = Field(
        default=None, alias="contentHash", description="Content hash"
    )
|
||||
|
||||
|
||||
class NewsItemSummary(BaseModel):
    """Lightweight model for News item list responses."""

    # Allow population by snake_case field name as well as by camelCase alias.
    model_config = ConfigDict(populate_by_name=True)

    # Required fields.
    id: int = Field(..., description="Item ID")
    title: str = Field(..., description="Article title")
    feed_id: int = Field(..., alias="feedId", description="Parent feed ID")

    # Flags with defaults.
    unread: bool = Field(default=True, description="Whether item is unread")
    starred: bool = Field(default=False, description="Whether item is starred")

    # Optional metadata.
    pub_date: int | None = Field(
        default=None, alias="pubDate", description="Publication timestamp"
    )
    url: str | None = Field(default=None, description="Article URL")
    author: str | None = Field(default=None, description="Article author")
|
||||
|
||||
|
||||
class NewsStatus(BaseModel):
    """Model for News app status."""

    version: str = Field(..., description="News app version")
    # default_factory gives every instance its own empty dict.
    warnings: dict = Field(
        default_factory=dict,
        description="Configuration warnings",
    )
|
||||
|
||||
|
||||
# --- Response Models ---
|
||||
|
||||
|
||||
class ListFoldersResponse(BaseResponse):
    """Response model for listing folders."""

    # Both fields are required; the caller supplies the count explicitly.
    results: list[NewsFolder] = Field(..., description="List of folders")
    total_count: int = Field(..., description="Total number of folders")
|
||||
|
||||
|
||||
class ListFeedsResponse(BaseResponse):
    """Response model for listing feeds."""

    results: list[NewsFeed] = Field(..., description="List of feeds")
    # Account-level metadata; both fall back to a default when not supplied.
    starred_count: int = Field(default=0, description="Number of starred items")
    newest_item_id: int | None = Field(default=None, description="ID of newest item")
    total_count: int = Field(..., description="Total number of feeds")
|
||||
|
||||
|
||||
class ListItemsResponse(BaseResponse):
    """Response model for listing items."""

    results: list[NewsItemSummary] = Field(..., description="List of items")
    total_count: int = Field(..., description="Number of items returned")
    # Pagination hints; both are optional and default to "nothing more".
    has_more: bool = Field(default=False, description="Whether more items exist")
    oldest_id: int | None = Field(
        default=None, description="Oldest item ID (for pagination)"
    )
|
||||
|
||||
|
||||
class GetItemResponse(BaseResponse):
    """Response model for getting a single item."""

    # Required: the complete item, not the list summary.
    item: NewsItem = Field(..., description="Full item details")
|
||||
|
||||
|
||||
class FeedHealthResponse(BaseResponse):
    """Response model for feed health status."""

    # Which feed this report is about.
    feed_id: int = Field(..., description="Feed ID")
    title: str = Field(..., description="Feed title")
    url: str = Field(..., description="Feed URL")

    # Update-error summary.
    has_errors: bool = Field(..., description="Whether feed has update errors")
    error_count: int = Field(..., description="Number of consecutive errors")
    last_error: str | None = Field(default=None, description="Last error message")
|
||||
|
||||
|
||||
class GetStatusResponse(BaseResponse):
    """Response model for app status."""

    version: str = Field(..., description="News app version")
    # default_factory gives every instance its own empty dict.
    warnings: dict = Field(
        default_factory=dict,
        description="Configuration warnings",
    )
|
||||
@@ -2,6 +2,7 @@ from .calendar import configure_calendar_tools
|
||||
from .contacts import configure_contacts_tools
|
||||
from .cookbook import configure_cookbook_tools
|
||||
from .deck import configure_deck_tools
|
||||
from .news import configure_news_tools
|
||||
from .notes import configure_notes_tools
|
||||
from .semantic import configure_semantic_tools
|
||||
from .sharing import configure_sharing_tools
|
||||
@@ -13,6 +14,7 @@ __all__ = [
|
||||
"configure_contacts_tools",
|
||||
"configure_cookbook_tools",
|
||||
"configure_deck_tools",
|
||||
"configure_news_tools",
|
||||
"configure_notes_tools",
|
||||
"configure_semantic_tools",
|
||||
"configure_sharing_tools",
|
||||
|
||||
@@ -0,0 +1,360 @@
|
||||
"""MCP tools for Nextcloud News app."""
|
||||
|
||||
import logging
|
||||
|
||||
from httpx import HTTPStatusError, RequestError
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.shared.exceptions import McpError
|
||||
from mcp.types import ErrorData
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.client.news import NewsItemType
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
from nextcloud_mcp_server.models.news import (
|
||||
FeedHealthResponse,
|
||||
GetItemResponse,
|
||||
GetStatusResponse,
|
||||
ListFeedsResponse,
|
||||
ListFoldersResponse,
|
||||
ListItemsResponse,
|
||||
NewsFeed,
|
||||
NewsFolder,
|
||||
NewsItem,
|
||||
NewsItemSummary,
|
||||
)
|
||||
from nextcloud_mcp_server.observability.metrics import instrument_tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def configure_news_tools(mcp: FastMCP):
|
||||
"""Configure News app MCP tools."""
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_folders(ctx: Context) -> ListFoldersResponse:
|
||||
"""List all News folders (requires news:read scope)."""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
folders_data = await client.news.get_folders()
|
||||
folders = [NewsFolder(**f) for f in folders_data]
|
||||
return ListFoldersResponse(results=folders, total_count=len(folders))
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing folders: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list folders: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_feeds(ctx: Context) -> ListFeedsResponse:
|
||||
"""List all News feeds with metadata (requires news:read scope).
|
||||
|
||||
Returns feeds with unread counts, error status, and overall starred count.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
data = await client.news.get_feeds()
|
||||
feeds = [NewsFeed(**f) for f in data.get("feeds", [])]
|
||||
return ListFeedsResponse(
|
||||
results=feeds,
|
||||
starred_count=data.get("starredCount", 0),
|
||||
newest_item_id=data.get("newestItemId"),
|
||||
total_count=len(feeds),
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing feeds: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list feeds: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_list_items(
|
||||
ctx: Context,
|
||||
feed_id: int | None = None,
|
||||
folder_id: int | None = None,
|
||||
starred_only: bool = False,
|
||||
unread_only: bool = False,
|
||||
limit: int = 50,
|
||||
offset: int = 0,
|
||||
) -> ListItemsResponse:
|
||||
"""List News items (articles) with optional filtering (requires news:read scope).
|
||||
|
||||
Args:
|
||||
feed_id: Filter by specific feed ID
|
||||
folder_id: Filter by specific folder ID
|
||||
starred_only: Return only starred items
|
||||
unread_only: Return only unread items
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with items, count, and pagination info
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
|
||||
# Determine item type filter
|
||||
type_ = NewsItemType.ALL
|
||||
id_ = 0
|
||||
if starred_only:
|
||||
type_ = NewsItemType.STARRED
|
||||
elif feed_id is not None:
|
||||
type_ = NewsItemType.FEED
|
||||
id_ = feed_id
|
||||
elif folder_id is not None:
|
||||
type_ = NewsItemType.FOLDER
|
||||
id_ = folder_id
|
||||
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=type_,
|
||||
id_=id_,
|
||||
get_read=not unread_only,
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
# Determine pagination info
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(code=-1, message=f"Network error listing items: {str(e)}")
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to list items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_item(item_id: int, ctx: Context) -> GetItemResponse:
|
||||
"""Get a specific News item by ID with full content (requires news:read scope).
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Returns:
|
||||
GetItemResponse with full item details including HTML body
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
item_data = await client.news.get_item(item_id)
|
||||
item = NewsItem(**item_data)
|
||||
return GetItemResponse(item=item)
|
||||
except ValueError as e:
|
||||
raise McpError(ErrorData(code=-1, message=str(e)))
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting item {item_id}: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 404:
|
||||
raise McpError(ErrorData(code=-1, message=f"Item {item_id} not found"))
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get item {item_id}: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_starred_items(
|
||||
ctx: Context, limit: int = 50, offset: int = 0
|
||||
) -> ListItemsResponse:
|
||||
"""Get starred (favorited) News items (requires news:read scope).
|
||||
|
||||
Convenience method for retrieving user's starred articles.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with starred items
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=NewsItemType.STARRED,
|
||||
get_read=True, # Include read starred items
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting starred items: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get starred items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("news:read")
|
||||
@instrument_tool
|
||||
async def nc_news_get_unread_items(
|
||||
ctx: Context, limit: int = 50, offset: int = 0
|
||||
) -> ListItemsResponse:
|
||||
"""Get unread News items (requires news:read scope).
|
||||
|
||||
Convenience method for retrieving unread articles across all feeds.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of items to return (default 50, -1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
|
||||
Returns:
|
||||
ListItemsResponse with unread items
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
try:
|
||||
items_data = await client.news.get_items(
|
||||
batch_size=limit,
|
||||
offset=offset,
|
||||
type_=NewsItemType.ALL,
|
||||
get_read=False, # Only unread items
|
||||
)
|
||||
items = [NewsItemSummary(**i) for i in items_data]
|
||||
|
||||
oldest_id = min((i.id for i in items), default=None) if items else None
|
||||
has_more = len(items) == limit and limit > 0
|
||||
|
||||
return ListItemsResponse(
|
||||
results=items,
|
||||
total_count=len(items),
|
||||
has_more=has_more,
|
||||
oldest_id=oldest_id,
|
||||
)
|
||||
except RequestError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1, message=f"Network error getting unread items: {str(e)}"
|
||||
)
|
||||
)
|
||||
except HTTPStatusError as e:
|
||||
raise McpError(
|
||||
ErrorData(
|
||||
code=-1,
|
||||
message=f"Failed to get unread items: {e.response.status_code}",
|
||||
)
|
||||
)
|
||||
|
||||
@mcp.tool()
@require_scopes("news:read")
@instrument_tool
async def nc_news_get_feed_health(feed_id: int, ctx: Context) -> FeedHealthResponse:
    """Get health status for a specific feed (requires news:read scope).

    Fetches the feed list from the News client and reports the error state
    of the first entry whose ``id`` equals ``feed_id``.

    Args:
        feed_id: Feed ID to check

    Returns:
        FeedHealthResponse with the feed's title, URL, error flag, update
        error count and last update error.

    Raises:
        McpError: If no feed with ``feed_id`` exists, the request fails at
            the network level, or the server answers with an HTTP error
            status.
    """
    client = await get_client(ctx)
    try:
        feeds_payload = await client.news.get_feeds()
        matching_entry = next(
            (
                entry
                for entry in feeds_payload.get("feeds", [])
                if entry.get("id") == feed_id
            ),
            None,
        )
        if matching_entry is None:
            raise McpError(ErrorData(code=-1, message=f"Feed {feed_id} not found"))

        feed = NewsFeed(**matching_entry)
        return FeedHealthResponse(
            feed_id=feed.id,
            title=feed.title,
            url=feed.url,
            has_errors=feed.has_errors,
            error_count=feed.update_error_count,
            last_error=feed.last_update_error,
        )
    except RequestError as e:
        network_failure = ErrorData(
            code=-1,
            message=f"Network error getting feed health: {str(e)}",
        )
        raise McpError(network_failure)
    except HTTPStatusError as e:
        http_failure = ErrorData(
            code=-1,
            message=f"Failed to get feed health: {e.response.status_code}",
        )
        raise McpError(http_failure)
|
||||
|
||||
@mcp.tool()
@require_scopes("news:read")
@instrument_tool
async def nc_news_get_status(ctx: Context) -> GetStatusResponse:
    """Get News app status and version (requires news:read scope).

    Returns:
        GetStatusResponse carrying the reported version ("unknown" when the
        status payload has no ``version`` key) and the ``warnings`` mapping
        (empty when absent).

    Raises:
        McpError: If the request fails at the network level or the server
            answers with an HTTP error status.
    """
    client = await get_client(ctx)
    try:
        payload = await client.news.get_status()
        app_version = payload.get("version", "unknown")
        app_warnings = payload.get("warnings", {})
        return GetStatusResponse(version=app_version, warnings=app_warnings)
    except RequestError as e:
        network_failure = ErrorData(
            code=-1, message=f"Network error getting status: {str(e)}"
        )
        raise McpError(network_failure)
    except HTTPStatusError as e:
        http_failure = ErrorData(
            code=-1,
            message=f"Failed to get status: {e.response.status_code}",
        )
        raise McpError(http_failure)
|
||||
@@ -0,0 +1,49 @@
|
||||
"""HTML to Markdown conversion utilities for vector sync."""
|
||||
|
||||
import logging
|
||||
|
||||
from markdownify import markdownify as md
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def html_to_markdown(html_content: str | None) -> str:
|
||||
"""Convert HTML content to Markdown, preserving semantic structure.
|
||||
|
||||
This function converts HTML (typically from RSS/Atom feed items) to Markdown
|
||||
for better text embedding. Markdown preserves:
|
||||
- Heading hierarchy (important for document structure)
|
||||
- Lists (bullet and numbered)
|
||||
- Links (as [text](url))
|
||||
- Bold/italic emphasis
|
||||
- Paragraphs and line breaks
|
||||
|
||||
Args:
|
||||
html_content: HTML string to convert (may be None or empty)
|
||||
|
||||
Returns:
|
||||
Markdown string, or empty string if input is None/empty
|
||||
|
||||
Example:
|
||||
>>> html_to_markdown("<h1>Title</h1><p>Content with <b>bold</b>.</p>")
|
||||
'# Title\\n\\nContent with **bold**.\\n\\n'
|
||||
"""
|
||||
if not html_content:
|
||||
return ""
|
||||
|
||||
try:
|
||||
markdown = md(
|
||||
html_content,
|
||||
heading_style="ATX", # Use # style headings
|
||||
strip=["script", "style", "iframe", "noscript"], # Remove unsafe elements
|
||||
bullets="-", # Use - for unordered lists
|
||||
code_language="", # Don't add language hints to code blocks
|
||||
)
|
||||
return markdown.strip()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to convert HTML to Markdown: {e}")
|
||||
# Fallback: strip all HTML tags as a last resort
|
||||
import re
|
||||
|
||||
text = re.sub(r"<[^>]+>", " ", html_content)
|
||||
return " ".join(text.split()) # Normalize whitespace
|
||||
@@ -272,6 +272,45 @@ async def _index_document(
|
||||
file_path = None # Notes don't have file paths
|
||||
content_bytes = None # Notes don't have binary content
|
||||
content_type = None
|
||||
elif doc_task.doc_type == "news_item":
|
||||
from nextcloud_mcp_server.vector.html_processor import html_to_markdown
|
||||
|
||||
item = await nc_client.news.get_item(int(doc_task.doc_id))
|
||||
# Convert HTML body to Markdown for better embedding
|
||||
body_markdown = html_to_markdown(item.get("body", ""))
|
||||
# Build content: title + URL + body
|
||||
item_title = item.get("title", "")
|
||||
item_url = item.get("url", "")
|
||||
feed_title = item.get("feedTitle", "")
|
||||
|
||||
# Structure content for embedding
|
||||
content_parts = [item_title]
|
||||
if feed_title:
|
||||
content_parts.append(f"Source: {feed_title}")
|
||||
if item_url:
|
||||
content_parts.append(f"URL: {item_url}")
|
||||
content_parts.append("") # Blank line
|
||||
content_parts.append(body_markdown)
|
||||
content = "\n".join(content_parts)
|
||||
|
||||
title = item_title
|
||||
etag = item.get("guidHash", "")
|
||||
# Store news-specific metadata for later use in payload
|
||||
file_metadata = {
|
||||
"feed_id": item.get("feedId"),
|
||||
"feed_title": feed_title,
|
||||
"author": item.get("author"),
|
||||
"pub_date": item.get("pubDate"),
|
||||
"starred": item.get("starred", False),
|
||||
"unread": item.get("unread", True),
|
||||
"url": item_url,
|
||||
"guid_hash": item.get("guidHash"),
|
||||
"enclosure_link": item.get("enclosureLink"),
|
||||
"enclosure_mime": item.get("enclosureMime"),
|
||||
}
|
||||
file_path = None
|
||||
content_bytes = None
|
||||
content_type = None
|
||||
elif doc_task.doc_type == "file":
|
||||
# For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
|
||||
if not doc_task.file_path:
|
||||
@@ -358,15 +397,16 @@ async def _index_document(
|
||||
chunks = await chunker.chunk_text(content)
|
||||
|
||||
# Assign page numbers to chunks if page boundaries are available (PDFs)
|
||||
if doc_task.doc_type == "file" and "page_boundaries" in file_metadata:
|
||||
page_boundaries = file_metadata.get("page_boundaries")
|
||||
if doc_task.doc_type == "file" and page_boundaries is not None:
|
||||
with trace_operation(
|
||||
"vector_sync.assign_page_numbers",
|
||||
attributes={
|
||||
"vector_sync.chunk_count": len(chunks),
|
||||
"vector_sync.page_count": len(file_metadata["page_boundaries"]),
|
||||
"vector_sync.page_count": len(page_boundaries),
|
||||
},
|
||||
):
|
||||
assign_page_numbers(chunks, file_metadata["page_boundaries"])
|
||||
assign_page_numbers(chunks, page_boundaries)
|
||||
|
||||
# Diagnostic: Verify page number assignment
|
||||
assigned_count = sum(1 for c in chunks if c.page_number is not None)
|
||||
@@ -389,8 +429,8 @@ async def _index_document(
|
||||
f"Text length: {len(content)}, "
|
||||
f"Chunks: {len(chunks)}, "
|
||||
f"Chunk offset range: [{chunks[0].start_offset}:{chunks[-1].end_offset}], "
|
||||
f"Page boundaries: {len(file_metadata['page_boundaries'])} pages, "
|
||||
f"First boundary: {file_metadata['page_boundaries'][0] if file_metadata['page_boundaries'] else 'None'}"
|
||||
f"Page boundaries: {len(page_boundaries)} pages, "
|
||||
f"First boundary: {page_boundaries[0] if page_boundaries else 'None'}"
|
||||
)
|
||||
|
||||
# Extract chunk texts for embedding
|
||||
@@ -566,6 +606,23 @@ async def _index_document(
|
||||
if doc_task.doc_type == "file"
|
||||
else {}
|
||||
),
|
||||
# News item-specific metadata
|
||||
**(
|
||||
{
|
||||
"feed_id": file_metadata.get("feed_id"),
|
||||
"feed_title": file_metadata.get("feed_title"),
|
||||
"author": file_metadata.get("author"),
|
||||
"pub_date": file_metadata.get("pub_date"),
|
||||
"starred": file_metadata.get("starred"),
|
||||
"unread": file_metadata.get("unread"),
|
||||
"url": file_metadata.get("url"),
|
||||
"guid_hash": file_metadata.get("guid_hash"),
|
||||
"enclosure_link": file_metadata.get("enclosure_link"),
|
||||
"enclosure_mime": file_metadata.get("enclosure_mime"),
|
||||
}
|
||||
if doc_task.doc_type == "news_item"
|
||||
else {}
|
||||
),
|
||||
# Highlighted page image (PDF only)
|
||||
**(
|
||||
{
|
||||
|
||||
@@ -544,9 +544,217 @@ async def scan_user_documents(
|
||||
|
||||
queued += file_queued
|
||||
|
||||
# Scan News items (starred + unread)
|
||||
news_queued = 0
|
||||
try:
|
||||
news_queued = await scan_news_items(
|
||||
user_id=user_id,
|
||||
send_stream=send_stream,
|
||||
nc_client=nc_client,
|
||||
initial_sync=initial_sync,
|
||||
scan_id=scan_id,
|
||||
)
|
||||
queued += news_queued
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to scan news items for {user_id}: {e}")
|
||||
|
||||
if queued > 0:
|
||||
logger.info(
|
||||
f"Sent {queued} documents ({file_queued} files) for incremental sync: {user_id}"
|
||||
f"Sent {queued} documents ({file_queued} files, {news_queued} news items) for incremental sync: {user_id}"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"No changes detected for {user_id}")
|
||||
|
||||
|
||||
async def scan_news_items(
    user_id: str,
    send_stream: MemoryObjectSendStream[DocumentTask],
    nc_client: NextcloudClient,
    initial_sync: bool,
    scan_id: int,
) -> int:
    """
    Scan user's News items (starred + unread) and queue changed items.

    Indexes starred and unread items for semantic search. This provides
    a balanced approach - important items (starred) and current items
    (unread) are searchable, while avoiding indexing the entire history.

    On incremental syncs, items that are indexed in Qdrant but no longer
    returned by the News API are first recorded in ``_potentially_deleted``
    and only queued for deletion once they have been missing for longer than
    1.5x the configured scan interval.

    Args:
        user_id: User to scan
        send_stream: Stream to send changed documents to processors
        nc_client: Authenticated Nextcloud client
        initial_sync: If True, send all documents (first-time sync)
        scan_id: Scan identifier for logging

    Returns:
        Number of items queued for processing (index and delete tasks)
    """
    from nextcloud_mcp_server.client.news import NewsItemType

    settings = get_settings()
    queued = 0

    async def _queue_for_indexing(doc_id: str, modified_at: int) -> None:
        """Write a placeholder point, then hand the item to the processors."""
        await write_placeholder_point(
            doc_id=doc_id,
            doc_type="news_item",
            user_id=user_id,
            modified_at=modified_at,
        )
        await send_stream.send(
            DocumentTask(
                user_id=user_id,
                doc_id=doc_id,
                doc_type="news_item",
                operation="index",
                modified_at=modified_at,
            )
        )

    # Get indexed news item IDs from Qdrant (for deletion tracking)
    indexed_item_ids: set[str] = set()
    if not initial_sync:
        qdrant_client = await get_qdrant_client()
        # Several points can share one doc_id, so a single page of 10000
        # points may not cover every indexed item. Follow the scroll offset
        # until Qdrant reports no further page.
        next_offset = None
        while True:
            points, next_offset = await qdrant_client.scroll(
                collection_name=settings.get_collection_name(),
                scroll_filter=Filter(
                    must=[
                        FieldCondition(key="user_id", match=MatchValue(value=user_id)),
                        FieldCondition(key="doc_type", match=MatchValue(value="news_item")),
                    ]
                ),
                with_payload=["doc_id"],
                with_vectors=False,
                limit=10000,
                offset=next_offset,
            )
            indexed_item_ids.update(
                point.payload["doc_id"]
                for point in points
                if point.payload and "doc_id" in point.payload
            )
            if next_offset is None:
                break
        logger.debug(f"Found {len(indexed_item_ids)} indexed news items in Qdrant")

    # Fetch starred items (type=STARRED)
    starred_items = await nc_client.news.get_items(
        batch_size=-1,  # Get all
        type_=NewsItemType.STARRED,
        get_read=True,  # Include read starred items
    )
    logger.debug(f"[SCAN-{scan_id}] Found {len(starred_items)} starred news items")

    # Fetch unread items (type=ALL, get_read=False)
    unread_items = await nc_client.news.get_items(
        batch_size=-1,
        type_=NewsItemType.ALL,
        get_read=False,  # Only unread
    )
    logger.debug(f"[SCAN-{scan_id}] Found {len(unread_items)} unread news items")

    # Combine and deduplicate (an item can be both starred and unread);
    # the unread copy wins when an ID appears in both lists.
    items_by_id: dict[int, dict] = {}
    for item in starred_items:
        items_by_id[item["id"]] = item
    for item in unread_items:
        items_by_id[item["id"]] = item

    item_count = len(items_by_id)
    nextcloud_item_ids: set[str] = set()

    for item_id, item in items_by_id.items():
        doc_id = str(item_id)
        nextcloud_item_ids.add(doc_id)

        # Use lastModified timestamp (microseconds in News API); a missing
        # or null value is treated as 0.
        modified_at = item.get("lastModified") or 0
        # Convert to seconds if needed (News API uses microseconds)
        if modified_at > 10000000000:  # > year 2286 in seconds
            modified_at = modified_at // 1000000

        if initial_sync:
            # Send everything on first sync - write placeholder first
            await _queue_for_indexing(doc_id, modified_at)
            queued += 1
            continue

        # Incremental sync: check if item exists and compare modified_at
        doc_key = (user_id, doc_id)
        if doc_key in _potentially_deleted:
            logger.debug(
                f"News item {doc_id} reappeared, removing from deletion grace period"
            )
            del _potentially_deleted[doc_key]

        # Query Qdrant for existing entry
        existing_metadata = await query_document_metadata(
            doc_id=doc_id, doc_type="news_item", user_id=user_id
        )

        needs_indexing = False
        if existing_metadata is None:
            needs_indexing = True
        elif existing_metadata.get("modified_at", 0) < modified_at:
            needs_indexing = True
        elif existing_metadata.get("is_placeholder", False):
            # A placeholder older than five scan intervals is considered
            # stale and gets requeued.
            queued_at = existing_metadata.get("queued_at", 0)
            placeholder_age = time.time() - queued_at
            stale_threshold = settings.vector_sync_scan_interval * 5
            if placeholder_age > stale_threshold:
                logger.debug(
                    f"Found stale placeholder for news item {doc_id} "
                    f"(age={placeholder_age:.1f}s), requeuing"
                )
                needs_indexing = True

        if needs_indexing:
            await _queue_for_indexing(doc_id, modified_at)
            queued += 1

    logger.info(
        f"[SCAN-{scan_id}] Found {item_count} news items (starred+unread) for {user_id}"
    )
    record_vector_sync_scan(item_count)

    # Check for deleted items (not initial sync)
    # Items become "deleted" when they are no longer starred AND become read
    if not initial_sync:
        grace_period = settings.vector_sync_scan_interval * 1.5
        current_time = time.time()

        for doc_id in indexed_item_ids - nextcloud_item_ids:
            doc_key = (user_id, doc_id)

            if doc_key not in _potentially_deleted:
                logger.debug(
                    f"News item {doc_id} missing for first time, starting grace period"
                )
                _potentially_deleted[doc_key] = current_time
                continue

            first_missing_time = _potentially_deleted[doc_key]
            time_missing = current_time - first_missing_time

            if time_missing >= grace_period:
                logger.info(
                    f"News item {doc_id} missing for {time_missing:.1f}s "
                    f"(>{grace_period:.1f}s grace period), sending deletion"
                )
                await send_stream.send(
                    DocumentTask(
                        user_id=user_id,
                        doc_id=doc_id,
                        doc_type="news_item",
                        operation="delete",
                        modified_at=0,
                    )
                )
                queued += 1
                del _potentially_deleted[doc_key]

    return queued
|
||||
|
||||
Reference in New Issue
Block a user