From a33f6a2f154db5b5d1fd09133a80a42e45fffa5e Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Sat, 29 Nov 2025 14:30:23 +0100 Subject: [PATCH] feat(news): add Nextcloud News app integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add full integration for the Nextcloud News (RSS/Atom reader) app: - Add NewsClient with complete CRUD operations for folders, feeds, and items - Add 8 read-only MCP tools for listing/getting folders, feeds, items - Add Pydantic models for News entities with camelCase alias support - Add vector sync support for starred + unread items - Add HTML to Markdown converter using markdownify for better embeddings - Add Docker post-install hook to enable News app - Add 25 unit tests for NewsClient API methods Vector sync indexes starred and unread items, providing a balanced approach that captures important (starred) and current (unread) content without indexing the entire article history. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../post-installation/10-install-news-app.sh | 5 + nextcloud_mcp_server/app.py | 4 +- nextcloud_mcp_server/client/__init__.py | 2 + nextcloud_mcp_server/client/news.py | 394 +++++++++++++ nextcloud_mcp_server/models/news.py | 170 ++++++ nextcloud_mcp_server/server/__init__.py | 2 + nextcloud_mcp_server/server/news.py | 360 ++++++++++++ nextcloud_mcp_server/vector/html_processor.py | 49 ++ nextcloud_mcp_server/vector/processor.py | 67 ++- nextcloud_mcp_server/vector/scanner.py | 210 ++++++- pyproject.toml | 1 + tests/client/conftest.py | 219 +++++++ tests/client/news/__init__.py | 0 tests/client/news/test_news_api.py | 542 ++++++++++++++++++ uv.lock | 37 ++ 15 files changed, 2055 insertions(+), 7 deletions(-) create mode 100755 app-hooks/post-installation/10-install-news-app.sh create mode 100644 nextcloud_mcp_server/client/news.py create mode 100644 nextcloud_mcp_server/models/news.py create mode 100644 
nextcloud_mcp_server/server/news.py create mode 100644 nextcloud_mcp_server/vector/html_processor.py create mode 100644 tests/client/news/__init__.py create mode 100644 tests/client/news/test_news_api.py diff --git a/app-hooks/post-installation/10-install-news-app.sh b/app-hooks/post-installation/10-install-news-app.sh new file mode 100755 index 0000000..6f32846 --- /dev/null +++ b/app-hooks/post-installation/10-install-news-app.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -euox pipefail + +php /var/www/html/occ app:enable news diff --git a/nextcloud_mcp_server/app.py b/nextcloud_mcp_server/app.py index f1185d6..b682e6a 100644 --- a/nextcloud_mcp_server/app.py +++ b/nextcloud_mcp_server/app.py @@ -60,6 +60,7 @@ from nextcloud_mcp_server.server import ( configure_contacts_tools, configure_cookbook_tools, configure_deck_tools, + configure_news_tools, configure_notes_tools, configure_semantic_tools, configure_sharing_tools, @@ -514,7 +515,7 @@ async def load_oauth_client_credentials( # and the authorization server will limit them to these allowed scopes. # # The PRM endpoint advertises the same scopes dynamically via @require_scopes decorators. 
- dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write" + dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write news:read news:write" # Add offline_access scope if refresh tokens are enabled enable_offline_access = os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() in ( @@ -1046,6 +1047,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = "contacts": configure_contacts_tools, "cookbook": configure_cookbook_tools, "deck": configure_deck_tools, + "news": configure_news_tools, } # If no specific apps are specified, enable all diff --git a/nextcloud_mcp_server/client/__init__.py b/nextcloud_mcp_server/client/__init__.py index a3277c6..3a5a6e1 100644 --- a/nextcloud_mcp_server/client/__init__.py +++ b/nextcloud_mcp_server/client/__init__.py @@ -18,6 +18,7 @@ from .contacts import ContactsClient from .cookbook import CookbookClient from .deck import DeckClient from .groups import GroupsClient +from .news import NewsClient from .notes import NotesClient from .sharing import SharingClient from .tables import TablesClient @@ -81,6 +82,7 @@ class NextcloudClient: self.contacts = ContactsClient(self._client, username) self.cookbook = CookbookClient(self._client, username) self.deck = DeckClient(self._client, username) + self.news = NewsClient(self._client, username) self.users = UsersClient(self._client, username) self.groups = GroupsClient(self._client, username) self.sharing = SharingClient(self._client, username) diff --git a/nextcloud_mcp_server/client/news.py b/nextcloud_mcp_server/client/news.py new file mode 100644 index 
0000000..679a882 --- /dev/null +++ b/nextcloud_mcp_server/client/news.py @@ -0,0 +1,394 @@ +"""Client for Nextcloud News app operations.""" + +import logging +from enum import IntEnum +from typing import Any + +from .base import BaseNextcloudClient + +logger = logging.getLogger(__name__) + + +class NewsItemType(IntEnum): + """Type constants for News API item queries.""" + + FEED = 0 # Single feed + FOLDER = 1 # Folder and its feeds + STARRED = 2 # All starred items + ALL = 3 # All items + + +class NewsClient(BaseNextcloudClient): + """Client for Nextcloud News app operations.""" + + app_name = "news" + API_BASE = "/apps/news/api/v1-3" + + # --- Folders --- + + async def get_folders(self) -> list[dict[str, Any]]: + """Get all folders.""" + response = await self._make_request("GET", f"{self.API_BASE}/folders") + return response.json().get("folders", []) + + async def create_folder(self, name: str) -> dict[str, Any]: + """Create a new folder. + + Args: + name: Folder name + + Returns: + Created folder data + + Raises: + HTTPStatusError: 409 if folder name already exists, + 422 if name is empty + """ + response = await self._make_request( + "POST", f"{self.API_BASE}/folders", json={"name": name} + ) + folders = response.json().get("folders", []) + return folders[0] if folders else {} + + async def rename_folder(self, folder_id: int, name: str) -> None: + """Rename a folder. + + Args: + folder_id: Folder ID + name: New folder name + + Raises: + HTTPStatusError: 404 if folder not found, 409 if name exists + """ + await self._make_request( + "PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name} + ) + + async def delete_folder(self, folder_id: int) -> None: + """Delete a folder and all its feeds/items. 
+ + Args: + folder_id: Folder ID + + Raises: + HTTPStatusError: 404 if folder not found + """ + await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}") + + async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None: + """Mark all items in a folder as read. + + Args: + folder_id: Folder ID + newest_item_id: ID of newest item to mark read (prevents marking + items user hasn't seen yet) + + Raises: + HTTPStatusError: 404 if folder not found + """ + await self._make_request( + "POST", + f"{self.API_BASE}/folders/{folder_id}/read", + json={"newestItemId": newest_item_id}, + ) + + # --- Feeds --- + + async def get_feeds(self) -> dict[str, Any]: + """Get all feeds with metadata. + + Returns: + Dict with keys: + - feeds: List of feed objects + - starredCount: Number of starred items + - newestItemId: ID of newest item (omitted if no items) + """ + response = await self._make_request("GET", f"{self.API_BASE}/feeds") + return response.json() + + async def create_feed( + self, url: str, folder_id: int | None = None + ) -> dict[str, Any]: + """Subscribe to a new feed. + + Args: + url: Feed URL + folder_id: Optional folder ID (None for root) + + Returns: + Created feed data + + Raises: + HTTPStatusError: 409 if feed already exists, 422 if URL is invalid + """ + body: dict[str, Any] = {"url": url} + if folder_id is not None: + body["folderId"] = folder_id + response = await self._make_request("POST", f"{self.API_BASE}/feeds", json=body) + data = response.json() + feeds = data.get("feeds", []) + return feeds[0] if feeds else {} + + async def delete_feed(self, feed_id: int) -> None: + """Unsubscribe from a feed (deletes all items). + + Args: + feed_id: Feed ID + + Raises: + HTTPStatusError: 404 if feed not found + """ + await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}") + + async def move_feed(self, feed_id: int, folder_id: int | None) -> None: + """Move a feed to a different folder. 
+ + Args: + feed_id: Feed ID + folder_id: Target folder ID (None for root) + + Raises: + HTTPStatusError: 404 if feed not found + """ + await self._make_request( + "POST", + f"{self.API_BASE}/feeds/{feed_id}/move", + json={"folderId": folder_id}, + ) + + async def rename_feed(self, feed_id: int, title: str) -> None: + """Rename a feed. + + Args: + feed_id: Feed ID + title: New feed title + + Raises: + HTTPStatusError: 404 if feed not found + """ + await self._make_request( + "POST", + f"{self.API_BASE}/feeds/{feed_id}/rename", + json={"feedTitle": title}, + ) + + async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None: + """Mark all items in a feed as read. + + Args: + feed_id: Feed ID + newest_item_id: ID of newest item to mark read + + Raises: + HTTPStatusError: 404 if feed not found + """ + await self._make_request( + "POST", + f"{self.API_BASE}/feeds/{feed_id}/read", + json={"newestItemId": newest_item_id}, + ) + + # --- Items --- + + async def get_items( + self, + batch_size: int = 50, + offset: int = 0, + type_: int = NewsItemType.ALL, + id_: int = 0, + get_read: bool = True, + oldest_first: bool = False, + ) -> list[dict[str, Any]]: + """Get items (articles) with filtering. + + Args: + batch_size: Number of items to return (-1 for all) + offset: Item ID to start after (for pagination) + type_: Item type filter (NewsItemType) + id_: Feed/folder ID (ignored for STARRED/ALL types) + get_read: Include read items + oldest_first: Sort oldest first instead of newest + + Returns: + List of item objects + """ + params: dict[str, Any] = { + "batchSize": batch_size, + "offset": offset, + "type": type_, + "id": id_, + "getRead": str(get_read).lower(), + "oldestFirst": str(oldest_first).lower(), + } + response = await self._make_request( + "GET", f"{self.API_BASE}/items", params=params + ) + return response.json().get("items", []) + + async def get_item(self, item_id: int) -> dict[str, Any]: + """Get a specific item by ID. 
+ + Note: The News API doesn't have a direct single-item endpoint, + so we fetch all items and filter. For efficiency, consider + caching or using get_items with specific feed if known. + + Args: + item_id: Item ID + + Returns: + Item data + + Raises: + ValueError: If item not found + """ + # Fetch all items and find the one we need + # This is inefficient but the API doesn't provide a direct endpoint + items = await self.get_items(batch_size=-1, get_read=True) + for item in items: + if item.get("id") == item_id: + return item + raise ValueError(f"Item {item_id} not found") + + async def get_updated_items( + self, + last_modified: int, + type_: int = NewsItemType.ALL, + id_: int = 0, + ) -> list[dict[str, Any]]: + """Get items modified since a timestamp (for delta sync). + + Args: + last_modified: Unix timestamp (seconds or microseconds) + type_: Item type filter + id_: Feed/folder ID + + Returns: + List of modified items (includes deleted items) + """ + params: dict[str, Any] = { + "lastModified": last_modified, + "type": type_, + "id": id_, + } + response = await self._make_request( + "GET", f"{self.API_BASE}/items/updated", params=params + ) + return response.json().get("items", []) + + async def mark_item_read(self, item_id: int) -> None: + """Mark a single item as read. + + Args: + item_id: Item ID + + Raises: + HTTPStatusError: 404 if item not found + """ + await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read") + + async def mark_item_unread(self, item_id: int) -> None: + """Mark a single item as unread. + + Args: + item_id: Item ID + + Raises: + HTTPStatusError: 404 if item not found + """ + await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread") + + async def star_item(self, item_id: int) -> None: + """Star (favorite) a single item. 
+ + Args: + item_id: Item ID + + Raises: + HTTPStatusError: 404 if item not found + """ + await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star") + + async def unstar_item(self, item_id: int) -> None: + """Unstar a single item. + + Args: + item_id: Item ID + + Raises: + HTTPStatusError: 404 if item not found + """ + await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar") + + async def mark_items_read(self, item_ids: list[int]) -> None: + """Mark multiple items as read. + + Args: + item_ids: List of item IDs + """ + await self._make_request( + "POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids} + ) + + async def mark_items_unread(self, item_ids: list[int]) -> None: + """Mark multiple items as unread. + + Args: + item_ids: List of item IDs + """ + await self._make_request( + "POST", + f"{self.API_BASE}/items/unread/multiple", + json={"itemIds": item_ids}, + ) + + async def star_items(self, item_ids: list[int]) -> None: + """Star multiple items. + + Args: + item_ids: List of item IDs + """ + await self._make_request( + "POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids} + ) + + async def unstar_items(self, item_ids: list[int]) -> None: + """Unstar multiple items. + + Args: + item_ids: List of item IDs + """ + await self._make_request( + "POST", + f"{self.API_BASE}/items/unstar/multiple", + json={"itemIds": item_ids}, + ) + + async def mark_all_read(self, newest_item_id: int) -> None: + """Mark all items as read. + + Args: + newest_item_id: ID of newest item to mark read + """ + await self._make_request( + "POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id} + ) + + # --- Status --- + + async def get_status(self) -> dict[str, Any]: + """Get News app status and configuration. 
+ + Returns: + Dict with version and warnings + """ + response = await self._make_request("GET", f"{self.API_BASE}/status") + return response.json() + + async def get_version(self) -> str: + """Get News app version. + + Returns: + Version string (e.g., "25.0.0") + """ + response = await self._make_request("GET", f"{self.API_BASE}/version") + return response.json().get("version", "") diff --git a/nextcloud_mcp_server/models/news.py b/nextcloud_mcp_server/models/news.py new file mode 100644 index 0000000..898cd86 --- /dev/null +++ b/nextcloud_mcp_server/models/news.py @@ -0,0 +1,170 @@ +"""Pydantic models for Nextcloud News app responses.""" + +from typing import List + +from pydantic import BaseModel, ConfigDict, Field + +from .base import BaseResponse + + +class NewsFolder(BaseModel): + """Model for a News folder.""" + + model_config = ConfigDict(populate_by_name=True) + + id: int = Field(description="Folder ID") + name: str = Field(description="Folder name") + + +class NewsFeed(BaseModel): + """Model for a News feed (RSS/Atom subscription).""" + + model_config = ConfigDict(populate_by_name=True) + + id: int = Field(description="Feed ID") + url: str = Field(description="Feed URL") + title: str = Field(description="Feed title") + favicon_link: str | None = Field( + None, alias="faviconLink", description="Favicon URL" + ) + link: str | None = Field(None, description="Website link") + added: int = Field(description="Unix timestamp when feed was added") + folder_id: int | None = Field( + None, alias="folderId", description="Parent folder ID" + ) + unread_count: int = Field( + 0, alias="unreadCount", description="Number of unread items" + ) + ordering: int = Field( + 0, description="Feed ordering (0=default, 1=oldest, 2=newest)" + ) + pinned: bool = Field(False, description="Whether feed is pinned to top") + update_error_count: int = Field( + 0, alias="updateErrorCount", description="Consecutive update failures" + ) + last_update_error: str | None = Field( + None, 
alias="lastUpdateError", description="Last update error message" + ) + + @property + def has_errors(self) -> bool: + """Check if feed has update errors.""" + return self.update_error_count > 0 + + +class NewsItem(BaseModel): + """Model for a News item (article) with full content.""" + + model_config = ConfigDict(populate_by_name=True) + + id: int = Field(description="Item ID") + guid: str = Field(description="Globally unique identifier") + guid_hash: str = Field(alias="guidHash", description="MD5 hash of GUID") + url: str | None = Field(None, description="Article URL") + title: str = Field(description="Article title") + author: str | None = Field(None, description="Article author") + pub_date: int | None = Field( + None, alias="pubDate", description="Publication timestamp" + ) + body: str | None = Field(None, description="Article content (HTML)") + enclosure_mime: str | None = Field( + None, alias="enclosureMime", description="Enclosure MIME type" + ) + enclosure_link: str | None = Field( + None, alias="enclosureLink", description="Enclosure URL" + ) + media_thumbnail: str | None = Field( + None, alias="mediaThumbnail", description="Media thumbnail URL" + ) + media_description: str | None = Field( + None, alias="mediaDescription", description="Media description" + ) + feed_id: int = Field(alias="feedId", description="Parent feed ID") + unread: bool = Field(True, description="Whether item is unread") + starred: bool = Field(False, description="Whether item is starred") + rtl: bool = Field(False, description="Right-to-left text") + last_modified: int = Field( + alias="lastModified", description="Last modification timestamp" + ) + fingerprint: str | None = Field( + None, description="Content fingerprint for deduplication" + ) + content_hash: str | None = Field( + None, alias="contentHash", description="Content hash" + ) + + +class NewsItemSummary(BaseModel): + """Lightweight model for News item list responses.""" + + model_config = ConfigDict(populate_by_name=True) + 
+ id: int = Field(description="Item ID") + title: str = Field(description="Article title") + feed_id: int = Field(alias="feedId", description="Parent feed ID") + unread: bool = Field(True, description="Whether item is unread") + starred: bool = Field(False, description="Whether item is starred") + pub_date: int | None = Field( + None, alias="pubDate", description="Publication timestamp" + ) + url: str | None = Field(None, description="Article URL") + author: str | None = Field(None, description="Article author") + + +class NewsStatus(BaseModel): + """Model for News app status.""" + + version: str = Field(description="News app version") + warnings: dict = Field(default_factory=dict, description="Configuration warnings") + + +# --- Response Models --- + + +class ListFoldersResponse(BaseResponse): + """Response model for listing folders.""" + + results: List[NewsFolder] = Field(description="List of folders") + total_count: int = Field(description="Total number of folders") + + +class ListFeedsResponse(BaseResponse): + """Response model for listing feeds.""" + + results: List[NewsFeed] = Field(description="List of feeds") + starred_count: int = Field(0, description="Number of starred items") + newest_item_id: int | None = Field(None, description="ID of newest item") + total_count: int = Field(description="Total number of feeds") + + +class ListItemsResponse(BaseResponse): + """Response model for listing items.""" + + results: List[NewsItemSummary] = Field(description="List of items") + total_count: int = Field(description="Number of items returned") + has_more: bool = Field(False, description="Whether more items exist") + oldest_id: int | None = Field(None, description="Oldest item ID (for pagination)") + + +class GetItemResponse(BaseResponse): + """Response model for getting a single item.""" + + item: NewsItem = Field(description="Full item details") + + +class FeedHealthResponse(BaseResponse): + """Response model for feed health status.""" + + feed_id: int = 
Field(description="Feed ID") + title: str = Field(description="Feed title") + url: str = Field(description="Feed URL") + has_errors: bool = Field(description="Whether feed has update errors") + error_count: int = Field(description="Number of consecutive errors") + last_error: str | None = Field(None, description="Last error message") + + +class GetStatusResponse(BaseResponse): + """Response model for app status.""" + + version: str = Field(description="News app version") + warnings: dict = Field(default_factory=dict, description="Configuration warnings") diff --git a/nextcloud_mcp_server/server/__init__.py b/nextcloud_mcp_server/server/__init__.py index d1c4d52..0be6bba 100644 --- a/nextcloud_mcp_server/server/__init__.py +++ b/nextcloud_mcp_server/server/__init__.py @@ -2,6 +2,7 @@ from .calendar import configure_calendar_tools from .contacts import configure_contacts_tools from .cookbook import configure_cookbook_tools from .deck import configure_deck_tools +from .news import configure_news_tools from .notes import configure_notes_tools from .semantic import configure_semantic_tools from .sharing import configure_sharing_tools @@ -13,6 +14,7 @@ __all__ = [ "configure_contacts_tools", "configure_cookbook_tools", "configure_deck_tools", + "configure_news_tools", "configure_notes_tools", "configure_semantic_tools", "configure_sharing_tools", diff --git a/nextcloud_mcp_server/server/news.py b/nextcloud_mcp_server/server/news.py new file mode 100644 index 0000000..6c688c3 --- /dev/null +++ b/nextcloud_mcp_server/server/news.py @@ -0,0 +1,360 @@ +"""MCP tools for Nextcloud News app.""" + +import logging + +from httpx import HTTPStatusError, RequestError +from mcp.server.fastmcp import Context, FastMCP +from mcp.shared.exceptions import McpError +from mcp.types import ErrorData + +from nextcloud_mcp_server.auth import require_scopes +from nextcloud_mcp_server.client.news import NewsItemType +from nextcloud_mcp_server.context import get_client +from 
nextcloud_mcp_server.models.news import ( + FeedHealthResponse, + GetItemResponse, + GetStatusResponse, + ListFeedsResponse, + ListFoldersResponse, + ListItemsResponse, + NewsFeed, + NewsFolder, + NewsItem, + NewsItemSummary, +) +from nextcloud_mcp_server.observability.metrics import instrument_tool + +logger = logging.getLogger(__name__) + + +def configure_news_tools(mcp: FastMCP): + """Configure News app MCP tools.""" + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_list_folders(ctx: Context) -> ListFoldersResponse: + """List all News folders (requires news:read scope).""" + client = await get_client(ctx) + try: + folders_data = await client.news.get_folders() + folders = [NewsFolder(**f) for f in folders_data] + return ListFoldersResponse(results=folders, total_count=len(folders)) + except RequestError as e: + raise McpError( + ErrorData(code=-1, message=f"Network error listing folders: {str(e)}") + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to list folders: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_list_feeds(ctx: Context) -> ListFeedsResponse: + """List all News feeds with metadata (requires news:read scope). + + Returns feeds with unread counts, error status, and overall starred count. 
+ """ + client = await get_client(ctx) + try: + data = await client.news.get_feeds() + feeds = [NewsFeed(**f) for f in data.get("feeds", [])] + return ListFeedsResponse( + results=feeds, + starred_count=data.get("starredCount", 0), + newest_item_id=data.get("newestItemId"), + total_count=len(feeds), + ) + except RequestError as e: + raise McpError( + ErrorData(code=-1, message=f"Network error listing feeds: {str(e)}") + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to list feeds: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_list_items( + ctx: Context, + feed_id: int | None = None, + folder_id: int | None = None, + starred_only: bool = False, + unread_only: bool = False, + limit: int = 50, + offset: int = 0, + ) -> ListItemsResponse: + """List News items (articles) with optional filtering (requires news:read scope). + + Args: + feed_id: Filter by specific feed ID + folder_id: Filter by specific folder ID + starred_only: Return only starred items + unread_only: Return only unread items + limit: Maximum number of items to return (default 50, -1 for all) + offset: Item ID to start after (for pagination) + + Returns: + ListItemsResponse with items, count, and pagination info + """ + client = await get_client(ctx) + + # Determine item type filter + type_ = NewsItemType.ALL + id_ = 0 + if starred_only: + type_ = NewsItemType.STARRED + elif feed_id is not None: + type_ = NewsItemType.FEED + id_ = feed_id + elif folder_id is not None: + type_ = NewsItemType.FOLDER + id_ = folder_id + + try: + items_data = await client.news.get_items( + batch_size=limit, + offset=offset, + type_=type_, + id_=id_, + get_read=not unread_only, + ) + items = [NewsItemSummary(**i) for i in items_data] + + # Determine pagination info + oldest_id = min((i.id for i in items), default=None) if items else None + has_more = len(items) == limit and limit > 0 + + return 
ListItemsResponse( + results=items, + total_count=len(items), + has_more=has_more, + oldest_id=oldest_id, + ) + except RequestError as e: + raise McpError( + ErrorData(code=-1, message=f"Network error listing items: {str(e)}") + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to list items: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_get_item(item_id: int, ctx: Context) -> GetItemResponse: + """Get a specific News item by ID with full content (requires news:read scope). + + Args: + item_id: Item ID + + Returns: + GetItemResponse with full item details including HTML body + """ + client = await get_client(ctx) + try: + item_data = await client.news.get_item(item_id) + item = NewsItem(**item_data) + return GetItemResponse(item=item) + except ValueError as e: + raise McpError(ErrorData(code=-1, message=str(e))) + except RequestError as e: + raise McpError( + ErrorData( + code=-1, message=f"Network error getting item {item_id}: {str(e)}" + ) + ) + except HTTPStatusError as e: + if e.response.status_code == 404: + raise McpError(ErrorData(code=-1, message=f"Item {item_id} not found")) + raise McpError( + ErrorData( + code=-1, + message=f"Failed to get item {item_id}: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_get_starred_items( + ctx: Context, limit: int = 50, offset: int = 0 + ) -> ListItemsResponse: + """Get starred (favorited) News items (requires news:read scope). + + Convenience method for retrieving user's starred articles. 
+ + Args: + limit: Maximum number of items to return (default 50, -1 for all) + offset: Item ID to start after (for pagination) + + Returns: + ListItemsResponse with starred items + """ + client = await get_client(ctx) + try: + items_data = await client.news.get_items( + batch_size=limit, + offset=offset, + type_=NewsItemType.STARRED, + get_read=True, # Include read starred items + ) + items = [NewsItemSummary(**i) for i in items_data] + + oldest_id = min((i.id for i in items), default=None) if items else None + has_more = len(items) == limit and limit > 0 + + return ListItemsResponse( + results=items, + total_count=len(items), + has_more=has_more, + oldest_id=oldest_id, + ) + except RequestError as e: + raise McpError( + ErrorData( + code=-1, message=f"Network error getting starred items: {str(e)}" + ) + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to get starred items: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_get_unread_items( + ctx: Context, limit: int = 50, offset: int = 0 + ) -> ListItemsResponse: + """Get unread News items (requires news:read scope). + + Convenience method for retrieving unread articles across all feeds. 
+ + Args: + limit: Maximum number of items to return (default 50, -1 for all) + offset: Item ID to start after (for pagination) + + Returns: + ListItemsResponse with unread items + """ + client = await get_client(ctx) + try: + items_data = await client.news.get_items( + batch_size=limit, + offset=offset, + type_=NewsItemType.ALL, + get_read=False, # Only unread items + ) + items = [NewsItemSummary(**i) for i in items_data] + + oldest_id = min((i.id for i in items), default=None) if items else None + has_more = len(items) == limit and limit > 0 + + return ListItemsResponse( + results=items, + total_count=len(items), + has_more=has_more, + oldest_id=oldest_id, + ) + except RequestError as e: + raise McpError( + ErrorData( + code=-1, message=f"Network error getting unread items: {str(e)}" + ) + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to get unread items: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_get_feed_health(feed_id: int, ctx: Context) -> FeedHealthResponse: + """Get health status for a specific feed (requires news:read scope). + + Returns error count and last error message if the feed has update issues. 
+ + Args: + feed_id: Feed ID to check + + Returns: + FeedHealthResponse with error status + """ + client = await get_client(ctx) + try: + data = await client.news.get_feeds() + for feed_data in data.get("feeds", []): + if feed_data.get("id") == feed_id: + feed = NewsFeed(**feed_data) + return FeedHealthResponse( + feed_id=feed.id, + title=feed.title, + url=feed.url, + has_errors=feed.has_errors, + error_count=feed.update_error_count, + last_error=feed.last_update_error, + ) + raise McpError(ErrorData(code=-1, message=f"Feed {feed_id} not found")) + except RequestError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Network error getting feed health: {str(e)}", + ) + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to get feed health: {e.response.status_code}", + ) + ) + + @mcp.tool() + @require_scopes("news:read") + @instrument_tool + async def nc_news_get_status(ctx: Context) -> GetStatusResponse: + """Get News app status and version (requires news:read scope). + + Returns version information and any configuration warnings. 
+ """ + client = await get_client(ctx) + try: + status_data = await client.news.get_status() + return GetStatusResponse( + version=status_data.get("version", "unknown"), + warnings=status_data.get("warnings", {}), + ) + except RequestError as e: + raise McpError( + ErrorData(code=-1, message=f"Network error getting status: {str(e)}") + ) + except HTTPStatusError as e: + raise McpError( + ErrorData( + code=-1, + message=f"Failed to get status: {e.response.status_code}", + ) + ) diff --git a/nextcloud_mcp_server/vector/html_processor.py b/nextcloud_mcp_server/vector/html_processor.py new file mode 100644 index 0000000..1d40b6f --- /dev/null +++ b/nextcloud_mcp_server/vector/html_processor.py @@ -0,0 +1,49 @@ +"""HTML to Markdown conversion utilities for vector sync.""" + +import logging + +from markdownify import markdownify as md + +logger = logging.getLogger(__name__) + + +def html_to_markdown(html_content: str | None) -> str: + """Convert HTML content to Markdown, preserving semantic structure. + + This function converts HTML (typically from RSS/Atom feed items) to Markdown + for better text embedding. Markdown preserves: + - Heading hierarchy (important for document structure) + - Lists (bullet and numbered) + - Links (as [text](url)) + - Bold/italic emphasis + - Paragraphs and line breaks + + Args: + html_content: HTML string to convert (may be None or empty) + + Returns: + Markdown string, or empty string if input is None/empty + + Example: + >>> html_to_markdown("

<h1>Title</h1><p>Content with <strong>bold</strong>.</p>

") + '# Title\\n\\nContent with **bold**.\\n\\n' + """ + if not html_content: + return "" + + try: + markdown = md( + html_content, + heading_style="ATX", # Use # style headings + strip=["script", "style", "iframe", "noscript"], # Remove unsafe elements + bullets="-", # Use - for unordered lists + code_language="", # Don't add language hints to code blocks + ) + return markdown.strip() + except Exception as e: + logger.warning(f"Failed to convert HTML to Markdown: {e}") + # Fallback: strip all HTML tags as a last resort + import re + + text = re.sub(r"<[^>]+>", " ", html_content) + return " ".join(text.split()) # Normalize whitespace diff --git a/nextcloud_mcp_server/vector/processor.py b/nextcloud_mcp_server/vector/processor.py index 85becd9..1bc2ad4 100644 --- a/nextcloud_mcp_server/vector/processor.py +++ b/nextcloud_mcp_server/vector/processor.py @@ -272,6 +272,45 @@ async def _index_document( file_path = None # Notes don't have file paths content_bytes = None # Notes don't have binary content content_type = None + elif doc_task.doc_type == "news_item": + from nextcloud_mcp_server.vector.html_processor import html_to_markdown + + item = await nc_client.news.get_item(int(doc_task.doc_id)) + # Convert HTML body to Markdown for better embedding + body_markdown = html_to_markdown(item.get("body", "")) + # Build content: title + URL + body + item_title = item.get("title", "") + item_url = item.get("url", "") + feed_title = item.get("feedTitle", "") + + # Structure content for embedding + content_parts = [item_title] + if feed_title: + content_parts.append(f"Source: {feed_title}") + if item_url: + content_parts.append(f"URL: {item_url}") + content_parts.append("") # Blank line + content_parts.append(body_markdown) + content = "\n".join(content_parts) + + title = item_title + etag = item.get("guidHash", "") + # Store news-specific metadata for later use in payload + file_metadata = { + "feed_id": item.get("feedId"), + "feed_title": feed_title, + "author": 
item.get("author"), + "pub_date": item.get("pubDate"), + "starred": item.get("starred", False), + "unread": item.get("unread", True), + "url": item_url, + "guid_hash": item.get("guidHash"), + "enclosure_link": item.get("enclosureLink"), + "enclosure_mime": item.get("enclosureMime"), + } + file_path = None + content_bytes = None + content_type = None elif doc_task.doc_type == "file": # For files, doc_id is now the numeric file ID, file_path comes from DocumentTask if not doc_task.file_path: @@ -358,15 +397,16 @@ async def _index_document( chunks = await chunker.chunk_text(content) # Assign page numbers to chunks if page boundaries are available (PDFs) - if doc_task.doc_type == "file" and "page_boundaries" in file_metadata: + page_boundaries = file_metadata.get("page_boundaries") + if doc_task.doc_type == "file" and page_boundaries is not None: with trace_operation( "vector_sync.assign_page_numbers", attributes={ "vector_sync.chunk_count": len(chunks), - "vector_sync.page_count": len(file_metadata["page_boundaries"]), + "vector_sync.page_count": len(page_boundaries), }, ): - assign_page_numbers(chunks, file_metadata["page_boundaries"]) + assign_page_numbers(chunks, page_boundaries) # Diagnostic: Verify page number assignment assigned_count = sum(1 for c in chunks if c.page_number is not None) @@ -389,8 +429,8 @@ async def _index_document( f"Text length: {len(content)}, " f"Chunks: {len(chunks)}, " f"Chunk offset range: [{chunks[0].start_offset}:{chunks[-1].end_offset}], " - f"Page boundaries: {len(file_metadata['page_boundaries'])} pages, " - f"First boundary: {file_metadata['page_boundaries'][0] if file_metadata['page_boundaries'] else 'None'}" + f"Page boundaries: {len(page_boundaries)} pages, " + f"First boundary: {page_boundaries[0] if page_boundaries else 'None'}" ) # Extract chunk texts for embedding @@ -566,6 +606,23 @@ async def _index_document( if doc_task.doc_type == "file" else {} ), + # News item-specific metadata + **( + { + "feed_id": 
file_metadata.get("feed_id"), + "feed_title": file_metadata.get("feed_title"), + "author": file_metadata.get("author"), + "pub_date": file_metadata.get("pub_date"), + "starred": file_metadata.get("starred"), + "unread": file_metadata.get("unread"), + "url": file_metadata.get("url"), + "guid_hash": file_metadata.get("guid_hash"), + "enclosure_link": file_metadata.get("enclosure_link"), + "enclosure_mime": file_metadata.get("enclosure_mime"), + } + if doc_task.doc_type == "news_item" + else {} + ), # Highlighted page image (PDF only) **( { diff --git a/nextcloud_mcp_server/vector/scanner.py b/nextcloud_mcp_server/vector/scanner.py index afc4e32..685fce0 100644 --- a/nextcloud_mcp_server/vector/scanner.py +++ b/nextcloud_mcp_server/vector/scanner.py @@ -544,9 +544,217 @@ async def scan_user_documents( queued += file_queued + # Scan News items (starred + unread) + news_queued = 0 + try: + news_queued = await scan_news_items( + user_id=user_id, + send_stream=send_stream, + nc_client=nc_client, + initial_sync=initial_sync, + scan_id=scan_id, + ) + queued += news_queued + except Exception as e: + logger.warning(f"Failed to scan news items for {user_id}: {e}") + if queued > 0: logger.info( - f"Sent {queued} documents ({file_queued} files) for incremental sync: {user_id}" + f"Sent {queued} documents ({file_queued} files, {news_queued} news items) for incremental sync: {user_id}" ) else: logger.debug(f"No changes detected for {user_id}") + + +async def scan_news_items( + user_id: str, + send_stream: MemoryObjectSendStream[DocumentTask], + nc_client: NextcloudClient, + initial_sync: bool, + scan_id: int, +) -> int: + """ + Scan user's News items (starred + unread) and queue changed items. + + Indexes starred and unread items for semantic search. This provides + a balanced approach - important items (starred) and current items + (unread) are searchable, while avoiding indexing the entire history. 
+ + Args: + user_id: User to scan + send_stream: Stream to send changed documents to processors + nc_client: Authenticated Nextcloud client + initial_sync: If True, send all documents (first-time sync) + scan_id: Scan identifier for logging + + Returns: + Number of items queued for processing + """ + from nextcloud_mcp_server.client.news import NewsItemType + + settings = get_settings() + queued = 0 + + # Get indexed news item IDs from Qdrant (for deletion tracking) + indexed_item_ids: set[str] = set() + if not initial_sync: + qdrant_client = await get_qdrant_client() + scroll_result = await qdrant_client.scroll( + collection_name=settings.get_collection_name(), + scroll_filter=Filter( + must=[ + FieldCondition(key="user_id", match=MatchValue(value=user_id)), + FieldCondition(key="doc_type", match=MatchValue(value="news_item")), + ] + ), + with_payload=["doc_id"], + with_vectors=False, + limit=10000, + ) + indexed_item_ids = {point.payload["doc_id"] for point in scroll_result[0]} + logger.debug(f"Found {len(indexed_item_ids)} indexed news items in Qdrant") + + # Fetch starred items (type=STARRED) + starred_items = await nc_client.news.get_items( + batch_size=-1, # Get all + type_=NewsItemType.STARRED, + get_read=True, # Include read starred items + ) + logger.debug(f"[SCAN-{scan_id}] Found {len(starred_items)} starred news items") + + # Fetch unread items (type=ALL, get_read=False) + unread_items = await nc_client.news.get_items( + batch_size=-1, + type_=NewsItemType.ALL, + get_read=False, # Only unread + ) + logger.debug(f"[SCAN-{scan_id}] Found {len(unread_items)} unread news items") + + # Combine and deduplicate (an item can be both starred and unread) + items_by_id: dict[int, dict] = {} + for item in starred_items: + items_by_id[item["id"]] = item + for item in unread_items: + items_by_id[item["id"]] = item + + item_count = len(items_by_id) + nextcloud_item_ids: set[str] = set() + + for item_id, item in items_by_id.items(): + doc_id = str(item_id) + 
nextcloud_item_ids.add(doc_id) + + # Use lastModified timestamp (microseconds in News API) + modified_at = item.get("lastModified", 0) + # Convert to seconds if needed (News API uses microseconds) + if modified_at > 10000000000: # > year 2286 in seconds + modified_at = modified_at // 1000000 + + if initial_sync: + # Send everything on first sync - write placeholder first + await write_placeholder_point( + doc_id=doc_id, + doc_type="news_item", + user_id=user_id, + modified_at=modified_at, + ) + await send_stream.send( + DocumentTask( + user_id=user_id, + doc_id=doc_id, + doc_type="news_item", + operation="index", + modified_at=modified_at, + ) + ) + queued += 1 + else: + # Incremental sync: check if item exists and compare modified_at + doc_key = (user_id, doc_id) + if doc_key in _potentially_deleted: + logger.debug( + f"News item {doc_id} reappeared, removing from deletion grace period" + ) + del _potentially_deleted[doc_key] + + # Query Qdrant for existing entry + existing_metadata = await query_document_metadata( + doc_id=doc_id, doc_type="news_item", user_id=user_id + ) + + needs_indexing = False + if existing_metadata is None: + needs_indexing = True + elif existing_metadata.get("modified_at", 0) < modified_at: + needs_indexing = True + elif existing_metadata.get("is_placeholder", False): + queued_at = existing_metadata.get("queued_at", 0) + placeholder_age = time.time() - queued_at + stale_threshold = settings.vector_sync_scan_interval * 5 + if placeholder_age > stale_threshold: + logger.debug( + f"Found stale placeholder for news item {doc_id} " + f"(age={placeholder_age:.1f}s), requeuing" + ) + needs_indexing = True + + if needs_indexing: + await write_placeholder_point( + doc_id=doc_id, + doc_type="news_item", + user_id=user_id, + modified_at=modified_at, + ) + await send_stream.send( + DocumentTask( + user_id=user_id, + doc_id=doc_id, + doc_type="news_item", + operation="index", + modified_at=modified_at, + ) + ) + queued += 1 + + logger.info( + 
f"[SCAN-{scan_id}] Found {item_count} news items (starred+unread) for {user_id}" + ) + record_vector_sync_scan(item_count) + + # Check for deleted items (not initial sync) + # Items become "deleted" when they are no longer starred AND become read + if not initial_sync: + grace_period = settings.vector_sync_scan_interval * 1.5 + current_time = time.time() + + for doc_id in indexed_item_ids: + if doc_id not in nextcloud_item_ids: + doc_key = (user_id, doc_id) + + if doc_key in _potentially_deleted: + first_missing_time = _potentially_deleted[doc_key] + time_missing = current_time - first_missing_time + + if time_missing >= grace_period: + logger.info( + f"News item {doc_id} missing for {time_missing:.1f}s " + f"(>{grace_period:.1f}s grace period), sending deletion" + ) + await send_stream.send( + DocumentTask( + user_id=user_id, + doc_id=doc_id, + doc_type="news_item", + operation="delete", + modified_at=0, + ) + ) + queued += 1 + del _potentially_deleted[doc_key] + else: + logger.debug( + f"News item {doc_id} missing for first time, starting grace period" + ) + _potentially_deleted[doc_key] = current_time + + return queued diff --git a/pyproject.toml b/pyproject.toml index f3c0b51..14b1d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "python-json-logger>=3.2.0", # Structured JSON logging "jinja2>=3.1.6", "langchain-text-splitters>=1.0.0", + "markdownify>=0.14.1", # HTML to Markdown conversion for News items "pymupdf>=1.26.6", "pymupdf4llm>=0.2.2", "pymupdf-layout>=1.26.6", diff --git a/tests/client/conftest.py b/tests/client/conftest.py index bed39ea..8a9f554 100644 --- a/tests/client/conftest.py +++ b/tests/client/conftest.py @@ -480,3 +480,222 @@ def create_mock_table_row_ocs_response( ocs_response = {"ocs": {"meta": {"status": "ok"}, "data": row_data}} return create_mock_response(status_code=200, json_data=ocs_response) + + +# ============================================================================ +# News Mock Response 
Helpers +# ============================================================================ + + +def create_mock_news_folders_response( + folders: list[dict] | None = None, +) -> httpx.Response: + """Create a mock response for News folders list. + + Args: + folders: List of folder dictionaries. If None, returns empty list. + + Returns: + Mock httpx.Response with folders data + """ + if folders is None: + folders = [] + + return create_mock_response(status_code=200, json_data={"folders": folders}) + + +def create_mock_news_folder_response( + folder_id: int = 1, + name: str = "Test Folder", + **kwargs, +) -> httpx.Response: + """Create a mock response for a News folder. + + Args: + folder_id: Folder ID + name: Folder name + **kwargs: Additional folder fields + + Returns: + Mock httpx.Response with folder data + """ + folder_data = { + "id": folder_id, + "name": name, + **kwargs, + } + + return create_mock_response(status_code=200, json_data={"folders": [folder_data]}) + + +def create_mock_news_feeds_response( + feeds: list[dict] | None = None, + starred_count: int = 0, + newest_item_id: int | None = None, +) -> httpx.Response: + """Create a mock response for News feeds list. + + Args: + feeds: List of feed dictionaries. If None, returns empty list. + starred_count: Number of starred items + newest_item_id: ID of newest item + + Returns: + Mock httpx.Response with feeds data + """ + if feeds is None: + feeds = [] + + data = { + "feeds": feeds, + "starredCount": starred_count, + } + if newest_item_id is not None: + data["newestItemId"] = newest_item_id + + return create_mock_response(status_code=200, json_data=data) + + +def create_mock_news_feed_response( + feed_id: int = 1, + url: str = "https://example.com/feed", + title: str = "Test Feed", + favicon_link: str | None = None, + folder_id: int | None = None, + unread_count: int = 0, + **kwargs, +) -> httpx.Response: + """Create a mock response for a News feed. 
+ + Args: + feed_id: Feed ID + url: Feed URL + title: Feed title + favicon_link: Favicon URL + folder_id: Parent folder ID + unread_count: Number of unread items + **kwargs: Additional feed fields + + Returns: + Mock httpx.Response with feed data + """ + feed_data = { + "id": feed_id, + "url": url, + "title": title, + "faviconLink": favicon_link, + "folderId": folder_id, + "unreadCount": unread_count, + "link": kwargs.get("link", "https://example.com"), + "added": kwargs.get("added", 1700000000), + "updateErrorCount": kwargs.get("updateErrorCount", 0), + "lastUpdateError": kwargs.get("lastUpdateError"), + **{ + k: v + for k, v in kwargs.items() + if k not in ["link", "added", "updateErrorCount", "lastUpdateError"] + }, + } + + return create_mock_response(status_code=200, json_data={"feeds": [feed_data]}) + + +def create_mock_news_items_response( + items: list[dict] | None = None, +) -> httpx.Response: + """Create a mock response for News items list. + + Args: + items: List of item dictionaries. If None, returns empty list. + + Returns: + Mock httpx.Response with items data + """ + if items is None: + items = [] + + return create_mock_response(status_code=200, json_data={"items": items}) + + +def create_mock_news_item( + item_id: int = 1, + feed_id: int = 1, + title: str = "Test Article", + body: str = "

Test content

", + url: str = "https://example.com/article", + author: str | None = "Test Author", + pub_date: int = 1700000000, + unread: bool = True, + starred: bool = False, + **kwargs, +) -> dict: + """Create a mock News item dictionary. + + Args: + item_id: Item ID + feed_id: Parent feed ID + title: Article title + body: Article body (HTML) + url: Article URL + author: Article author + pub_date: Publication timestamp (Unix) + unread: Whether item is unread + starred: Whether item is starred + **kwargs: Additional item fields + + Returns: + Item dictionary + """ + return { + "id": item_id, + "feedId": feed_id, + "title": title, + "body": body, + "url": url, + "author": author, + "pubDate": pub_date, + "unread": unread, + "starred": starred, + "guid": kwargs.get("guid", f"guid-{item_id}"), + "guidHash": kwargs.get("guidHash", f"hash-{item_id}"), + "lastModified": kwargs.get("lastModified", pub_date * 1000000), + "enclosureLink": kwargs.get("enclosureLink"), + "enclosureMime": kwargs.get("enclosureMime"), + "fingerprint": kwargs.get("fingerprint", f"fp-{item_id}"), + "contentHash": kwargs.get("contentHash", f"ch-{item_id}"), + **{ + k: v + for k, v in kwargs.items() + if k + not in [ + "guid", + "guidHash", + "lastModified", + "enclosureLink", + "enclosureMime", + "fingerprint", + "contentHash", + ] + }, + } + + +def create_mock_news_status_response( + version: str = "25.0.0", + warnings: dict | None = None, +) -> httpx.Response: + """Create a mock response for News status. 
+ + Args: + version: News app version + warnings: Warning messages + + Returns: + Mock httpx.Response with status data + """ + data = { + "version": version, + "warnings": warnings or {}, + } + + return create_mock_response(status_code=200, json_data=data) diff --git a/tests/client/news/__init__.py b/tests/client/news/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/client/news/test_news_api.py b/tests/client/news/test_news_api.py new file mode 100644 index 0000000..bd9f972 --- /dev/null +++ b/tests/client/news/test_news_api.py @@ -0,0 +1,542 @@ +"""Unit tests for NewsClient API methods.""" + +import logging + +import httpx +import pytest + +from nextcloud_mcp_server.client.news import NewsClient, NewsItemType +from tests.client.conftest import ( + create_mock_error_response, + create_mock_news_feed_response, + create_mock_news_feeds_response, + create_mock_news_folder_response, + create_mock_news_folders_response, + create_mock_news_item, + create_mock_news_items_response, + create_mock_news_status_response, + create_mock_response, +) + +logger = logging.getLogger(__name__) + +# Mark all tests in this module as unit tests +pytestmark = pytest.mark.unit + + +# ============================================================================ +# Folder Tests +# ============================================================================ + + +async def test_news_api_get_folders(mocker): + """Test that get_folders correctly parses the API response.""" + mock_response = create_mock_news_folders_response( + folders=[ + {"id": 1, "name": "Tech"}, + {"id": 2, "name": "News"}, + ] + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + folders = await client.get_folders() + + assert len(folders) == 2 + assert folders[0]["id"] == 1 + assert folders[0]["name"] == "Tech" + assert 
folders[1]["name"] == "News" + + mock_make_request.assert_called_once_with("GET", "/apps/news/api/v1-3/folders") + + +async def test_news_api_create_folder(mocker): + """Test that create_folder correctly creates a folder.""" + mock_response = create_mock_news_folder_response(folder_id=3, name="New Folder") + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + folder = await client.create_folder(name="New Folder") + + assert folder["id"] == 3 + assert folder["name"] == "New Folder" + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/folders", json={"name": "New Folder"} + ) + + +async def test_news_api_rename_folder(mocker): + """Test that rename_folder makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.rename_folder(folder_id=1, name="Renamed") + + mock_make_request.assert_called_once_with( + "PUT", "/apps/news/api/v1-3/folders/1", json={"name": "Renamed"} + ) + + +async def test_news_api_delete_folder(mocker): + """Test that delete_folder makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.delete_folder(folder_id=1) + + mock_make_request.assert_called_once_with("DELETE", "/apps/news/api/v1-3/folders/1") + + +# ============================================================================ +# Feed Tests +# 
============================================================================ + + +async def test_news_api_get_feeds(mocker): + """Test that get_feeds correctly parses the API response.""" + mock_response = create_mock_news_feeds_response( + feeds=[ + {"id": 1, "url": "https://example.com/feed1", "title": "Feed 1"}, + {"id": 2, "url": "https://example.com/feed2", "title": "Feed 2"}, + ], + starred_count=5, + newest_item_id=100, + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + result = await client.get_feeds() + + assert len(result["feeds"]) == 2 + assert result["starredCount"] == 5 + assert result["newestItemId"] == 100 + + mock_make_request.assert_called_once_with("GET", "/apps/news/api/v1-3/feeds") + + +async def test_news_api_create_feed(mocker): + """Test that create_feed correctly creates a feed.""" + mock_response = create_mock_news_feed_response( + feed_id=10, url="https://example.com/new-feed", title="New Feed" + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + feed = await client.create_feed(url="https://example.com/new-feed") + + assert feed["id"] == 10 + assert feed["url"] == "https://example.com/new-feed" + + mock_make_request.assert_called_once_with( + "POST", + "/apps/news/api/v1-3/feeds", + json={"url": "https://example.com/new-feed"}, + ) + + +async def test_news_api_create_feed_with_folder(mocker): + """Test that create_feed correctly creates a feed in a folder.""" + mock_response = create_mock_news_feed_response( + feed_id=10, url="https://example.com/feed", folder_id=5 + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", 
return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + feed = await client.create_feed(url="https://example.com/feed", folder_id=5) + + assert feed["folderId"] == 5 + + mock_make_request.assert_called_once_with( + "POST", + "/apps/news/api/v1-3/feeds", + json={"url": "https://example.com/feed", "folderId": 5}, + ) + + +async def test_news_api_delete_feed(mocker): + """Test that delete_feed makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.delete_feed(feed_id=10) + + mock_make_request.assert_called_once_with("DELETE", "/apps/news/api/v1-3/feeds/10") + + +async def test_news_api_move_feed(mocker): + """Test that move_feed makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.move_feed(feed_id=10, folder_id=5) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/feeds/10/move", json={"folderId": 5} + ) + + +async def test_news_api_rename_feed(mocker): + """Test that rename_feed makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.rename_feed(feed_id=10, title="Renamed Feed") + + mock_make_request.assert_called_once_with( + "POST", + "/apps/news/api/v1-3/feeds/10/rename", + json={"feedTitle": "Renamed 
Feed"}, + ) + + +# ============================================================================ +# Item Tests +# ============================================================================ + + +async def test_news_api_get_items(mocker): + """Test that get_items correctly parses the API response.""" + items = [ + create_mock_news_item(item_id=1, title="Article 1"), + create_mock_news_item(item_id=2, title="Article 2"), + ] + mock_response = create_mock_news_items_response(items=items) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + result = await client.get_items() + + assert len(result) == 2 + assert result[0]["title"] == "Article 1" + assert result[1]["title"] == "Article 2" + + # Verify default parameters + call_args = mock_make_request.call_args + assert call_args[0] == ("GET", "/apps/news/api/v1-3/items") + params = call_args[1]["params"] + assert params["batchSize"] == 50 + assert params["type"] == NewsItemType.ALL + + +async def test_news_api_get_items_starred(mocker): + """Test that get_items with STARRED type filters correctly.""" + items = [create_mock_news_item(item_id=1, starred=True)] + mock_response = create_mock_news_items_response(items=items) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + result = await client.get_items(type_=NewsItemType.STARRED) + + assert len(result) == 1 + assert result[0]["starred"] is True + + call_args = mock_make_request.call_args + params = call_args[1]["params"] + assert params["type"] == NewsItemType.STARRED + + +async def test_news_api_get_items_unread_only(mocker): + """Test that get_items with get_read=False filters correctly.""" + items = [create_mock_news_item(item_id=1, unread=True)] + 
mock_response = create_mock_news_items_response(items=items) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + result = await client.get_items(get_read=False) + + assert len(result) == 1 + + call_args = mock_make_request.call_args + params = call_args[1]["params"] + assert params["getRead"] == "false" + + +async def test_news_api_get_updated_items(mocker): + """Test that get_updated_items correctly calls the updated endpoint.""" + items = [create_mock_news_item(item_id=1)] + mock_response = create_mock_news_items_response(items=items) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + result = await client.get_updated_items(last_modified=1700000000) + + assert len(result) == 1 + + call_args = mock_make_request.call_args + assert call_args[0] == ("GET", "/apps/news/api/v1-3/items/updated") + params = call_args[1]["params"] + assert params["lastModified"] == 1700000000 + + +async def test_news_api_mark_item_read(mocker): + """Test that mark_item_read makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.mark_item_read(item_id=123) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/123/read" + ) + + +async def test_news_api_mark_item_unread(mocker): + """Test that mark_item_unread makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + 
mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.mark_item_unread(item_id=123) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/123/unread" + ) + + +async def test_news_api_star_item(mocker): + """Test that star_item makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.star_item(item_id=123) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/123/star" + ) + + +async def test_news_api_unstar_item(mocker): + """Test that unstar_item makes the correct API call.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.unstar_item(item_id=123) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/123/unstar" + ) + + +async def test_news_api_mark_items_read_multiple(mocker): + """Test that mark_items_read makes the correct API call for multiple items.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.mark_items_read(item_ids=[1, 2, 3]) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/read/multiple", json={"itemIds": [1, 2, 3]} + ) + + +async def 
test_news_api_star_items_multiple(mocker): + """Test that star_items makes the correct API call for multiple items.""" + mock_response = create_mock_response(status_code=200, json_data={}) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + await client.star_items(item_ids=[1, 2, 3]) + + mock_make_request.assert_called_once_with( + "POST", "/apps/news/api/v1-3/items/star/multiple", json={"itemIds": [1, 2, 3]} + ) + + +# ============================================================================ +# Status Tests +# ============================================================================ + + +async def test_news_api_get_status(mocker): + """Test that get_status correctly parses the API response.""" + mock_response = create_mock_news_status_response( + version="25.0.0", + warnings={"improperlyConfiguredCron": False}, + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + status = await client.get_status() + + assert status["version"] == "25.0.0" + assert "warnings" in status + + mock_make_request.assert_called_once_with("GET", "/apps/news/api/v1-3/status") + + +async def test_news_api_get_version(mocker): + """Test that get_version correctly parses the API response.""" + mock_response = create_mock_response( + status_code=200, json_data={"version": "25.0.0"} + ) + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object( + NewsClient, "_make_request", return_value=mock_response + ) + + client = NewsClient(mock_client, "testuser") + version = await client.get_version() + + assert version == "25.0.0" + + mock_make_request.assert_called_once_with("GET", "/apps/news/api/v1-3/version") + + +# 
============================================================================ +# Error Handling Tests +# ============================================================================ + + +async def test_news_api_create_folder_conflict(mocker): + """Test that create_folder raises HTTPStatusError on 409 conflict.""" + error_response = create_mock_error_response(409, "Folder name already exists") + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object(NewsClient, "_make_request") + mock_make_request.side_effect = httpx.HTTPStatusError( + "409 Conflict", + request=httpx.Request("POST", "http://test.local"), + response=error_response, + ) + + client = NewsClient(mock_client, "testuser") + + with pytest.raises(httpx.HTTPStatusError) as excinfo: + await client.create_folder(name="Existing Folder") + + assert excinfo.value.response.status_code == 409 + + +async def test_news_api_delete_feed_not_found(mocker): + """Test that delete_feed raises HTTPStatusError on 404.""" + error_response = create_mock_error_response(404, "Feed not found") + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object(NewsClient, "_make_request") + mock_make_request.side_effect = httpx.HTTPStatusError( + "404 Not Found", + request=httpx.Request("DELETE", "http://test.local"), + response=error_response, + ) + + client = NewsClient(mock_client, "testuser") + + with pytest.raises(httpx.HTTPStatusError) as excinfo: + await client.delete_feed(feed_id=999999) + + assert excinfo.value.response.status_code == 404 + + +async def test_news_api_create_feed_invalid_url(mocker): + """Test that create_feed raises HTTPStatusError on 422 for invalid URL.""" + error_response = create_mock_error_response(422, "Invalid feed URL") + + mock_client = mocker.AsyncMock(spec=httpx.AsyncClient) + mock_make_request = mocker.patch.object(NewsClient, "_make_request") + mock_make_request.side_effect = httpx.HTTPStatusError( + "422 
Unprocessable Entity", + request=httpx.Request("POST", "http://test.local"), + response=error_response, + ) + + client = NewsClient(mock_client, "testuser") + + with pytest.raises(httpx.HTTPStatusError) as excinfo: + await client.create_feed(url="not-a-valid-url") + + assert excinfo.value.response.status_code == 422 diff --git a/uv.lock b/uv.lock index 7cdf27e..94b3c61 100644 --- a/uv.lock +++ b/uv.lock @@ -234,6 +234,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608, upload-time = "2025-10-02T13:36:07.637Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822, upload-time = "2025-09-29T10:05:42.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, +] + [[package]] name = "boto3" version = "1.40.74" @@ -1557,6 +1570,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] +[[package]] +name = "markdownify" +version = "1.2.2" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3f/bc/c8c8eea5335341306b0fa7e1cb33c5e1c8d24ef70ddd684da65f41c49c92/markdownify-1.2.2.tar.gz", hash = "sha256:b274f1b5943180b031b699b199cbaeb1e2ac938b75851849a31fd0c3d6603d09", size = 18816, upload-time = "2025-11-16T19:21:18.565Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/ce/f1e3e9d959db134cedf06825fae8d5b294bd368aacdd0831a3975b7c4d55/markdownify-1.2.2-py3-none-any.whl", hash = "sha256:3f02d3cc52714084d6e589f70397b6fc9f2f3a8531481bf35e8cc39f975e186a", size = 15724, upload-time = "2025-11-16T19:21:17.622Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -1950,6 +1976,7 @@ dependencies = [ { name = "icalendar" }, { name = "jinja2" }, { name = "langchain-text-splitters" }, + { name = "markdownify" }, { name = "mcp", extra = ["cli"] }, { name = "openai" }, { name = "opentelemetry-api" }, @@ -1999,6 +2026,7 @@ requires-dist = [ { name = "icalendar", specifier = ">=6.0.0,<7.0.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "langchain-text-splitters", specifier = ">=1.0.0" }, + { name = "markdownify", specifier = ">=0.14.1" }, { name = "mcp", extras = ["cli"], specifier = ">=1.22,<1.23" }, { name = "openai", specifier = ">=2.8.1" }, { name = "opentelemetry-api", specifier = ">=1.28.2" }, @@ -3584,6 +3612,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = 
"sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, +] + [[package]] name = "sse-starlette" version = "3.0.3"