feat(news): add Nextcloud News app integration

Add full integration for the Nextcloud News (RSS/Atom reader) app: - Add NewsClient with complete CRUD operations for folders, feeds, and items - Add 8 read-only MCP tools for listing/getting folders, feeds, items - Add Pydantic models for News entities with camelCase alias support - Add vector sync support for starred + unread items - Add HTML to Markdown converter using markdownify for better embeddings - Add Docker post-install hook to enable News app - Add 25 unit tests for NewsClient API methods Vector sync indexes starred and unread items, providing a balanced approach that captures important (starred) and current (unread) content without indexing the entire article history. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 14:30:23 +01:00
parent 1b1667bc2b
commit a33f6a2f15
15 changed files with 2055 additions and 7 deletions
@@ -60,6 +60,7 @@ from nextcloud_mcp_server.server import (
    configure_contacts_tools,
    configure_cookbook_tools,
    configure_deck_tools,
+    configure_news_tools,
    configure_notes_tools,
    configure_semantic_tools,
    configure_sharing_tools,
@@ -514,7 +515,7 @@ async def load_oauth_client_credentials(
        # and the authorization server will limit them to these allowed scopes.
        #
        # The PRM endpoint advertises the same scopes dynamically via @require_scopes decorators.
-        dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write"
+        dcr_scopes = "openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write news:read news:write"

        # Add offline_access scope if refresh tokens are enabled
        enable_offline_access = os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() in (
@@ -1046,6 +1047,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
        "contacts": configure_contacts_tools,
        "cookbook": configure_cookbook_tools,
        "deck": configure_deck_tools,
+        "news": configure_news_tools,
    }

    # If no specific apps are specified, enable all
@@ -18,6 +18,7 @@ from .contacts import ContactsClient
 from .cookbook import CookbookClient
 from .deck import DeckClient
 from .groups import GroupsClient
+from .news import NewsClient
 from .notes import NotesClient
 from .sharing import SharingClient
 from .tables import TablesClient
@@ -81,6 +82,7 @@ class NextcloudClient:
        self.contacts = ContactsClient(self._client, username)
        self.cookbook = CookbookClient(self._client, username)
        self.deck = DeckClient(self._client, username)
+        self.news = NewsClient(self._client, username)
        self.users = UsersClient(self._client, username)
        self.groups = GroupsClient(self._client, username)
        self.sharing = SharingClient(self._client, username)
@@ -0,0 +1,394 @@
+"""Client for Nextcloud News app operations."""
+
+import logging
+from enum import IntEnum
+from typing import Any
+
+from .base import BaseNextcloudClient
+
+logger = logging.getLogger(__name__)
+
+
+class NewsItemType(IntEnum):
+    """Type constants for News API item queries.
+
+    Sent as the ``type`` query parameter by ``NewsClient.get_items`` and
+    ``NewsClient.get_updated_items``; it decides how the accompanying ``id``
+    parameter is interpreted.
+    """
+
+    FEED = 0  # Single feed (the accompanying id is a feed ID)
+    FOLDER = 1  # Folder and its feeds (the accompanying id is a folder ID)
+    STARRED = 2  # All starred items (the accompanying id is ignored)
+    ALL = 3  # All items (the accompanying id is ignored)
+
+
+class NewsClient(BaseNextcloudClient):
+    """Client for Nextcloud News app operations."""
+
+    app_name = "news"
+    API_BASE = "/apps/news/api/v1-3"
+
+    # --- Folders ---
+
+    async def get_folders(self) -> list[dict[str, Any]]:
+        """Get all folders."""
+        response = await self._make_request("GET", f"{self.API_BASE}/folders")
+        return response.json().get("folders", [])
+
+    async def create_folder(self, name: str) -> dict[str, Any]:
+        """Create a new folder.
+
+        Args:
+            name: Folder name
+
+        Returns:
+            Created folder data
+
+        Raises:
+            HTTPStatusError: 409 if folder name already exists,
+                            422 if name is empty
+        """
+        response = await self._make_request(
+            "POST", f"{self.API_BASE}/folders", json={"name": name}
+        )
+        folders = response.json().get("folders", [])
+        return folders[0] if folders else {}
+
+    async def rename_folder(self, folder_id: int, name: str) -> None:
+        """Rename a folder.
+
+        Args:
+            folder_id: Folder ID
+            name: New folder name
+
+        Raises:
+            HTTPStatusError: 404 if folder not found, 409 if name exists
+        """
+        await self._make_request(
+            "PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name}
+        )
+
+    async def delete_folder(self, folder_id: int) -> None:
+        """Delete a folder and all its feeds/items.
+
+        Args:
+            folder_id: Folder ID
+
+        Raises:
+            HTTPStatusError: 404 if folder not found
+        """
+        await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}")
+
+    async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None:
+        """Mark all items in a folder as read.
+
+        Args:
+            folder_id: Folder ID
+            newest_item_id: ID of newest item to mark read (prevents marking
+                           items user hasn't seen yet)
+
+        Raises:
+            HTTPStatusError: 404 if folder not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/folders/{folder_id}/read",
+            json={"newestItemId": newest_item_id},
+        )
+
+    # --- Feeds ---
+
+    async def get_feeds(self) -> dict[str, Any]:
+        """Get all feeds with metadata.
+
+        Returns:
+            Dict with keys:
+                - feeds: List of feed objects
+                - starredCount: Number of starred items
+                - newestItemId: ID of newest item (omitted if no items)
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/feeds")
+        return response.json()
+
+    async def create_feed(
+        self, url: str, folder_id: int | None = None
+    ) -> dict[str, Any]:
+        """Subscribe to a new feed.
+
+        Args:
+            url: Feed URL
+            folder_id: Optional folder ID (None for root)
+
+        Returns:
+            Created feed data
+
+        Raises:
+            HTTPStatusError: 409 if feed already exists, 422 if URL is invalid
+        """
+        body: dict[str, Any] = {"url": url}
+        if folder_id is not None:
+            body["folderId"] = folder_id
+        response = await self._make_request("POST", f"{self.API_BASE}/feeds", json=body)
+        data = response.json()
+        feeds = data.get("feeds", [])
+        return feeds[0] if feeds else {}
+
+    async def delete_feed(self, feed_id: int) -> None:
+        """Unsubscribe from a feed (deletes all items).
+
+        Args:
+            feed_id: Feed ID
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}")
+
+    async def move_feed(self, feed_id: int, folder_id: int | None) -> None:
+        """Move a feed to a different folder.
+
+        Args:
+            feed_id: Feed ID
+            folder_id: Target folder ID (None for root)
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/move",
+            json={"folderId": folder_id},
+        )
+
+    async def rename_feed(self, feed_id: int, title: str) -> None:
+        """Rename a feed.
+
+        Args:
+            feed_id: Feed ID
+            title: New feed title
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/rename",
+            json={"feedTitle": title},
+        )
+
+    async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None:
+        """Mark all items in a feed as read.
+
+        Args:
+            feed_id: Feed ID
+            newest_item_id: ID of newest item to mark read
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/read",
+            json={"newestItemId": newest_item_id},
+        )
+
+    # --- Items ---
+
+    async def get_items(
+        self,
+        batch_size: int = 50,
+        offset: int = 0,
+        type_: int = NewsItemType.ALL,
+        id_: int = 0,
+        get_read: bool = True,
+        oldest_first: bool = False,
+    ) -> list[dict[str, Any]]:
+        """Get items (articles) with filtering.
+
+        Args:
+            batch_size: Number of items to return (-1 for all)
+            offset: Item ID to start after (for pagination)
+            type_: Item type filter (NewsItemType)
+            id_: Feed/folder ID (ignored for STARRED/ALL types)
+            get_read: Include read items
+            oldest_first: Sort oldest first instead of newest
+
+        Returns:
+            List of item objects
+        """
+        params: dict[str, Any] = {
+            "batchSize": batch_size,
+            "offset": offset,
+            "type": type_,
+            "id": id_,
+            "getRead": str(get_read).lower(),
+            "oldestFirst": str(oldest_first).lower(),
+        }
+        response = await self._make_request(
+            "GET", f"{self.API_BASE}/items", params=params
+        )
+        return response.json().get("items", [])
+
+    async def get_item(self, item_id: int) -> dict[str, Any]:
+        """Get a specific item by ID.
+
+        Note: The News API doesn't have a direct single-item endpoint,
+        so we fetch all items and filter. For efficiency, consider
+        caching or using get_items with specific feed if known.
+
+        Args:
+            item_id: Item ID
+
+        Returns:
+            Item data
+
+        Raises:
+            ValueError: If item not found
+        """
+        # Fetch all items and find the one we need
+        # This is inefficient but the API doesn't provide a direct endpoint
+        items = await self.get_items(batch_size=-1, get_read=True)
+        for item in items:
+            if item.get("id") == item_id:
+                return item
+        raise ValueError(f"Item {item_id} not found")
+
+    async def get_updated_items(
+        self,
+        last_modified: int,
+        type_: int = NewsItemType.ALL,
+        id_: int = 0,
+    ) -> list[dict[str, Any]]:
+        """Get items modified since a timestamp (for delta sync).
+
+        Args:
+            last_modified: Unix timestamp (seconds or microseconds)
+            type_: Item type filter
+            id_: Feed/folder ID
+
+        Returns:
+            List of modified items (includes deleted items)
+        """
+        params: dict[str, Any] = {
+            "lastModified": last_modified,
+            "type": type_,
+            "id": id_,
+        }
+        response = await self._make_request(
+            "GET", f"{self.API_BASE}/items/updated", params=params
+        )
+        return response.json().get("items", [])
+
+    async def mark_item_read(self, item_id: int) -> None:
+        """Mark a single item as read.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read")
+
+    async def mark_item_unread(self, item_id: int) -> None:
+        """Mark a single item as unread.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread")
+
+    async def star_item(self, item_id: int) -> None:
+        """Star (favorite) a single item.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star")
+
+    async def unstar_item(self, item_id: int) -> None:
+        """Unstar a single item.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar")
+
+    async def mark_items_read(self, item_ids: list[int]) -> None:
+        """Mark multiple items as read.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids}
+        )
+
+    async def mark_items_unread(self, item_ids: list[int]) -> None:
+        """Mark multiple items as unread.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/items/unread/multiple",
+            json={"itemIds": item_ids},
+        )
+
+    async def star_items(self, item_ids: list[int]) -> None:
+        """Star multiple items.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids}
+        )
+
+    async def unstar_items(self, item_ids: list[int]) -> None:
+        """Unstar multiple items.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/items/unstar/multiple",
+            json={"itemIds": item_ids},
+        )
+
+    async def mark_all_read(self, newest_item_id: int) -> None:
+        """Mark all items as read.
+
+        Args:
+            newest_item_id: ID of newest item to mark read
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id}
+        )
+
+    # --- Status ---
+
+    async def get_status(self) -> dict[str, Any]:
+        """Get News app status and configuration.
+
+        Returns:
+            Dict with version and warnings
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/status")
+        return response.json()
+
+    async def get_version(self) -> str:
+        """Get News app version.
+
+        Returns:
+            Version string (e.g., "25.0.0")
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/version")
+        return response.json().get("version", "")
@@ -0,0 +1,170 @@
+"""Pydantic models for Nextcloud News app responses."""
+
+from typing import List
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .base import BaseResponse
+
+
+class NewsFolder(BaseModel):
+    """Model for a News folder.
+
+    Holds the two attributes of a folder entry: its numeric ID and its
+    display name.
+    """
+
+    # Allows population by field name as well as by alias. No field on this
+    # model declares an alias, so this presumably just mirrors the other
+    # News models.
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(description="Folder ID")
+    name: str = Field(description="Folder name")
+
+
+class NewsFeed(BaseModel):
+    """Model for a News feed (RSS/Atom subscription).
+
+    Fields that declare an ``alias`` use the camelCase key as the input name;
+    because ``populate_by_name`` is enabled, the snake_case attribute name is
+    accepted as well. ``id``, ``url``, ``title`` and ``added`` are required;
+    every other field has a default.
+    """
+
+    # Accept both the camelCase alias and the Python field name on input
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(description="Feed ID")
+    url: str = Field(description="Feed URL")
+    title: str = Field(description="Feed title")
+    favicon_link: str | None = Field(
+        None, alias="faviconLink", description="Favicon URL"
+    )
+    link: str | None = Field(None, description="Website link")
+    added: int = Field(description="Unix timestamp when feed was added")
+    folder_id: int | None = Field(
+        None, alias="folderId", description="Parent folder ID"
+    )
+    unread_count: int = Field(
+        0, alias="unreadCount", description="Number of unread items"
+    )
+    ordering: int = Field(
+        0, description="Feed ordering (0=default, 1=oldest, 2=newest)"
+    )
+    pinned: bool = Field(False, description="Whether feed is pinned to top")
+    update_error_count: int = Field(
+        0, alias="updateErrorCount", description="Consecutive update failures"
+    )
+    last_update_error: str | None = Field(
+        None, alias="lastUpdateError", description="Last update error message"
+    )
+
+    @property
+    def has_errors(self) -> bool:
+        """Check if feed has update errors.
+
+        Returns:
+            True when ``update_error_count`` is greater than zero. The
+            ``last_update_error`` text is not consulted.
+        """
+        return self.update_error_count > 0
+
+
+class NewsItem(BaseModel):
+    """Model for a News item (article) with full content.
+
+    Unlike ``NewsItemSummary`` this model carries the HTML ``body`` plus
+    enclosure/media and hashing metadata. Required fields are ``id``,
+    ``guid``, ``guid_hash``, ``title``, ``feed_id`` and ``last_modified``;
+    all others have defaults. Aliased fields take the camelCase key on input
+    and, via ``populate_by_name``, the snake_case name too.
+    """
+
+    # Accept both the camelCase alias and the Python field name on input
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(description="Item ID")
+    guid: str = Field(description="Globally unique identifier")
+    guid_hash: str = Field(alias="guidHash", description="MD5 hash of GUID")
+    url: str | None = Field(None, description="Article URL")
+    title: str = Field(description="Article title")
+    author: str | None = Field(None, description="Article author")
+    pub_date: int | None = Field(
+        None, alias="pubDate", description="Publication timestamp"
+    )
+    body: str | None = Field(None, description="Article content (HTML)")
+    enclosure_mime: str | None = Field(
+        None, alias="enclosureMime", description="Enclosure MIME type"
+    )
+    enclosure_link: str | None = Field(
+        None, alias="enclosureLink", description="Enclosure URL"
+    )
+    media_thumbnail: str | None = Field(
+        None, alias="mediaThumbnail", description="Media thumbnail URL"
+    )
+    media_description: str | None = Field(
+        None, alias="mediaDescription", description="Media description"
+    )
+    feed_id: int = Field(alias="feedId", description="Parent feed ID")
+    # Defaults below apply only when the key is absent from the input:
+    # an item is then treated as unread, not starred and left-to-right.
+    unread: bool = Field(True, description="Whether item is unread")
+    starred: bool = Field(False, description="Whether item is starred")
+    rtl: bool = Field(False, description="Right-to-left text")
+    last_modified: int = Field(
+        alias="lastModified", description="Last modification timestamp"
+    )
+    fingerprint: str | None = Field(
+        None, description="Content fingerprint for deduplication"
+    )
+    content_hash: str | None = Field(
+        None, alias="contentHash", description="Content hash"
+    )
+
+
+class NewsItemSummary(BaseModel):
+    """Lightweight model for News item list responses.
+
+    Carries a subset of the ``NewsItem`` fields and omits the HTML body and
+    the enclosure/media/hash metadata. ``id``, ``title`` and ``feed_id`` are
+    required.
+    """
+
+    # Accept both the camelCase alias and the Python field name on input
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(description="Item ID")
+    title: str = Field(description="Article title")
+    feed_id: int = Field(alias="feedId", description="Parent feed ID")
+    unread: bool = Field(True, description="Whether item is unread")
+    starred: bool = Field(False, description="Whether item is starred")
+    pub_date: int | None = Field(
+        None, alias="pubDate", description="Publication timestamp"
+    )
+    url: str | None = Field(None, description="Article URL")
+    author: str | None = Field(None, description="Article author")
+
+
+class NewsStatus(BaseModel):
+    """Model for News app status.
+
+    Declares the same two fields as ``GetStatusResponse`` but derives from
+    plain ``BaseModel`` rather than ``BaseResponse``.
+    """
+
+    version: str = Field(description="News app version")
+    # default_factory gives each instance its own empty dict when the input
+    # has no warnings
+    warnings: dict = Field(default_factory=dict, description="Configuration warnings")
+
+
+# --- Response Models ---
+
+
+class ListFoldersResponse(BaseResponse):
+    """Response model for listing folders.
+
+    Both fields are required; ``total_count`` is supplied by the caller and
+    is not derived from ``results`` by this model.
+    """
+
+    results: List[NewsFolder] = Field(description="List of folders")
+    total_count: int = Field(description="Total number of folders")
+
+
+class ListFeedsResponse(BaseResponse):
+    """Response model for listing feeds.
+
+    ``results`` and ``total_count`` are required. ``starred_count`` defaults
+    to 0 and ``newest_item_id`` to None when not supplied.
+    """
+
+    results: List[NewsFeed] = Field(description="List of feeds")
+    starred_count: int = Field(0, description="Number of starred items")
+    newest_item_id: int | None = Field(None, description="ID of newest item")
+    total_count: int = Field(description="Total number of feeds")
+
+
+class ListItemsResponse(BaseResponse):
+    """Response model for listing items.
+
+    Describes one page of item summaries. ``total_count`` is the size of the
+    returned page (see its description), not the number of items available
+    overall. ``has_more`` and ``oldest_id`` are pagination hints supplied by
+    the caller; they default to False and None.
+    """
+
+    results: List[NewsItemSummary] = Field(description="List of items")
+    total_count: int = Field(description="Number of items returned")
+    has_more: bool = Field(False, description="Whether more items exist")
+    oldest_id: int | None = Field(None, description="Oldest item ID (for pagination)")
+
+
+class GetItemResponse(BaseResponse):
+    """Response model for getting a single item.
+
+    Wraps one full ``NewsItem`` (including its HTML body) in the required
+    ``item`` field.
+    """
+
+    item: NewsItem = Field(description="Full item details")
+
+
+class FeedHealthResponse(BaseResponse):
+    """Response model for feed health status.
+
+    A flat summary of one feed's update-error state. Every field except
+    ``last_error`` is required; ``last_error`` defaults to None.
+    """
+
+    feed_id: int = Field(description="Feed ID")
+    title: str = Field(description="Feed title")
+    url: str = Field(description="Feed URL")
+    has_errors: bool = Field(description="Whether feed has update errors")
+    error_count: int = Field(description="Number of consecutive errors")
+    last_error: str | None = Field(None, description="Last error message")
+
+
+class GetStatusResponse(BaseResponse):
+    """Response model for app status.
+
+    ``version`` is required; ``warnings`` defaults to a fresh empty dict.
+    """
+
+    version: str = Field(description="News app version")
+    # default_factory gives each instance its own empty dict
+    warnings: dict = Field(default_factory=dict, description="Configuration warnings")
@@ -2,6 +2,7 @@ from .calendar import configure_calendar_tools
 from .contacts import configure_contacts_tools
 from .cookbook import configure_cookbook_tools
 from .deck import configure_deck_tools
+from .news import configure_news_tools
 from .notes import configure_notes_tools
 from .semantic import configure_semantic_tools
 from .sharing import configure_sharing_tools
@@ -13,6 +14,7 @@ __all__ = [
    "configure_contacts_tools",
    "configure_cookbook_tools",
    "configure_deck_tools",
+    "configure_news_tools",
    "configure_notes_tools",
    "configure_semantic_tools",
    "configure_sharing_tools",
@@ -0,0 +1,360 @@
+"""MCP tools for Nextcloud News app."""
+
+import logging
+
+from httpx import HTTPStatusError, RequestError
+from mcp.server.fastmcp import Context, FastMCP
+from mcp.shared.exceptions import McpError
+from mcp.types import ErrorData
+
+from nextcloud_mcp_server.auth import require_scopes
+from nextcloud_mcp_server.client.news import NewsItemType
+from nextcloud_mcp_server.context import get_client
+from nextcloud_mcp_server.models.news import (
+    FeedHealthResponse,
+    GetItemResponse,
+    GetStatusResponse,
+    ListFeedsResponse,
+    ListFoldersResponse,
+    ListItemsResponse,
+    NewsFeed,
+    NewsFolder,
+    NewsItem,
+    NewsItemSummary,
+)
+from nextcloud_mcp_server.observability.metrics import instrument_tool
+
+logger = logging.getLogger(__name__)
+
+
+def configure_news_tools(mcp: FastMCP):
+    """Configure News app MCP tools."""
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_list_folders(ctx: Context) -> ListFoldersResponse:
+        """List all News folders (requires news:read scope)."""
+        client = await get_client(ctx)
+        try:
+            folders_data = await client.news.get_folders()
+            folders = [NewsFolder(**f) for f in folders_data]
+            return ListFoldersResponse(results=folders, total_count=len(folders))
+        except RequestError as e:
+            raise McpError(
+                ErrorData(code=-1, message=f"Network error listing folders: {str(e)}")
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to list folders: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_list_feeds(ctx: Context) -> ListFeedsResponse:
+        """List all News feeds with metadata (requires news:read scope).
+
+        Returns feeds with unread counts, error status, and overall starred count.
+        """
+        client = await get_client(ctx)
+        try:
+            data = await client.news.get_feeds()
+            feeds = [NewsFeed(**f) for f in data.get("feeds", [])]
+            return ListFeedsResponse(
+                results=feeds,
+                starred_count=data.get("starredCount", 0),
+                newest_item_id=data.get("newestItemId"),
+                total_count=len(feeds),
+            )
+        except RequestError as e:
+            raise McpError(
+                ErrorData(code=-1, message=f"Network error listing feeds: {str(e)}")
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to list feeds: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_list_items(
+        ctx: Context,
+        feed_id: int | None = None,
+        folder_id: int | None = None,
+        starred_only: bool = False,
+        unread_only: bool = False,
+        limit: int = 50,
+        offset: int = 0,
+    ) -> ListItemsResponse:
+        """List News items (articles) with optional filtering (requires news:read scope).
+
+        Args:
+            feed_id: Filter by specific feed ID
+            folder_id: Filter by specific folder ID
+            starred_only: Return only starred items
+            unread_only: Return only unread items
+            limit: Maximum number of items to return (default 50, -1 for all)
+            offset: Item ID to start after (for pagination)
+
+        Returns:
+            ListItemsResponse with items, count, and pagination info
+        """
+        client = await get_client(ctx)
+
+        # Determine item type filter
+        type_ = NewsItemType.ALL
+        id_ = 0
+        if starred_only:
+            type_ = NewsItemType.STARRED
+        elif feed_id is not None:
+            type_ = NewsItemType.FEED
+            id_ = feed_id
+        elif folder_id is not None:
+            type_ = NewsItemType.FOLDER
+            id_ = folder_id
+
+        try:
+            items_data = await client.news.get_items(
+                batch_size=limit,
+                offset=offset,
+                type_=type_,
+                id_=id_,
+                get_read=not unread_only,
+            )
+            items = [NewsItemSummary(**i) for i in items_data]
+
+            # Determine pagination info
+            oldest_id = min((i.id for i in items), default=None) if items else None
+            has_more = len(items) == limit and limit > 0
+
+            return ListItemsResponse(
+                results=items,
+                total_count=len(items),
+                has_more=has_more,
+                oldest_id=oldest_id,
+            )
+        except RequestError as e:
+            raise McpError(
+                ErrorData(code=-1, message=f"Network error listing items: {str(e)}")
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to list items: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_get_item(item_id: int, ctx: Context) -> GetItemResponse:
+        """Get a specific News item by ID with full content (requires news:read scope).
+
+        Args:
+            item_id: Item ID
+
+        Returns:
+            GetItemResponse with full item details including HTML body
+        """
+        client = await get_client(ctx)
+        try:
+            item_data = await client.news.get_item(item_id)
+            item = NewsItem(**item_data)
+            return GetItemResponse(item=item)
+        except ValueError as e:
+            raise McpError(ErrorData(code=-1, message=str(e)))
+        except RequestError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1, message=f"Network error getting item {item_id}: {str(e)}"
+                )
+            )
+        except HTTPStatusError as e:
+            if e.response.status_code == 404:
+                raise McpError(ErrorData(code=-1, message=f"Item {item_id} not found"))
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to get item {item_id}: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_get_starred_items(
+        ctx: Context, limit: int = 50, offset: int = 0
+    ) -> ListItemsResponse:
+        """Get starred (favorited) News items (requires news:read scope).
+
+        Convenience method for retrieving user's starred articles.
+
+        Args:
+            limit: Maximum number of items to return (default 50, -1 for all)
+            offset: Item ID to start after (for pagination)
+
+        Returns:
+            ListItemsResponse with starred items
+        """
+        client = await get_client(ctx)
+        try:
+            items_data = await client.news.get_items(
+                batch_size=limit,
+                offset=offset,
+                type_=NewsItemType.STARRED,
+                get_read=True,  # Include read starred items
+            )
+            items = [NewsItemSummary(**i) for i in items_data]
+
+            oldest_id = min((i.id for i in items), default=None) if items else None
+            has_more = len(items) == limit and limit > 0
+
+            return ListItemsResponse(
+                results=items,
+                total_count=len(items),
+                has_more=has_more,
+                oldest_id=oldest_id,
+            )
+        except RequestError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1, message=f"Network error getting starred items: {str(e)}"
+                )
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to get starred items: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_get_unread_items(
+        ctx: Context, limit: int = 50, offset: int = 0
+    ) -> ListItemsResponse:
+        """Get unread News items (requires news:read scope).
+
+        Convenience method for retrieving unread articles across all feeds.
+
+        Args:
+            limit: Maximum number of items to return (default 50, -1 for all)
+            offset: Item ID to start after (for pagination)
+
+        Returns:
+            ListItemsResponse with unread items
+        """
+        client = await get_client(ctx)
+        try:
+            items_data = await client.news.get_items(
+                batch_size=limit,
+                offset=offset,
+                type_=NewsItemType.ALL,
+                get_read=False,  # Only unread items
+            )
+            items = [NewsItemSummary(**i) for i in items_data]
+
+            oldest_id = min((i.id for i in items), default=None) if items else None
+            has_more = len(items) == limit and limit > 0
+
+            return ListItemsResponse(
+                results=items,
+                total_count=len(items),
+                has_more=has_more,
+                oldest_id=oldest_id,
+            )
+        except RequestError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1, message=f"Network error getting unread items: {str(e)}"
+                )
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to get unread items: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_get_feed_health(feed_id: int, ctx: Context) -> FeedHealthResponse:
+        """Get health status for a specific feed (requires news:read scope).
+
+        Returns error count and last error message if the feed has update issues.
+
+        Args:
+            feed_id: Feed ID to check
+
+        Returns:
+            FeedHealthResponse with error status
+        """
+        client = await get_client(ctx)
+        try:
+            data = await client.news.get_feeds()
+            for feed_data in data.get("feeds", []):
+                if feed_data.get("id") == feed_id:
+                    feed = NewsFeed(**feed_data)
+                    return FeedHealthResponse(
+                        feed_id=feed.id,
+                        title=feed.title,
+                        url=feed.url,
+                        has_errors=feed.has_errors,
+                        error_count=feed.update_error_count,
+                        last_error=feed.last_update_error,
+                    )
+            raise McpError(ErrorData(code=-1, message=f"Feed {feed_id} not found"))
+        except RequestError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Network error getting feed health: {str(e)}",
+                )
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to get feed health: {e.response.status_code}",
+                )
+            )
+
+    @mcp.tool()
+    @require_scopes("news:read")
+    @instrument_tool
+    async def nc_news_get_status(ctx: Context) -> GetStatusResponse:
+        """Get News app status and version (requires news:read scope).
+
+        Returns version information and any configuration warnings.
+        """
+        client = await get_client(ctx)
+        try:
+            status_data = await client.news.get_status()
+            return GetStatusResponse(
+                version=status_data.get("version", "unknown"),
+                warnings=status_data.get("warnings", {}),
+            )
+        except RequestError as e:
+            raise McpError(
+                ErrorData(code=-1, message=f"Network error getting status: {str(e)}")
+            )
+        except HTTPStatusError as e:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message=f"Failed to get status: {e.response.status_code}",
+                )
+            )
@@ -0,0 +1,49 @@
+"""HTML to Markdown conversion utilities for vector sync."""
+
+import logging
+
+from markdownify import markdownify as md
+
+logger = logging.getLogger(__name__)
+
+
+def html_to_markdown(html_content: str | None) -> str:
+    """Convert HTML content to Markdown, preserving semantic structure.
+
+    This function converts HTML (typically from RSS/Atom feed items) to Markdown
+    for better text embedding. Markdown preserves:
+    - Heading hierarchy (important for document structure)
+    - Lists (bullet and numbered)
+    - Links (as [text](url))
+    - Bold/italic emphasis
+    - Paragraphs and line breaks
+
+    ``<script>``, ``<style>``, ``<iframe>`` and ``<noscript>`` elements are
+    removed together with their content before conversion, so embedded
+    JavaScript/CSS never reaches the returned text.
+
+    If the Markdown conversion raises, the function logs a warning and falls
+    back to stripping all tags and collapsing whitespace.
+
+    Args:
+        html_content: HTML string to convert (may be None or empty)
+
+    Returns:
+        Markdown string with leading/trailing whitespace stripped, or an empty
+        string if input is None/empty
+
+    Example:
+        >>> html_to_markdown("<h1>Title</h1><p>Content with <b>bold</b>.</p>")
+        '# Title\\n\\nContent with **bold**.'
+    """
+    if not html_content:
+        return ""
+
+    import re
+
+    # markdownify's ``strip`` option only drops the tag markup and keeps the
+    # inner text, so script/style bodies would otherwise leak into the output.
+    # Delete those elements (including their content) up front.
+    cleaned = re.sub(
+        r"<(script|style|iframe|noscript)\b[^>]*>.*?</\1\s*>",
+        "",
+        html_content,
+        flags=re.IGNORECASE | re.DOTALL,
+    )
+
+    try:
+        markdown = md(
+            cleaned,
+            heading_style="ATX",  # Use # style headings
+            # Drops the markup of any stray/unclosed tags the regex did not match
+            strip=["script", "style", "iframe", "noscript"],
+            bullets="-",  # Use - for unordered lists
+            code_language="",  # Don't add language hints to code blocks
+        )
+        return markdown.strip()
+    except Exception as e:
+        # Deliberate best-effort: a malformed article must not abort indexing.
+        logger.warning("Failed to convert HTML to Markdown: %s", e)
+        # Fallback: strip all HTML tags as a last resort
+        text = re.sub(r"<[^>]+>", " ", cleaned)
+        return " ".join(text.split())  # Normalize whitespace
@@ -272,6 +272,45 @@ async def _index_document(
            file_path = None  # Notes don't have file paths
            content_bytes = None  # Notes don't have binary content
            content_type = None
+        elif doc_task.doc_type == "news_item":
+            from nextcloud_mcp_server.vector.html_processor import html_to_markdown
+
+            item = await nc_client.news.get_item(int(doc_task.doc_id))
+            # Convert HTML body to Markdown for better embedding
+            body_markdown = html_to_markdown(item.get("body", ""))
+            # Build content: title + URL + body
+            item_title = item.get("title", "")
+            item_url = item.get("url", "")
+            feed_title = item.get("feedTitle", "")
+
+            # Structure content for embedding
+            content_parts = [item_title]
+            if feed_title:
+                content_parts.append(f"Source: {feed_title}")
+            if item_url:
+                content_parts.append(f"URL: {item_url}")
+            content_parts.append("")  # Blank line
+            content_parts.append(body_markdown)
+            content = "\n".join(content_parts)
+
+            title = item_title
+            etag = item.get("guidHash", "")
+            # Store news-specific metadata for later use in payload
+            file_metadata = {
+                "feed_id": item.get("feedId"),
+                "feed_title": feed_title,
+                "author": item.get("author"),
+                "pub_date": item.get("pubDate"),
+                "starred": item.get("starred", False),
+                "unread": item.get("unread", True),
+                "url": item_url,
+                "guid_hash": item.get("guidHash"),
+                "enclosure_link": item.get("enclosureLink"),
+                "enclosure_mime": item.get("enclosureMime"),
+            }
+            file_path = None
+            content_bytes = None
+            content_type = None
        elif doc_task.doc_type == "file":
            # For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
            if not doc_task.file_path:
@@ -358,15 +397,16 @@ async def _index_document(
        chunks = await chunker.chunk_text(content)

    # Assign page numbers to chunks if page boundaries are available (PDFs)
-    if doc_task.doc_type == "file" and "page_boundaries" in file_metadata:
+    page_boundaries = file_metadata.get("page_boundaries")
+    if doc_task.doc_type == "file" and page_boundaries is not None:
        with trace_operation(
            "vector_sync.assign_page_numbers",
            attributes={
                "vector_sync.chunk_count": len(chunks),
-                "vector_sync.page_count": len(file_metadata["page_boundaries"]),
+                "vector_sync.page_count": len(page_boundaries),
            },
        ):
-            assign_page_numbers(chunks, file_metadata["page_boundaries"])
+            assign_page_numbers(chunks, page_boundaries)

            # Diagnostic: Verify page number assignment
            assigned_count = sum(1 for c in chunks if c.page_number is not None)
@@ -389,8 +429,8 @@ async def _index_document(
                    f"Text length: {len(content)}, "
                    f"Chunks: {len(chunks)}, "
                    f"Chunk offset range: [{chunks[0].start_offset}:{chunks[-1].end_offset}], "
-                    f"Page boundaries: {len(file_metadata['page_boundaries'])} pages, "
-                    f"First boundary: {file_metadata['page_boundaries'][0] if file_metadata['page_boundaries'] else 'None'}"
+                    f"Page boundaries: {len(page_boundaries)} pages, "
+                    f"First boundary: {page_boundaries[0] if page_boundaries else 'None'}"
                )

    # Extract chunk texts for embedding
@@ -566,6 +606,23 @@ async def _index_document(
                        if doc_task.doc_type == "file"
                        else {}
                    ),
+                    # News item-specific metadata
+                    **(
+                        {
+                            "feed_id": file_metadata.get("feed_id"),
+                            "feed_title": file_metadata.get("feed_title"),
+                            "author": file_metadata.get("author"),
+                            "pub_date": file_metadata.get("pub_date"),
+                            "starred": file_metadata.get("starred"),
+                            "unread": file_metadata.get("unread"),
+                            "url": file_metadata.get("url"),
+                            "guid_hash": file_metadata.get("guid_hash"),
+                            "enclosure_link": file_metadata.get("enclosure_link"),
+                            "enclosure_mime": file_metadata.get("enclosure_mime"),
+                        }
+                        if doc_task.doc_type == "news_item"
+                        else {}
+                    ),
                    # Highlighted page image (PDF only)
                    **(
                        {
@@ -544,9 +544,217 @@ async def scan_user_documents(

        queued += file_queued

+        # Scan News items (starred + unread)
+        news_queued = 0
+        try:
+            news_queued = await scan_news_items(
+                user_id=user_id,
+                send_stream=send_stream,
+                nc_client=nc_client,
+                initial_sync=initial_sync,
+                scan_id=scan_id,
+            )
+            queued += news_queued
+        except Exception as e:
+            logger.warning(f"Failed to scan news items for {user_id}: {e}")
+
        if queued > 0:
            logger.info(
-                f"Sent {queued} documents ({file_queued} files) for incremental sync: {user_id}"
+                f"Sent {queued} documents ({file_queued} files, {news_queued} news items) for incremental sync: {user_id}"
            )
        else:
            logger.debug(f"No changes detected for {user_id}")
+
+
+async def scan_news_items(
+    user_id: str,
+    send_stream: MemoryObjectSendStream[DocumentTask],
+    nc_client: NextcloudClient,
+    initial_sync: bool,
+    scan_id: int,
+) -> int:
+    """
+    Scan user's News items (starred + unread) and queue changed items.
+
+    Indexes starred and unread items for semantic search. This provides
+    a balanced approach - important items (starred) and current items
+    (unread) are searchable, while avoiding indexing the entire history.
+
+    On incremental scans, items that are indexed in Qdrant but are no longer
+    returned by the News API (neither starred nor unread any more) are queued
+    for deletion once they have been missing for longer than the grace period
+    (1.5x the configured scan interval).
+
+    Args:
+        user_id: User to scan
+        send_stream: Stream to send changed documents to processors
+        nc_client: Authenticated Nextcloud client
+        initial_sync: If True, send all documents (first-time sync)
+        scan_id: Scan identifier for logging
+
+    Returns:
+        Number of tasks queued for processing (index tasks plus delete tasks)
+    """
+    from nextcloud_mcp_server.client.news import NewsItemType
+
+    settings = get_settings()
+    queued = 0
+
+    # Get indexed news item IDs from Qdrant (for deletion tracking)
+    indexed_item_ids: set[str] = set()
+    if not initial_sync:
+        indexed_item_ids = await _fetch_indexed_news_item_ids(
+            user_id=user_id,
+            collection_name=settings.get_collection_name(),
+        )
+        logger.debug(f"Found {len(indexed_item_ids)} indexed news items in Qdrant")
+
+    # Fetch starred items (type=STARRED)
+    starred_items = await nc_client.news.get_items(
+        batch_size=-1,  # Get all
+        type_=NewsItemType.STARRED,
+        get_read=True,  # Include read starred items
+    )
+    logger.debug(f"[SCAN-{scan_id}] Found {len(starred_items)} starred news items")
+
+    # Fetch unread items (type=ALL, get_read=False)
+    unread_items = await nc_client.news.get_items(
+        batch_size=-1,
+        type_=NewsItemType.ALL,
+        get_read=False,  # Only unread
+    )
+    logger.debug(f"[SCAN-{scan_id}] Found {len(unread_items)} unread news items")
+
+    # Combine and deduplicate (an item can be both starred and unread);
+    # for duplicates the entry from the unread listing wins.
+    items_by_id: dict[int, dict] = {
+        item["id"]: item for item in (*starred_items, *unread_items)
+    }
+
+    item_count = len(items_by_id)
+    nextcloud_item_ids: set[str] = set()
+    # Placeholders older than this are considered abandoned and are requeued.
+    stale_threshold = settings.vector_sync_scan_interval * 5
+
+    for item_id, item in items_by_id.items():
+        doc_id = str(item_id)
+        nextcloud_item_ids.add(doc_id)
+
+        # Use lastModified timestamp; tolerate a missing or null value.
+        modified_at = item.get("lastModified") or 0
+        # Values above 10**10 cannot be second-resolution timestamps (that
+        # would be past year 2286), so treat them as microseconds.
+        if modified_at > 10000000000:
+            modified_at = modified_at // 1000000
+
+        if initial_sync:
+            # Send everything on first sync
+            needs_indexing = True
+        else:
+            # Incremental sync: check if item exists and compare modified_at
+            # NOTE(review): the grace-period key omits the doc type; if other
+            # scanners share _potentially_deleted with the same key shape,
+            # equal numeric IDs of different doc types could collide - confirm.
+            doc_key = (user_id, doc_id)
+            if doc_key in _potentially_deleted:
+                logger.debug(
+                    f"News item {doc_id} reappeared, removing from deletion grace period"
+                )
+                del _potentially_deleted[doc_key]
+
+            # Query Qdrant for existing entry
+            existing_metadata = await query_document_metadata(
+                doc_id=doc_id, doc_type="news_item", user_id=user_id
+            )
+
+            needs_indexing = False
+            if existing_metadata is None:
+                needs_indexing = True
+            elif existing_metadata.get("modified_at", 0) < modified_at:
+                needs_indexing = True
+            elif existing_metadata.get("is_placeholder", False):
+                queued_at = existing_metadata.get("queued_at", 0)
+                placeholder_age = time.time() - queued_at
+                if placeholder_age > stale_threshold:
+                    logger.debug(
+                        f"Found stale placeholder for news item {doc_id} "
+                        f"(age={placeholder_age:.1f}s), requeuing"
+                    )
+                    needs_indexing = True
+
+        if needs_indexing:
+            await _queue_news_item_for_indexing(
+                send_stream=send_stream,
+                user_id=user_id,
+                doc_id=doc_id,
+                modified_at=modified_at,
+            )
+            queued += 1
+
+    logger.info(
+        f"[SCAN-{scan_id}] Found {item_count} news items (starred+unread) for {user_id}"
+    )
+    record_vector_sync_scan(item_count)
+
+    # Check for deleted items (not initial sync)
+    # Items become "deleted" when they are no longer starred AND become read
+    if not initial_sync:
+        grace_period = settings.vector_sync_scan_interval * 1.5
+        current_time = time.time()
+
+        for doc_id in indexed_item_ids - nextcloud_item_ids:
+            doc_key = (user_id, doc_id)
+            first_missing_time = _potentially_deleted.get(doc_key)
+
+            if first_missing_time is None:
+                logger.debug(
+                    f"News item {doc_id} missing for first time, starting grace period"
+                )
+                _potentially_deleted[doc_key] = current_time
+                continue
+
+            time_missing = current_time - first_missing_time
+            if time_missing < grace_period:
+                # Still inside the grace period; re-check on a later scan.
+                continue
+
+            logger.info(
+                f"News item {doc_id} missing for {time_missing:.1f}s "
+                f"(>{grace_period:.1f}s grace period), sending deletion"
+            )
+            await send_stream.send(
+                DocumentTask(
+                    user_id=user_id,
+                    doc_id=doc_id,
+                    doc_type="news_item",
+                    operation="delete",
+                    modified_at=0,
+                )
+            )
+            queued += 1
+            del _potentially_deleted[doc_key]
+
+    return queued
+
+
+async def _fetch_indexed_news_item_ids(user_id: str, collection_name: str) -> set[str]:
+    """Return the doc_ids of all news_item points stored in Qdrant for a user.
+
+    Follows the scroll cursor until it is exhausted, so the result is not
+    truncated at a single page of points.
+    """
+    qdrant_client = await get_qdrant_client()
+    news_filter = Filter(
+        must=[
+            FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+            FieldCondition(key="doc_type", match=MatchValue(value="news_item")),
+        ]
+    )
+
+    indexed_ids: set[str] = set()
+    next_offset = None
+    while True:
+        points, next_offset = await qdrant_client.scroll(
+            collection_name=collection_name,
+            scroll_filter=news_filter,
+            with_payload=["doc_id"],
+            with_vectors=False,
+            limit=10000,
+            offset=next_offset,
+        )
+        # Skip points without a usable payload instead of crashing the scan.
+        indexed_ids.update(
+            point.payload["doc_id"]
+            for point in points
+            if point.payload and "doc_id" in point.payload
+        )
+        # Stop when there is no further page (or a page came back empty).
+        if next_offset is None or not points:
+            break
+
+    return indexed_ids
+
+
+async def _queue_news_item_for_indexing(
+    send_stream: MemoryObjectSendStream[DocumentTask],
+    user_id: str,
+    doc_id: str,
+    modified_at: int,
+) -> None:
+    """Write a placeholder point for a news item, then send its index task."""
+    # Placeholder is written first, before the task is handed to processors.
+    await write_placeholder_point(
+        doc_id=doc_id,
+        doc_type="news_item",
+        user_id=user_id,
+        modified_at=modified_at,
+    )
+    await send_stream.send(
+        DocumentTask(
+            user_id=user_id,
+            doc_id=doc_id,
+            doc_type="news_item",
+            operation="index",
+            modified_at=modified_at,
+        )
+    )