From c1763ebc6acbf407b595f0ec074086914d88896b Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Wed, 7 May 2025 23:06:22 +0200 Subject: [PATCH] ADR search and handling categories in notes --- docs/ADR-001-enhanced-note-search.md | 168 ++++++++++++++ nextcloud_mcp_server/client.py | 269 ++++++++++++++++++---- pyproject.toml | 4 +- tests/conftest.py | 5 +- tests/integration/test_attachments.py | 171 +++++++++++++- tests/integration/test_embedded_images.py | 52 ++++- tests/integration/test_webdav_cleanup.py | 161 +++++++++++++ 7 files changed, 776 insertions(+), 54 deletions(-) create mode 100644 docs/ADR-001-enhanced-note-search.md create mode 100644 tests/integration/test_webdav_cleanup.py diff --git a/docs/ADR-001-enhanced-note-search.md b/docs/ADR-001-enhanced-note-search.md new file mode 100644 index 0000000..2d793eb --- /dev/null +++ b/docs/ADR-001-enhanced-note-search.md @@ -0,0 +1,168 @@ +# ADR-001: Enhanced Note Search with Token-Based Relevance Ranking + +## Status +Proposed + +## Context +The current search implementation in the Nextcloud MCP server performs simple substring matching without relevance ranking. The existing method: +1. Fetches all notes +2. Performs case-insensitive substring matching on title and content +3. Returns matches without any ordering by relevance + +This approach has several limitations: +- Requires exact substring matches +- No ranking by relevance +- Only finds notes where the exact query string appears +- Cannot prioritize more important matches (e.g., title vs content) +- Inefficient for large note collections + +We need to improve the search functionality without adding external dependencies to enhance the user experience while maintaining simplicity. + +## Decision +We will implement a token-based search with relevance ranking that: +1. Splits queries and note content into individual tokens (words) +2. Matches based on tokens rather than complete substrings +3. Applies weighted scoring with title matches valued higher than content matches +4. Sorts results by relevance score +5. Maintains backward compatibility with the existing API + +## Implementation Details + +### 1. Query Processing +The search query will be tokenized (split into individual words), normalized (converted to lowercase), and filtered for stop words if necessary: + +```python +def process_query(query: str) -> list[str]: + # Convert to lowercase and split into tokens + tokens = query.lower().split() + # Filter out very short tokens (optional) + tokens = [token for token in tokens if len(token) > 1] + # Could add stop word removal here + return tokens +``` + +### 2. Note Content Processing +Each note's title and content will be processed in a similar way: + +```python +def process_note_content(note: dict) -> tuple[list[str], list[str]]: + # Process title + title = note.get("title", "").lower() + title_tokens = title.split() + + # Process content + content = note.get("content", "").lower() + content_tokens = content.split() + + return title_tokens, content_tokens +``` + +### 3. Scoring Algorithm +We'll implement a scoring function that: +- Assigns higher weight to title matches (e.g., 3x more important than content matches) +- Considers the percentage of query tokens that match +- Factors in the frequency of matches + +```python +def calculate_score(query_tokens: list[str], title_tokens: list[str], content_tokens: list[str]) -> float: + # Constants for weighting + TITLE_WEIGHT = 3.0 + CONTENT_WEIGHT = 1.0 + + score = 0.0 + + # Count matches in title + title_matches = sum(1 for qt in query_tokens if qt in title_tokens) + if query_tokens: # Avoid division by zero + title_match_ratio = title_matches / len(query_tokens) + score += TITLE_WEIGHT * title_match_ratio + + # Count matches in content + content_matches = sum(1 for qt in query_tokens if qt in content_tokens) + if query_tokens: # Avoid division by zero + content_match_ratio = content_matches / len(query_tokens) + score += CONTENT_WEIGHT * content_match_ratio + + # If no tokens matched at all, return zero + if title_matches == 0 and content_matches == 0: + return 0.0 + + return score +``` + +### 4. Enhanced Search Implementation + +```python +def notes_search_notes(self, *, query: str): + """ + Search notes using token-based matching with relevance ranking. + Returns notes sorted by relevance score. + """ + all_notes = self.notes_get_all() + search_results = [] + + # Process the query + query_tokens = process_query(query) + + # If empty query after processing, return empty results + if not query_tokens: + return [] + + # Process and score each note + for note in all_notes: + title_tokens, content_tokens = process_note_content(note) + score = calculate_score(query_tokens, title_tokens, content_tokens) + + # Only include notes with a non-zero score + if score > 0: + search_results.append({ + "id": note.get("id"), + "title": note.get("title"), + "category": note.get("category"), + "modified": note.get("modified"), + "_score": score # Include score for sorting (optional field) + }) + + # Sort by score in descending order + search_results.sort(key=lambda x: x["_score"], reverse=True) + + # Remove score field before returning (optional) + for result in search_results: + if "_score" in result: + del result["_score"] + + return search_results +``` + +### 5. Performance Considerations +- The enhanced search still retrieves all notes from the server, which could be inefficient for large collections +- Future improvements could include caching or building an in-memory index +- For very large note collections, consider adding pagination to the API + +## Consequences + +### Benefits +1. Better search results with matches on individual words instead of exact phrases +2. Relevant results appear first due to ranking +3. Title matches are prioritized, matching user expectations +4. No additional dependencies required +5. Maintains backward compatibility with existing API + +### Limitations +1. Slightly increased complexity in the search implementation +2. Still requires fetching all notes for each search operation +3. No handling of typos or similar words (would require fuzzy matching) +4. No stemming/lemmatization to match word variations + +### Future Potential Enhancements +1. Add support for phrase queries (exact matches) +2. Implement an in-memory index for faster repeated searches +3. Add basic natural language processing features (stemming, stop words) +4. Support for fuzzy matching to handle typos + +## Alternatives Considered +1. Implementing a full-text search engine (e.g., integrating with Elasticsearch) +2. Using vector-based semantic search with embeddings +3. Adding external NLP libraries for more sophisticated text processing + +These alternatives were not selected for the initial implementation due to the desire to maintain simplicity and avoid adding dependencies, but could be considered for future enhancements. diff --git a/nextcloud_mcp_server/client.py b/nextcloud_mcp_server/client.py index e62233e..303b8e3 100644 --- a/nextcloud_mcp_server/client.py +++ b/nextcloud_mcp_server/client.py @@ -109,7 +109,19 @@ class NextcloudClient: content: str | None = None, category: str | None = None, ): - # body = {"etag": etag} # Removed redundant line + # First, get the current note details to check for category change + old_note = None + try: + if category is not None: # Only fetch if category might change + old_note = self.notes_get_note(note_id=note_id) + old_category = old_note.get("category", "") + logger.info(f"Current category for note {note_id}: '{old_category}'") + except Exception as e: + logger.warning(f"Could not fetch current note {note_id} details before update: {e}") + # Continue with update even if we couldn't fetch current details + old_note = None + + # Prepare update body body = {} if title: body.update({"title": title}) @@ -121,14 +133,14 @@ class NextcloudClient: logger.info( "Attempting to update note %s with etag %s. Body: %s", note_id, - etag, # This was current_etag in the loop + etag, body, ) # Ensure conditional PUT using If-Match header is active response = self._client.put( url=f"/apps/notes/api/v1/notes/{note_id}", json=body, - headers={"If-Match": f'"{etag}"'}, # This was current_etag in the loop + headers={"If-Match": f'"{etag}"'}, ) logger.info( "Update response for note %s: Status %s, Headers %s", @@ -137,26 +149,130 @@ class NextcloudClient: response.headers, ) response.raise_for_status() - return response.json() + updated_note = response.json() + + # Check for category change and clean up old attachment directory if needed + if old_note and category is not None and old_note.get("category", "") != category: + logger.info(f"Category changed from '{old_note.get('category', '')}' to '{category}' - cleaning up old attachment directory") + try: + self._cleanup_old_attachment_directory(note_id=note_id, old_category=old_note.get("category", "")) + except Exception as e: + logger.error(f"Error cleaning up old attachment directory for note {note_id}: {e}") + # Continue with update even if cleanup failed + + return updated_note def notes_search_notes(self, *, query: str): + """ + Search notes using token-based matching with relevance ranking. + Returns notes sorted by relevance score. + """ all_notes = self.notes_get_all() search_results = [] - query_lower = query.lower() + + # Process the query + query_tokens = self.process_query(query) + + # If empty query after processing, return empty results + if not query_tokens: + return [] + + # Process and score each note for note in all_notes: - title_lower = note.get("title", "").lower() - content_lower = note.get("content", "").lower() - if query_lower in title_lower or query_lower in content_lower: - search_results.append( - { - "id": note.get("id"), - "title": note.get("title"), - "category": note.get("category"), - "modified": note.get("modified"), - } - ) + title_tokens, content_tokens = self.process_note_content(note) + score = self.calculate_score(query_tokens, title_tokens, content_tokens) + + # Only include notes with a non-zero score + if score > 0: + search_results.append({ + "id": note.get("id"), + "title": note.get("title"), + "category": note.get("category"), + "modified": note.get("modified"), + "_score": score # Include score for sorting (optional field) + }) + + # Sort by score in descending order + search_results.sort(key=lambda x: x["_score"], reverse=True) + + # Remove score field before returning (optional) + for result in search_results: + if "_score" in result: + del result["_score"] + return search_results + def process_query(self, query: str) -> list[str]: + """ + Tokenize and normalize the search query. + """ + # Convert to lowercase and split into tokens + tokens = query.lower().split() + # Filter out very short tokens (optional) + tokens = [token for token in tokens if len(token) > 1] + # Could add stop word removal here + return tokens + + def process_note_content(self, note: dict) -> tuple[list[str], list[str]]: + """ + Tokenize and normalize note title and content. + """ + # Process title + title = note.get("title", "").lower() + title_tokens = title.split() + + # Process content + content = note.get("content", "").lower() + content_tokens = content.split() + + return title_tokens, content_tokens + + def calculate_score(self, query_tokens: list[str], title_tokens: list[str], content_tokens: list[str]) -> float: + """ + Calculate a relevance score for a note based on query tokens. + """ + # Constants for weighting + TITLE_WEIGHT = 3.0 + CONTENT_WEIGHT = 1.0 + + score = 0.0 + + # Count matches in title + title_matches = sum(1 for qt in query_tokens if qt in title_tokens) + if query_tokens: # Avoid division by zero + title_match_ratio = title_matches / len(query_tokens) + score += TITLE_WEIGHT * title_match_ratio + + # Count matches in content + content_matches = sum(1 for qt in query_tokens if qt in content_tokens) + if query_tokens: # Avoid division by zero + content_match_ratio = content_matches / len(query_tokens) + score += CONTENT_WEIGHT * content_match_ratio + + # If no tokens matched at all, return zero + if title_matches == 0 and content_matches == 0: + return 0.0 + + return score + + def _cleanup_old_attachment_directory(self, *, note_id: int, old_category: str): + """ + Clean up the attachment directory for a note in its old category location. + Called after a category change to prevent orphaned directories. + """ + # Construct path to old attachment directory + old_category_path_part = f"{old_category}/" if old_category else "" + old_attachment_dir_path = f"Notes/{old_category_path_part}.attachments.{note_id}/" + + logger.info(f"Cleaning up old attachment directory: {old_attachment_dir_path}") + try: + delete_result = self.delete_webdav_resource(path=old_attachment_dir_path) + logger.info(f"Cleanup of old attachment directory result: {delete_result}") + return delete_result + except Exception as e: + logger.error(f"Error during cleanup of old attachment directory: {e}") + raise e + def delete_webdav_resource(self, *, path: str): """Delete a resource (file or directory) via WebDAV DELETE.""" # Ensure path ends with a slash if it's a directory @@ -172,6 +288,19 @@ class NextcloudClient: headers = {"OCS-APIRequest": "true"} try: + # First try a PROPFIND to verify resource exists + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = self._client.request("PROPFIND", webdav_path, headers=propfind_headers) + logger.info(f"Resource exists check (PROPFIND) status: {propfind_resp.status_code}") + # If we get here with 2xx, the resource exists + except HTTPStatusError as e: + if e.response.status_code == 404: + logger.info(f"Resource '{webdav_path}' doesn't exist, no deletion needed.") + return {"status_code": 404} + # For other errors, continue with deletion attempt + + # Proceed with deletion response = self._client.delete(webdav_path, headers=headers) response.raise_for_status() # Raises for 4xx/5xx status codes logger.info("Successfully deleted WebDAV resource '%s' (Status: %s)", webdav_path, response.status_code) @@ -200,21 +329,58 @@ class NextcloudClient: raise e def notes_delete_note(self, *, note_id: int): - # First delete the note through the Notes API - response = self._client.delete(f"/apps/notes/api/v1/notes/{note_id}") - response.raise_for_status() - json_response = response.json() - - # Now, attempt to delete the associated attachments directory via WebDAV - # Add a trailing slash as suggested - attachment_dir_path = f"Notes/.attachments.{note_id}/" - logger.info(f"Attempting to delete attachment directory for note {note_id} via WebDAV: {attachment_dir_path}") + """Deletes a note via API and attempts to delete its attachment directory via WebDAV.""" + # Fetch note details first to get the category for path construction try: - self.delete_webdav_resource(path=attachment_dir_path) - logger.info(f"Successfully attempted to delete attachment directory for note {note_id}.") - except Exception as e: - # Log the error but don't re-raise, as note deletion itself was successful - logger.error(f"Failed to delete attachment directory for note {note_id}: {e}") + note_details = self.notes_get_note(note_id=note_id) + category = note_details.get("category", "") + + # Check for other potential categories (if any note was moved between categories) + # We can't reliably detect this without a dedicated tracking mechanism, but we can + # implement a basic check for common category names and empty category + potential_categories = [] + if category: + potential_categories.append(category) # Current category first + + # Add empty category (uncategorized notes) + if category != "": + potential_categories.append("") + + # We could add logic here to check for other common categories if needed + + logger.info(f"Note {note_id} has category: '{category}', will check attachment directories in: {potential_categories}") + except HTTPStatusError as e: + # If note doesn't exist (404), we can't delete attachments anyway. + # Re-raise other errors. + if e.response.status_code == 404: + logger.warning(f"Note {note_id} not found when attempting delete. Skipping attachment cleanup.") + # Still raise the 404 as the primary delete operation failed + raise e + else: + logger.error(f"Error fetching note {note_id} details before deleting attachments: {e}") + raise e # Re-raise unexpected errors during fetch + + # Proceed with API note deletion + logger.info(f"Deleting note {note_id} via API.") + response = self._client.delete(f"/apps/notes/api/v1/notes/{note_id}") + response.raise_for_status() # Raise if API deletion fails + logger.info(f"Note {note_id} deleted successfully via API.") + json_response = response.json() # Usually empty on success + + # Now, attempt to delete the associated attachments directory via WebDAV for each potential category + for cat in potential_categories: + cat_path_part = f"{cat}/" if cat else "" + attachment_dir_path = f"Notes/{cat_path_part}.attachments.{note_id}/" + + logger.info(f"Attempting to delete attachment directory for note {note_id} in category '{cat}' via WebDAV: {attachment_dir_path}") + try: + # delete_webdav_resource expects path relative to user's files dir + delete_result = self.delete_webdav_resource(path=attachment_dir_path) + logger.info(f"WebDAV deletion for category '{cat}' attachment directory: {delete_result}") + except Exception as e: + # Log the error but don't re-raise, as API note deletion itself was successful + # Also, we want to try other potential categories even if one fails + logger.error(f"Failed during WebDAV deletion for category '{cat}' attachment directory: {e}") return json_response @@ -225,13 +391,23 @@ class NextcloudClient: # Use the stored username return f"/remote.php/dav/files/{self.username}" - def add_note_attachment(self, *, note_id: int, filename: str, content: bytes, mime_type: str | None = None): - """Add/Update an attachment to a note via WebDAV PUT.""" - # Attachments are stored in a hidden folder .attachments.{note_id} within the Notes folder + # Removed _get_note_attachment_webdav_path helper + + def add_note_attachment(self, *, note_id: int, filename: str, content: bytes, category: str | None = None, mime_type: str | None = None): + """ + Add/Update an attachment to a note via WebDAV PUT. + Requires the caller to provide the note's category. + """ + # Construct paths based on provided category webdav_base = self._get_webdav_base_path() - attachment_path = f"{webdav_base}/Notes/.attachments.{note_id}/{filename}" - logger.info("Uploading attachment to WebDAV path: %s", attachment_path) - + category_path_part = f"{category}/" if category else "" + attachment_dir_segment = f".attachments.{note_id}" + parent_dir_webdav_rel_path = f"Notes/{category_path_part}{attachment_dir_segment}" + parent_dir_path = f"{webdav_base}/{parent_dir_webdav_rel_path}" # Full path for MKCOL + attachment_path = f"{parent_dir_path}/{filename}" # Full path for PUT + + logger.info(f"Uploading attachment for note {note_id} (category: '{category or ''}') to WebDAV path: {attachment_path}") + # Log current auth settings to diagnose the issue logger.info("WebDAV auth settings - Username: %s, Auth Type: %s", self.username, type(self._client.auth).__name__) @@ -275,12 +451,12 @@ class NextcloudClient: notes_dir_response.status_code) # Ensure the parent directory exists using MKCOL - parent_dir_path = f"{webdav_base}/Notes/.attachments.{note_id}" - logger.info("Creating attachments directory: %s", parent_dir_path) + # parent_dir_path is now determined by the helper method + logger.info("Ensuring attachments directory exists: %s", parent_dir_path) mkcol_headers = {"OCS-APIRequest": "true"} logger.info("Headers for MKCOL (Attachments dir): %s", mkcol_headers) mkcol_response = self._client.request("MKCOL", parent_dir_path, headers=mkcol_headers) - # MKCOL should return 201 Created or 405 Method Not Allowed (if exists) + # MKCOL should return 201 Created or 405 Method Not Allowed (if directory already exists) # We can ignore 405, but raise for other errors if mkcol_response.status_code not in [201, 405]: logger.warning( @@ -321,11 +497,18 @@ class NextcloudClient: ) raise e - def get_note_attachment(self, *, note_id: int, filename: str): - """Fetch a specific attachment from a note via WebDAV GET.""" + def get_note_attachment(self, *, note_id: int, filename: str, category: str | None = None): + """ + Fetch a specific attachment from a note via WebDAV GET. + Requires the caller to provide the note's category. + """ + # Construct path based on provided category webdav_base = self._get_webdav_base_path() - attachment_path = f"{webdav_base}/Notes/.attachments.{note_id}/{filename}" - logger.info("Fetching attachment from WebDAV path: %s", attachment_path) + category_path_part = f"{category}/" if category else "" + attachment_dir_segment = f".attachments.{note_id}" + attachment_path = f"{webdav_base}/Notes/{category_path_part}{attachment_dir_segment}/{filename}" + + logger.info(f"Fetching attachment for note {note_id} (category: '{category or ''}') from WebDAV path: {attachment_path}") try: response = self._client.get(attachment_path) diff --git a/pyproject.toml b/pyproject.toml index 1f1643b..a59cea7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,8 +18,8 @@ nc-mcp-server = "nextcloud_mcp_server.server:run" [tool.pytest.ini_options] log_cli = 1 -log_cli_level = "INFO" -log_level = "INFO" +log_cli_level = "WARN" +log_level = "WARN" markers = [ "integration: marks tests as slow (deselect with '-m \"not slow\"')" ] diff --git a/tests/conftest.py b/tests/conftest.py index 845e911..12cca15 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -76,17 +76,20 @@ def temporary_note_with_attachment(nc_client: NextcloudClient, temporary_note: d """ note_data = temporary_note note_id = note_data["id"] + note_category = note_data.get("category") # Get category from the note data unique_suffix = uuid.uuid4().hex[:8] attachment_filename = f"temp_attach_{unique_suffix}.txt" attachment_content = f"Content for {attachment_filename}".encode('utf-8') attachment_mime = "text/plain" - logger.info(f"Adding attachment '{attachment_filename}' to temporary note ID: {note_id}") + logger.info(f"Adding attachment '{attachment_filename}' to temporary note ID: {note_id} (category: '{note_category or ''}')") try: + # Pass the category to add_note_attachment upload_response = nc_client.add_note_attachment( note_id=note_id, filename=attachment_filename, content=attachment_content, + category=note_category, # Pass the fetched category mime_type=attachment_mime ) assert upload_response.get("status_code") in [201, 204], f"Failed to upload attachment: {upload_response}" diff --git a/tests/integration/test_attachments.py b/tests/integration/test_attachments.py index 120f07f..6745575 100644 --- a/tests/integration/test_attachments.py +++ b/tests/integration/test_attachments.py @@ -20,11 +20,14 @@ def test_attachments_add_and_get(nc_client: NextcloudClient, temporary_note_with """ note_data, attachment_filename, attachment_content = temporary_note_with_attachment note_id = note_data["id"] + note_category = note_data.get("category") # Get category from fixture data logger.info(f"Attempting to retrieve attachment '{attachment_filename}' added by fixture for note ID: {note_id}") + # Pass category to get_note_attachment retrieved_content, retrieved_mime = nc_client.get_note_attachment( note_id=note_id, - filename=attachment_filename + filename=attachment_filename, + category=note_category ) logger.info(f"Attachment retrieved. Mime type: {retrieved_mime}, Size: {len(retrieved_content)} bytes") @@ -49,10 +52,12 @@ def test_attachments_add_to_note_with_category(nc_client: NextcloudClient, tempo attachment_mime = "text/plain" logger.info(f"Attempting to add attachment '{attachment_filename}' to note ID: {note_id}") + # Pass category to add_note_attachment upload_response = nc_client.add_note_attachment( note_id=note_id, filename=attachment_filename, content=attachment_content, + category=note_category, # Pass the note's category mime_type=attachment_mime ) assert upload_response and "status_code" in upload_response @@ -62,9 +67,11 @@ def test_attachments_add_to_note_with_category(nc_client: NextcloudClient, tempo # Get and Verify Attachment logger.info(f"Attempting to retrieve attachment '{attachment_filename}' from note ID: {note_id}") + # Pass category to get_note_attachment retrieved_content, retrieved_mime = nc_client.get_note_attachment( note_id=note_id, - filename=attachment_filename + filename=attachment_filename, + category=note_category # Pass the note's category ) logger.info(f"Attachment retrieved. Mime type: {retrieved_mime}, Size: {len(retrieved_content)} bytes") @@ -80,6 +87,7 @@ def test_attachments_cleanup_on_note_delete(nc_client: NextcloudClient, temporar """ note_data, attachment_filename, _ = temporary_note_with_attachment note_id = note_data["id"] + note_category = note_data.get("category") # Get category from fixture data # Fixture setup already added the attachment. # Fixture teardown (from temporary_note) will delete the note. @@ -105,13 +113,168 @@ def test_attachments_cleanup_on_note_delete(nc_client: NextcloudClient, temporar # Verify Attachment Is Deleted (via 404 on GET) logger.info(f"Verifying attachment '{attachment_filename}' is deleted for note ID: {note_id}") with pytest.raises(HTTPStatusError) as excinfo_attach: + # Pass category to get_note_attachment - although it should fail anyway + # because the note (and thus details) are gone. + # The client method will raise 404 from the initial notes_get_note call. nc_client.get_note_attachment( note_id=note_id, - filename=attachment_filename + filename=attachment_filename, + category=note_category # Pass category, though note fetch should fail first ) - # Expect 404 because the parent directory (.attachments.NOTE_ID) should be gone + # Expect 404 because the note itself is gone assert excinfo_attach.value.response.status_code == 404 logger.info(f"Attachment '{attachment_filename}' correctly not found (404) after note deletion.") + # Directly verify attachment directory doesn't exist using WebDAV PROPFIND + logger.info(f"Directly verifying attachment directory doesn't exist via PROPFIND") + webdav_base = nc_client._get_webdav_base_path() + category_path_part = f"{note_category}/" if note_category else "" + attachment_dir_path = f"{webdav_base}/Notes/{category_path_part}.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = nc_client._client.request("PROPFIND", attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Successful PROPFIND means directory exists + logger.error(f"Attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected attachment directory to be gone, but PROPFIND returned {status}!" + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info(f"Verified attachment directory does not exist via PROPFIND (404 received)") + # Note: The temporary_note fixture will still run its cleanup, # but it will find the note already deleted (404) and handle it gracefully. + +def test_attachments_category_change_handling(nc_client: NextcloudClient): + """ + Tests attachment handling when a note's category is changed. + Verifies attachment retrieval works before and after category change, + and that cleanup targets the correct final location. + """ + note_id = None + initial_category = "CategoryA" + new_category = "CategoryB" + unique_suffix = uuid.uuid4().hex[:8] + note_title = f"Category Change Test {unique_suffix}" + attachment_filename = f"cat_change_{unique_suffix}.txt" + attachment_content = f"Content for {attachment_filename}".encode('utf-8') + + try: + # 1. Create note with initial category + logger.info(f"Creating note '{note_title}' in category '{initial_category}'") + created_note = nc_client.notes_create_note( + title=note_title, content="Initial content", category=initial_category + ) + note_id = created_note["id"] + etag1 = created_note["etag"] + logger.info(f"Note created with ID: {note_id}, Etag: {etag1}") + time.sleep(1) + + # 2. Add attachment (passing initial category) + logger.info(f"Adding attachment '{attachment_filename}' to note {note_id} (in {initial_category})") + upload_response = nc_client.add_note_attachment( + note_id=note_id, filename=attachment_filename, content=attachment_content, category=initial_category, mime_type="text/plain" + ) + assert upload_response["status_code"] in [201, 204] + logger.info("Attachment added successfully.") + time.sleep(1) + + # 3. Verify attachment retrieval from initial category (passing initial category) + logger.info(f"Verifying attachment retrieval from initial category '{initial_category}'") + retrieved_content1, _ = nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=initial_category) + assert retrieved_content1 == attachment_content + logger.info("Attachment retrieved successfully from initial category.") + + # 4. Update note category + logger.info(f"Updating note {note_id} category from '{initial_category}' to '{new_category}'") + # Need to fetch the latest etag after attachment add (WebDAV ops don't update note etag) + current_note_data = nc_client.notes_get_note(note_id=note_id) + current_etag = current_note_data["etag"] + updated_note = nc_client.notes_update_note( + note_id=note_id, etag=current_etag, category=new_category, title=note_title, content="Updated content" # Pass required fields + ) + etag3 = updated_note["etag"] + assert updated_note["category"] == new_category + logger.info(f"Note category updated successfully. New Etag: {etag3}") + time.sleep(1) + + # 5. Verify attachment retrieval from *new* category (passing new category) + logger.info(f"Verifying attachment retrieval from new category '{new_category}'") + retrieved_content2, _ = nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=new_category) + assert retrieved_content2 == attachment_content + logger.info("Attachment retrieved successfully from new category.") + + # 5.1 Verify old category attachment directory is gone via WebDAV PROPFIND + logger.info(f"Directly checking if old attachment directory exists in WebDAV") + webdav_base = nc_client._get_webdav_base_path() + old_attachment_dir_path = f"{webdav_base}/Notes/{initial_category}/.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = nc_client._client.request("PROPFIND", old_attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Successful PROPFIND means directory exists + logger.error(f"Old attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected old directory to be gone, but PROPFIND returned {status} - directory still exists!" + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info(f"Verified old attachment directory does not exist via PROPFIND (404 received)") + + # 5.2 Verify new category attachment directory exists via WebDAV PROPFIND + logger.info(f"Directly checking if new attachment directory exists in WebDAV") + new_attachment_dir_path = f"{webdav_base}/Notes/{new_category}/.attachments.{note_id}" + try: + propfind_resp = nc_client._client.request("PROPFIND", new_attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + assert status in [207, 200], f"Expected PROPFIND to return success (207/200), got {status}" + logger.info(f"Verified new attachment directory exists via PROPFIND ({status} received)") + except HTTPStatusError as e: + logger.error(f"New attachment directory not found! PROPFIND failed with {e.response.status_code}") + assert False, f"Expected new attachment directory to exist, but PROPFIND failed with {e.response.status_code}" + + finally: + # 6. Cleanup: Delete the note (client should use the *final* category for cleanup path) + if note_id: + logger.info(f"Cleaning up note ID: {note_id} (last known category: '{new_category}')") + try: + nc_client.notes_delete_note(note_id=note_id) + logger.info(f"Note {note_id} deleted.") + time.sleep(1) + # Verify note deletion + with pytest.raises(HTTPStatusError) as excinfo_note_del: + nc_client.notes_get_note(note_id=note_id) + assert excinfo_note_del.value.response.status_code == 404 + logger.info("Verified note deleted (404).") + # Verify attachment deletion (should fail with 404 on the initial note fetch) + with pytest.raises(HTTPStatusError) as excinfo_attach_del: + # Pass the *last known* category, although the note fetch should fail first + nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=new_category) + assert excinfo_attach_del.value.response.status_code == 404 + logger.info("Verified attachment cannot be retrieved after note deletion (404).") + + # 6.1 Verify both old and new attachment directories are gone via WebDAV PROPFIND + logger.info("Directly verifying attachment directories don't exist via PROPFIND") + webdav_base = nc_client._get_webdav_base_path() + + # Check new category attachment directory + new_attachment_dir_path = f"{webdav_base}/Notes/{new_category}/.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + resp = nc_client._client.request("PROPFIND", new_attachment_dir_path, headers=propfind_headers) + if resp.status_code in [200, 207]: # Successful PROPFIND means directory exists + assert False, f"New category attachment directory still exists!" + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info("Verified new category attachment directory is gone via PROPFIND") + + # Check old category attachment directory + old_attachment_dir_path = f"{webdav_base}/Notes/{initial_category}/.attachments.{note_id}" + try: + resp = nc_client._client.request("PROPFIND", old_attachment_dir_path, headers=propfind_headers) + if resp.status_code in [200, 207]: # Successful PROPFIND means directory exists + assert False, f"Old category attachment directory still exists!" + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info("Verified old category attachment directory is gone via PROPFIND") + + logger.info("Verified all attachment directories are properly cleaned up.") + except Exception as e: + logger.error(f"Error during cleanup for note {note_id}: {e}") diff --git a/tests/integration/test_embedded_images.py b/tests/integration/test_embedded_images.py index e6b3386..565543f 100644 --- a/tests/integration/test_embedded_images.py +++ b/tests/integration/test_embedded_images.py @@ -53,17 +53,34 @@ def test_note_with_embedded_image(nc_client: NextcloudClient, temporary_note: di attachment_filename = f"test_image_{unique_suffix}.png" # Make filename unique per run # 1. Upload the image as an attachment - logger.info(f"Uploading image attachment '{attachment_filename}' to note {note_id}...") + note_category = note_data.get("category") # Get category from fixture data + logger.info(f"Uploading image attachment '{attachment_filename}' to note {note_id} (category: '{note_category or ''}')...") upload_response = nc_client.add_note_attachment( note_id=note_id, filename=attachment_filename, content=image_content, + category=note_category, # Pass the category mime_type="image/png" ) assert upload_response and upload_response.get("status_code") in [201, 204] logger.info(f"Image uploaded successfully (Status: {upload_response.get('status_code')}).") time.sleep(1) # Allow potential processing time + # 1.1 Verify attachment directory exists via WebDAV PROPFIND + logger.info(f"Directly checking if attachment directory exists in WebDAV") + webdav_base = nc_client._get_webdav_base_path() + category_path_part = f"{note_category}/" if note_category else "" + attachment_dir_path = f"{webdav_base}/Notes/{category_path_part}.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = nc_client._client.request("PROPFIND", attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + assert status in [207, 200], f"Expected PROPFIND to return success (207/200), got {status}" + logger.info(f"Verified attachment directory exists via PROPFIND ({status} received)") + except HTTPStatusError as e: + logger.error(f"Attachment directory not found! PROPFIND failed with {e.response.status_code}") + assert False, f"Expected attachment directory to exist, but PROPFIND failed with {e.response.status_code}" + # 2. Update the note content to include the embedded image references updated_content = f"""{note_data['content']} @@ -94,13 +111,40 @@ def test_note_with_embedded_image(nc_client: NextcloudClient, temporary_note: di logger.info("Verified image reference exists in updated note content.") # 4. Verify the image attachment can be retrieved - logger.info(f"Retrieving image attachment '{attachment_filename}'...") + logger.info(f"Retrieving image attachment '{attachment_filename}' (category: '{note_category or ''}')...") + # Pass category to get_note_attachment retrieved_img_content, mime_type = nc_client.get_note_attachment( note_id=note_id, - filename=attachment_filename + filename=attachment_filename, + category=note_category ) assert retrieved_img_content == image_content assert mime_type.startswith("image/png") logger.info("Successfully retrieved and verified image attachment content and mime type.") - # Note cleanup is handled by the temporary_note fixture + # 5. Manually trigger deletion to verify cleanup (instead of waiting for fixture teardown) + logger.info(f"Manually deleting note ID: {note_id} to verify proper attachment cleanup") + nc_client.notes_delete_note(note_id=note_id) + logger.info(f"Note ID: {note_id} deleted successfully.") + time.sleep(1) + + # 6. Verify note is deleted + with pytest.raises(HTTPStatusError) as excinfo_note: + nc_client.notes_get_note(note_id=note_id) + assert excinfo_note.value.response.status_code == 404 + logger.info(f"Verified note {note_id} deletion (404 received).") + + # 7. Verify attachment directory is deleted via WebDAV PROPFIND + logger.info(f"Directly verifying attachment directory doesn't exist via PROPFIND") + try: + propfind_resp = nc_client._client.request("PROPFIND", attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Successful PROPFIND means directory exists + logger.error(f"Attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected attachment directory to be gone, but PROPFIND returned {status}!" + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info(f"Verified attachment directory does not exist via PROPFIND (404 received)") + + # Note: The temporary_note fixture will still run its cleanup, + # but it will find the note already deleted (404) and handle it gracefully. diff --git a/tests/integration/test_webdav_cleanup.py b/tests/integration/test_webdav_cleanup.py new file mode 100644 index 0000000..15a91c7 --- /dev/null +++ b/tests/integration/test_webdav_cleanup.py @@ -0,0 +1,161 @@ +import pytest +import logging +import time +import uuid +from httpx import HTTPStatusError + +from nextcloud_mcp_server.client import NextcloudClient + +logger = logging.getLogger(__name__) + +# Mark all tests in this module as integration tests +pytestmark = pytest.mark.integration + +def test_category_change_cleans_up_old_attachments_directory(nc_client: NextcloudClient): + """ + Tests that when a note's category is changed, the old attachment directory is properly cleaned up. + """ + note_id = None + initial_category = "CategoryTest1" + new_category = "CategoryTest2" + unique_suffix = uuid.uuid4().hex[:8] + note_title = f"Category Cleanup Test {unique_suffix}" + attachment_filename = f"cleanup_test_{unique_suffix}.txt" + attachment_content = f"Content for {attachment_filename}".encode('utf-8') + + try: + # 1. Create note with initial category + logger.info(f"Creating note '{note_title}' in category '{initial_category}'") + created_note = nc_client.notes_create_note( + title=note_title, content="Initial content", category=initial_category + ) + note_id = created_note["id"] + etag1 = created_note["etag"] + logger.info(f"Note created with ID: {note_id}, Etag: {etag1}") + time.sleep(1) + + # 2. Add attachment (passing initial category) + logger.info(f"Adding attachment '{attachment_filename}' to note {note_id} (in {initial_category})") + upload_response = nc_client.add_note_attachment( + note_id=note_id, filename=attachment_filename, content=attachment_content, category=initial_category, mime_type="text/plain" + ) + assert upload_response["status_code"] in [201, 204] + logger.info("Attachment added successfully.") + time.sleep(1) + + # 3. Verify attachment retrieval from initial category + logger.info(f"Verifying attachment retrieval from initial category '{initial_category}'") + retrieved_content1, _ = nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=initial_category) + assert retrieved_content1 == attachment_content + logger.info("Attachment retrieved successfully from initial category.") + + # 4. Construct and check the WebDAV path for the initial category's attachment directory + initial_webdav_path = f"Notes/{initial_category}/.attachments.{note_id}" + logger.info(f"Initial WebDAV path for attachments: {initial_webdav_path}") + # Here we would check if the directory exists, but the WebDAV client doesn't directly + # expose directory listing functionality, so we'll infer from attachment retrieval success + + # 5. Update note category + logger.info(f"Updating note {note_id} category from '{initial_category}' to '{new_category}'") + current_note_data = nc_client.notes_get_note(note_id=note_id) + current_etag = current_note_data["etag"] + updated_note = nc_client.notes_update_note( + note_id=note_id, etag=current_etag, category=new_category, title=note_title, content="Updated content" + ) + etag3 = updated_note["etag"] + assert updated_note["category"] == new_category + logger.info(f"Note category updated successfully. New Etag: {etag3}") + time.sleep(1) + + # 6. Verify attachment retrieval from new category + logger.info(f"Verifying attachment retrieval from new category '{new_category}'") + retrieved_content2, _ = nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=new_category) + assert retrieved_content2 == attachment_content + logger.info("Attachment retrieved successfully from new category.") + + # 7. Try to retrieve from old category - this should fail + logger.info(f"Trying to retrieve attachment from old category '{initial_category}' - should fail") + try: + nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=initial_category) + # If we get here, it means the old directory still exists (a problem) + logger.error("ISSUE DETECTED: Was able to retrieve attachment from old category path!") + assert False, "Old category attachment directory still exists and accessible!" + except HTTPStatusError as e: + # This is the expected outcome - old directory should be gone + logger.info(f"Correctly got error accessing old category path: {e.response.status_code}") + assert e.response.status_code == 404, f"Expected 404, got {e.response.status_code}" + logger.info("Verified old category attachment directory is not accessible (good!)") + + # 7.1 Directly check old attachment directory existence using WebDAV PROPFIND + logger.info(f"Directly checking if old attachment directory exists in WebDAV") + webdav_base = nc_client._get_webdav_base_path() + old_attachment_dir_path = f"{webdav_base}/Notes/{initial_category}/.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = nc_client._client.request("PROPFIND", old_attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Success codes indicate the directory exists (a problem) + logger.error(f"Old attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected old attachment directory to be gone, but it still exists (PROPFIND returned {status})!" + # If we got another status code (like 404), it's also good - the directory doesn't exist + logger.info(f"Verified old attachment directory does not exist (PROPFIND returned {status})") + except HTTPStatusError as e: + # 404 is expected - directory should not exist + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info(f"Verified old attachment directory does not exist via PROPFIND (404 received)") + + finally: + # 8. Cleanup: Delete the note + if note_id: + logger.info(f"Cleaning up note ID: {note_id}") + try: + nc_client.notes_delete_note(note_id=note_id) + logger.info(f"Note {note_id} deleted.") + time.sleep(1) + + # 9. Verify both old and new attachment paths are gone + logger.info("Verifying all attachment paths are gone") + with pytest.raises(HTTPStatusError) as excinfo_new: + nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=new_category) + assert excinfo_new.value.response.status_code == 404 + + with pytest.raises(HTTPStatusError) as excinfo_old: + nc_client.get_note_attachment(note_id=note_id, filename=attachment_filename, category=initial_category) + assert excinfo_old.value.response.status_code == 404 + + # 9.1 Directly verify directories don't exist using WebDAV PROPFIND + logger.info("Directly verifying attachment directories don't exist via PROPFIND") + webdav_base = nc_client._get_webdav_base_path() + + # Check new category attachment directory + new_attachment_dir_path = f"{webdav_base}/Notes/{new_category}/.attachments.{note_id}" + propfind_headers = {"Depth": "0", "OCS-APIRequest": "true"} + try: + propfind_resp = nc_client._client.request("PROPFIND", new_attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Success codes indicate the directory exists (a problem) + logger.error(f"New category attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected new category attachment directory to be gone, but it still exists (PROPFIND returned {status})!" + # If we got another status code (like 404), it's also good - the directory doesn't exist + logger.info(f"Verified new category attachment directory does not exist (PROPFIND returned {status})") + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info("Verified new category attachment directory is gone via PROPFIND") + + # Check old category attachment directory + old_attachment_dir_path = f"{webdav_base}/Notes/{initial_category}/.attachments.{note_id}" + try: + propfind_resp = nc_client._client.request("PROPFIND", old_attachment_dir_path, headers=propfind_headers) + status = propfind_resp.status_code + if status in [200, 207]: # Success codes indicate the directory exists (a problem) + logger.error(f"Old category attachment directory still exists! PROPFIND returned {status}") + assert False, f"Expected old category attachment directory to be gone, but it still exists (PROPFIND returned {status})!" + # If we got another status code (like 404), it's also good - the directory doesn't exist + logger.info(f"Verified old category attachment directory does not exist (PROPFIND returned {status})") + except HTTPStatusError as e: + assert e.response.status_code == 404, f"Expected PROPFIND to fail with 404, got {e.response.status_code}" + logger.info("Verified old category attachment directory is gone via PROPFIND") + + logger.info("Verified all attachment directories are properly cleaned up.") + except Exception as e: + logger.error(f"Error during cleanup for note {note_id}: {e}")