From 3ff6346c03728ba366a52dcb1b48065de2b1b750 Mon Sep 17 00:00:00 2001
From: yuisheaven <91854357+yuisheaven@users.noreply.github.com>
Date: Sun, 5 Oct 2025 02:16:42 +0200
Subject: [PATCH] ran ruff format via uv

---
 .../client/unstructured_client.py             | 68 ++++++++++---------
 nextcloud_mcp_server/config.py                | 32 +++++----
 nextcloud_mcp_server/server/webdav.py         |  7 +-
 nextcloud_mcp_server/utils/__init__.py        |  2 +-
 nextcloud_mcp_server/utils/document_parser.py | 45 +++++++-----
 5 files changed, 87 insertions(+), 67 deletions(-)

diff --git a/nextcloud_mcp_server/client/unstructured_client.py b/nextcloud_mcp_server/client/unstructured_client.py
index dd6a289..b425a0c 100644
--- a/nextcloud_mcp_server/client/unstructured_client.py
+++ b/nextcloud_mcp_server/client/unstructured_client.py
@@ -17,31 +17,31 @@ logger = logging.getLogger(__name__)
 
 class UnstructuredClient:
     """Client for interacting with the Unstructured API.
-    
+
     The Unstructured API provides document parsing capabilities for various formats
     including PDF, DOCX, images with OCR, and more.
-    
+
     API Documentation: https://docs.unstructured.io/api-reference/api-services/api-parameters
     """
-    
+
     def __init__(self, api_url: Optional[str] = None, timeout: int = 120):
         """Initialize the Unstructured API client.
-        
+
         Args:
             api_url: Base URL of the Unstructured API. If None, will use config.
             timeout: Request timeout in seconds (default: 120 for large documents)
         """
         self.api_url = api_url or get_unstructured_api_url()
         self.timeout = timeout
-        
+
         if not self.api_url:
             raise ValueError(
                 "Unstructured API URL not configured. "
                 "Set ENABLE_UNSTRUCTURED_PARSING=true and UNSTRUCTURED_API_URL in environment."
             )
-        
+
         logger.info(f"Initialized UnstructuredClient with API URL: {self.api_url}")
-    
+
     async def partition_document(
         self,
         content: bytes,
@@ -52,7 +52,7 @@ class UnstructuredClient:
         extract_image_block_types: Optional[list[str]] = None,
     ) -> Tuple[str, dict]:
         """Parse a document using the Unstructured API.
-        
+
         Args:
             content: The document content as bytes
             filename: The filename (used for format detection)
@@ -62,12 +62,12 @@ class UnstructuredClient:
             languages: List of language codes for OCR (e.g., ["eng", "deu"]).
                       If None, uses the value from UNSTRUCTURED_LANGUAGES env var.
             extract_image_block_types: Types of elements to extract from images
-            
+
         Returns:
             Tuple of (parsed_text, metadata) where:
             - parsed_text: The extracted text content
             - metadata: Additional metadata about the parsing
-            
+
         Raises:
             httpx.HTTPError: If the API request fails
             Exception: If parsing fails
@@ -75,29 +75,33 @@ class UnstructuredClient:
         # Use environment configuration as defaults
         if strategy is None:
             strategy = get_unstructured_strategy()
-        
+
         if languages is None:
             languages = get_unstructured_languages()
-        
+
         # Prepare the multipart form data
         files = {
-            "files": (filename, io.BytesIO(content), content_type or "application/octet-stream")
+            "files": (
+                filename,
+                io.BytesIO(content),
+                content_type or "application/octet-stream",
+            )
         }
-        
+
         # Prepare the request data
         data = {
             "strategy": strategy,
             "languages": ",".join(languages),
         }
-        
+
         if extract_image_block_types:
             data["extract_image_block_types"] = ",".join(extract_image_block_types)
-        
+
         logger.debug(
             f"Partitioning document '{filename}' with strategy '{strategy}', "
             f"languages: {languages}"
         )
-        
+
         try:
             async with httpx.AsyncClient(timeout=self.timeout) as client:
                 response = await client.post(
@@ -106,25 +110,25 @@ class UnstructuredClient:
                     data=data,
                 )
                 response.raise_for_status()
-                
+
                 # Parse the response
                 elements = response.json()
-                
+
                 # Extract text from elements
                 # Each element has a "text" field
                 texts = []
                 element_types = {}
-                
+
                 for element in elements:
                     if "text" in element and element["text"]:
                         texts.append(element["text"])
-                    
+
                     # Track element types
                     el_type = element.get("type", "unknown")
                     element_types[el_type] = element_types.get(el_type, 0) + 1
-                
+
                 parsed_text = "\n\n".join(texts)
-                
+
                 # Collect metadata
                 metadata = {
                     "element_count": len(elements),
@@ -132,26 +136,28 @@ class UnstructuredClient:
                     "element_types": element_types,
                     "strategy": strategy,
                     "languages": languages,
-                    "parsing_method": "unstructured_api"
+                    "parsing_method": "unstructured_api",
                 }
-                
+
                 logger.debug(
                     f"Successfully parsed document: {len(elements)} elements, "
                     f"{len(parsed_text)} characters"
                 )
-                
+
                 return parsed_text, metadata
-                
+
         except httpx.HTTPError as e:
             logger.error(f"HTTP error calling Unstructured API: {e}")
-            raise Exception(f"Failed to parse document via Unstructured API: {str(e)}") from e
+            raise Exception(
+                f"Failed to parse document via Unstructured API: {str(e)}"
+            ) from e
         except Exception as e:
             logger.error(f"Unexpected error parsing document: {e}")
             raise Exception(f"Failed to parse document: {str(e)}") from e
-    
+
     async def health_check(self) -> bool:
         """Check if the Unstructured API is available.
-        
+
         Returns:
             True if the API is healthy, False otherwise.
         """
@@ -161,4 +167,4 @@ class UnstructuredClient:
                 return response.status_code == 200
         except Exception as e:
             logger.warning(f"Unstructured API health check failed: {e}")
-            return False
\ No newline at end of file
+            return False
diff --git a/nextcloud_mcp_server/config.py b/nextcloud_mcp_server/config.py
index 0108990..d82310e 100644
--- a/nextcloud_mcp_server/config.py
+++ b/nextcloud_mcp_server/config.py
@@ -42,20 +42,20 @@ def setup_logging():
 # Document Parsing Configuration
 def get_unstructured_api_url() -> Optional[str]:
     """Get the Unstructured API URL from environment variables.
-    
+
     Returns:
         The Unstructured API URL if parsing is enabled, None otherwise.
     """
     enabled = os.getenv("ENABLE_UNSTRUCTURED_PARSING", "true").lower() == "true"
     if not enabled:
         return None
-    
+
     return os.getenv("UNSTRUCTURED_API_URL", "http://unstructured:8000")
 
 
 def is_unstructured_parsing_enabled() -> bool:
     """Check if unstructured document parsing is enabled.
-    
+
     Returns:
         True if enabled, False otherwise.
     """
@@ -64,36 +64,36 @@ def is_unstructured_parsing_enabled() -> bool:
 
 def get_unstructured_strategy() -> str:
     """Get the parsing strategy for the Unstructured API.
-    
+
     Valid values are:
     - 'auto': Automatically choose the best strategy (default)
     - 'fast': Fast parsing without OCR
     - 'hi_res': High-resolution parsing with OCR for better accuracy
-    
+
     Returns:
         The parsing strategy to use.
     """
     strategy = os.getenv("UNSTRUCTURED_STRATEGY", "auto").lower()
     valid_strategies = ["auto", "fast", "hi_res"]
-    
+
     if strategy not in valid_strategies:
         logging.warning(
             f"Invalid UNSTRUCTURED_STRATEGY '{strategy}'. Using 'hi_res'. "
             f"Valid options: {', '.join(valid_strategies)}"
         )
         return "hi_res"
-    
+
     return strategy
 
 
 def get_unstructured_languages() -> list[str]:
     """Get the OCR languages for the Unstructured API.
-    
+
     Languages should be specified as ISO 639-3 codes (e.g., 'eng', 'deu', 'fra').
     Multiple languages can be specified separated by commas.
-    
+
     Default languages: English (eng) and German (deu)
-    
+
     Common language codes:
     - eng: English
     - deu: German
@@ -106,17 +106,19 @@ def get_unstructured_languages() -> list[str]:
     - zho: Chinese
     - jpn: Japanese
     - kor: Korean
-    
+
     Returns:
         List of language codes for OCR processing.
     """
     languages_str = os.getenv("UNSTRUCTURED_LANGUAGES", "eng,deu")
-    
+
     # Split by comma and clean up whitespace
     languages = [lang.strip() for lang in languages_str.split(",") if lang.strip()]
-    
+
     if not languages:
-        logging.warning("No languages specified in UNSTRUCTURED_LANGUAGES. Using default: eng,deu")
+        logging.warning(
+            "No languages specified in UNSTRUCTURED_LANGUAGES. Using default: eng,deu"
+        )
         return ["eng", "deu"]
-    
+
     return languages
diff --git a/nextcloud_mcp_server/server/webdav.py b/nextcloud_mcp_server/server/webdav.py
index 66433e3..0ceaa96 100644
--- a/nextcloud_mcp_server/server/webdav.py
+++ b/nextcloud_mcp_server/server/webdav.py
@@ -3,7 +3,10 @@ import logging
 from mcp.server.fastmcp import Context, FastMCP
 
 from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.utils.document_parser import is_parseable_document, parse_document
+from nextcloud_mcp_server.utils.document_parser import (
+    is_parseable_document,
+    parse_document,
+)
 from nextcloud_mcp_server.config import is_unstructured_parsing_enabled
 
 logger = logging.getLogger(__name__)
@@ -62,7 +65,7 @@ def configure_webdav_tools(mcp: FastMCP):
         content, content_type = await client.webdav.read_file(path)
 
         # Check if this is a parseable document (PDF, DOCX, etc.)
-        if (is_unstructured_parsing_enabled() and is_parseable_document(content_type)):
+        if is_unstructured_parsing_enabled() and is_parseable_document(content_type):
             try:
                 logger.info(f"Parsing document '{path}' of type '{content_type}'")
                 parsed_text, metadata = await parse_document(
diff --git a/nextcloud_mcp_server/utils/__init__.py b/nextcloud_mcp_server/utils/__init__.py
index 816a761..8a4b271 100644
--- a/nextcloud_mcp_server/utils/__init__.py
+++ b/nextcloud_mcp_server/utils/__init__.py
@@ -1 +1 @@
-"""Utility functions for the Nextcloud MCP server."""
\ No newline at end of file
+"""Utility functions for the Nextcloud MCP server."""
diff --git a/nextcloud_mcp_server/utils/document_parser.py b/nextcloud_mcp_server/utils/document_parser.py
index 145c61d..b7c809f 100644
--- a/nextcloud_mcp_server/utils/document_parser.py
+++ b/nextcloud_mcp_server/utils/document_parser.py
@@ -35,56 +35,61 @@ PARSEABLE_MIME_TYPES = {
     "image/bmp": "image",
 }
 
+
 def is_parseable_document(content_type: Optional[str]) -> bool:
     """Check if a document type can be parsed.
-    
+
     Args:
         content_type: The MIME type of the document
-         
+
     Returns:
         True if the document can be parsed, False otherwise
     """
     if not content_type:
         return False
-    
+
     # Handle content types with additional parameters (e.g., "application/pdf; charset=utf-8")
     base_content_type = content_type.split(";")[0].strip().lower()
     return base_content_type in PARSEABLE_MIME_TYPES
 
+
 async def parse_document(
-    content: bytes,
-    content_type: Optional[str],
-    filename: Optional[str] = None
+    content: bytes, content_type: Optional[str], filename: Optional[str] = None
 ) -> Tuple[str, dict]:
     """Parse a document using the Unstructured API.
-    
+
     Args:
         content: The document content as bytes
         content_type: The MIME type of the document
         filename: Optional filename to help with format detection
-         
+
     Returns:
         Tuple of (parsed_text, metadata) where:
         - parsed_text: The extracted text content
         - metadata: Additional metadata about the parsing
-         
+
     Raises:
         ValueError: If the document type is not supported
         Exception: If parsing fails
     """
     if not is_parseable_document(content_type):
         raise ValueError(f"Document type '{content_type}' is not supported for parsing")
-    
-    base_content_type = content_type.split(";")[0].strip().lower() if content_type else ""
+
+    base_content_type = (
+        content_type.split(";")[0].strip().lower() if content_type else ""
+    )
     doc_type = PARSEABLE_MIME_TYPES.get(base_content_type, "unknown")
-    
+
     logger.debug(f"Parsing document of type '{doc_type}' (MIME: {content_type})")
-    
+
     # Check if unstructured parsing is enabled via environment
     if is_unstructured_parsing_enabled():
         logger.debug("Using Unstructured API for parsing")
         try:
-            from nextcloud_mcp_server.client.unstructured_client import UnstructuredClient
+            from nextcloud_mcp_server.client.unstructured_client import (
+                UnstructuredClient,
+            )
+
             client = UnstructuredClient()
             # The client will automatically use environment configuration
             # (UNSTRUCTURED_STRATEGY and UNSTRUCTURED_LANGUAGES)
@@ -97,6 +102,7 @@ async def parse_document(
             logger.error(f"Unstructured API parsing failed: {e}")
             # If unstructured parsing fails, return base64 as fallback
             import base64
+
             parsed_text = f"Document could not be parsed. Base64 content: {base64.b64encode(content).decode('ascii')[:200]}..."
             metadata = {
                 "document_type": doc_type,
@@ -104,18 +110,21 @@ async def parse_document(
                 "element_count": 0,
                 "text_length": len(parsed_text),
                 "parsing_method": "fallback_base64",
-                "error": str(e)
+                "error": str(e),
             }
             return parsed_text, metadata
     else:
-        logger.debug("Unstructured parsing is disabled, returning base64 encoded content as fallback")
+        logger.debug(
+            "Unstructured parsing is disabled, returning base64 encoded content as fallback"
+        )
         import base64
+
         parsed_text = f"Document could not be parsed. Base64 content: {base64.b64encode(content).decode('ascii')[:200]}..."
         metadata = {
             "document_type": doc_type,
             "mime_type": content_type,
             "element_count": 0,
             "text_length": len(parsed_text),
-            "parsing_method": "fallback_base64"
+            "parsing_method": "fallback_base64",
         }
-        return parsed_text, metadata
\ No newline at end of file
+        return parsed_text, metadata