Added environment variables for Unstructured to control OCR quality and OCR languages

This commit is contained in:
yuisheaven
2025-10-04 05:21:02 +02:00
parent df5f85e0c6
commit c9a687171a
5 changed files with 96 additions and 6 deletions
@@ -86,11 +86,12 @@ async def parse_document(
try:
from nextcloud_mcp_server.client.unstructured_client import UnstructuredClient
client = UnstructuredClient()
# The client will automatically use environment configuration
# (UNSTRUCTURED_STRATEGY and UNSTRUCTURED_LANGUAGES)
return await client.partition_document(
content=content,
filename=filename or f"document.{doc_type}",
content_type=content_type,
strategy="auto"
)
except Exception as e:
logger.error(f"Unstructured API parsing failed: {e}")