Files
nextcloud-mcp-server/tests/test_unstructured_config.py
T
2025-10-23 22:56:25 +02:00

173 lines
6.5 KiB
Python

"""Unit tests for Unstructured API configuration."""
import os
import pytest
from nextcloud_mcp_server.client.unstructured_client import UnstructuredClient
from nextcloud_mcp_server.config import (
get_unstructured_languages,
get_unstructured_strategy,
)
class TestUnstructuredStrategy:
    """Tests for ``get_unstructured_strategy`` and ``UNSTRUCTURED_STRATEGY``.

    Every test changes the environment through pytest's ``monkeypatch``
    fixture. The fixture restores the variable to whatever it was before the
    test (including "unset"), so a value exported by the developer or CI is
    never clobbered and the tests do not depend on execution order.
    """

    def test_strategy_default(self, monkeypatch):
        """Test that strategy defaults to 'auto'."""
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv("UNSTRUCTURED_STRATEGY", raising=False)
        assert get_unstructured_strategy() == "auto"

    def test_strategy_custom_auto(self, monkeypatch):
        """Test custom strategy 'auto'."""
        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "auto")
        assert get_unstructured_strategy() == "auto"

    def test_strategy_custom_fast(self, monkeypatch):
        """Test custom strategy 'fast'."""
        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "fast")
        assert get_unstructured_strategy() == "fast"

    def test_strategy_custom_hi_res(self, monkeypatch):
        """Test custom strategy 'hi_res'."""
        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "hi_res")
        assert get_unstructured_strategy() == "hi_res"

    def test_strategy_invalid_fallback(self, monkeypatch, caplog):
        """Test that invalid strategy falls back to 'hi_res' and logs a warning."""
        import logging

        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "invalid_strategy")
        # Ensure logging is captured at WARNING level
        with caplog.at_level(logging.WARNING):
            strategy = get_unstructured_strategy()
            assert strategy == "hi_res"
            assert "Invalid UNSTRUCTURED_STRATEGY" in caplog.text

    def test_strategy_case_insensitive(self, monkeypatch):
        """Test that strategy is case-insensitive."""
        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "HI_RES")
        assert get_unstructured_strategy() == "hi_res"
class TestUnstructuredLanguages:
    """Tests for ``get_unstructured_languages`` and ``UNSTRUCTURED_LANGUAGES``.

    The environment is modified only through pytest's ``monkeypatch`` fixture,
    which puts the variable back to its pre-test state on teardown. This keeps
    the tests isolated from each other and from the caller's real environment.
    """

    def test_languages_default(self, monkeypatch):
        """Test that languages default to English and German."""
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv("UNSTRUCTURED_LANGUAGES", raising=False)
        assert get_unstructured_languages() == ["eng", "deu"]

    def test_languages_single(self, monkeypatch):
        """Test single language configuration."""
        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", "eng")
        assert get_unstructured_languages() == ["eng"]

    def test_languages_multiple(self, monkeypatch):
        """Test multiple languages configuration."""
        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", "eng,fra,spa")
        assert get_unstructured_languages() == ["eng", "fra", "spa"]

    def test_languages_whitespace_trimming(self, monkeypatch):
        """Test that whitespace is trimmed from language codes."""
        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", "eng, deu , fra ")
        assert get_unstructured_languages() == ["eng", "deu", "fra"]

    def test_languages_empty_fallback(self, monkeypatch, caplog):
        """Test that empty languages string falls back to default."""
        import logging

        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", "")
        with caplog.at_level(logging.WARNING):
            languages = get_unstructured_languages()
            assert languages == ["eng", "deu"]
            assert "No languages specified" in caplog.text

    def test_languages_only_whitespace_fallback(self, monkeypatch, caplog):
        """Test that whitespace-only string falls back to default."""
        import logging

        # Only separators and blanks: no usable language code remains.
        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", " , , ")
        with caplog.at_level(logging.WARNING):
            languages = get_unstructured_languages()
            assert languages == ["eng", "deu"]
            assert "No languages specified" in caplog.text
class TestUnstructuredClientConfiguration:
    """Test that UnstructuredClient respects configuration.

    These tests only construct the client and then read the configuration
    helpers; no request is sent, so no running Unstructured API is required.
    All environment changes go through pytest's ``monkeypatch`` fixture so the
    previous values of ``UNSTRUCTURED_STRATEGY``, ``UNSTRUCTURED_LANGUAGES``
    and ``UNSTRUCTURED_API_URL`` are restored after each test.
    """

    @pytest.mark.asyncio
    async def test_client_uses_default_strategy(self, monkeypatch):
        """Test that client uses default strategy from environment."""
        monkeypatch.delenv("UNSTRUCTURED_STRATEGY", raising=False)
        monkeypatch.setenv("UNSTRUCTURED_API_URL", "http://test:8000")
        # Constructed only to check that instantiation succeeds with this
        # environment; the instance itself is not inspected.
        UnstructuredClient()
        # The partition_document method should use get_unstructured_strategy() when strategy is None
        # We can't test the actual call without a running API, but we can verify the config is read
        assert get_unstructured_strategy() == "auto"

    @pytest.mark.asyncio
    async def test_client_uses_default_languages(self, monkeypatch):
        """Test that client uses default languages from environment."""
        monkeypatch.delenv("UNSTRUCTURED_LANGUAGES", raising=False)
        monkeypatch.setenv("UNSTRUCTURED_API_URL", "http://test:8000")
        UnstructuredClient()
        # The partition_document method should use get_unstructured_languages() when languages is None
        assert get_unstructured_languages() == ["eng", "deu"]

    @pytest.mark.asyncio
    async def test_client_uses_custom_configuration(self, monkeypatch):
        """Test that client uses custom configuration from environment."""
        monkeypatch.setenv("UNSTRUCTURED_STRATEGY", "hi_res")
        monkeypatch.setenv("UNSTRUCTURED_LANGUAGES", "eng,fra,spa")
        monkeypatch.setenv("UNSTRUCTURED_API_URL", "http://test:8000")
        UnstructuredClient()
        assert get_unstructured_strategy() == "hi_res"
        assert get_unstructured_languages() == ["eng", "fra", "spa"]