173 lines
6.5 KiB
Python
173 lines
6.5 KiB
Python
"""Unit tests for Unstructured API configuration."""
|
|
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from nextcloud_mcp_server.client.unstructured_client import UnstructuredClient
|
|
from nextcloud_mcp_server.config import (
|
|
get_unstructured_languages,
|
|
get_unstructured_strategy,
|
|
)
|
|
|
|
|
|
class TestUnstructuredStrategy:
|
|
"""Test strategy configuration."""
|
|
|
|
def test_strategy_default(self):
|
|
"""Test that strategy defaults to 'auto'."""
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
assert get_unstructured_strategy() == "auto"
|
|
|
|
def test_strategy_custom_auto(self):
|
|
"""Test custom strategy 'auto'."""
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "auto"
|
|
try:
|
|
assert get_unstructured_strategy() == "auto"
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
|
|
def test_strategy_custom_fast(self):
|
|
"""Test custom strategy 'fast'."""
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "fast"
|
|
try:
|
|
assert get_unstructured_strategy() == "fast"
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
|
|
def test_strategy_custom_hi_res(self):
|
|
"""Test custom strategy 'hi_res'."""
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "hi_res"
|
|
try:
|
|
assert get_unstructured_strategy() == "hi_res"
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
|
|
def test_strategy_invalid_fallback(self, caplog):
|
|
"""Test that invalid strategy falls back to 'hi_res'."""
|
|
import logging
|
|
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "invalid_strategy"
|
|
try:
|
|
# Ensure logging is captured at WARNING level
|
|
with caplog.at_level(logging.WARNING):
|
|
strategy = get_unstructured_strategy()
|
|
assert strategy == "hi_res"
|
|
assert "Invalid UNSTRUCTURED_STRATEGY" in caplog.text
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
|
|
def test_strategy_case_insensitive(self):
|
|
"""Test that strategy is case-insensitive."""
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "HI_RES"
|
|
try:
|
|
assert get_unstructured_strategy() == "hi_res"
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
|
|
|
|
class TestUnstructuredLanguages:
|
|
"""Test language configuration."""
|
|
|
|
def test_languages_default(self):
|
|
"""Test that languages default to English and German."""
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
assert get_unstructured_languages() == ["eng", "deu"]
|
|
|
|
def test_languages_single(self):
|
|
"""Test single language configuration."""
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = "eng"
|
|
try:
|
|
assert get_unstructured_languages() == ["eng"]
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
|
|
def test_languages_multiple(self):
|
|
"""Test multiple languages configuration."""
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = "eng,fra,spa"
|
|
try:
|
|
assert get_unstructured_languages() == ["eng", "fra", "spa"]
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
|
|
def test_languages_whitespace_trimming(self):
|
|
"""Test that whitespace is trimmed from language codes."""
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = "eng, deu , fra "
|
|
try:
|
|
assert get_unstructured_languages() == ["eng", "deu", "fra"]
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
|
|
def test_languages_empty_fallback(self, caplog):
|
|
"""Test that empty languages string falls back to default."""
|
|
import logging
|
|
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = ""
|
|
try:
|
|
with caplog.at_level(logging.WARNING):
|
|
languages = get_unstructured_languages()
|
|
assert languages == ["eng", "deu"]
|
|
assert "No languages specified" in caplog.text
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
|
|
def test_languages_only_whitespace_fallback(self, caplog):
|
|
"""Test that whitespace-only string falls back to default."""
|
|
import logging
|
|
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = " , , "
|
|
try:
|
|
with caplog.at_level(logging.WARNING):
|
|
languages = get_unstructured_languages()
|
|
assert languages == ["eng", "deu"]
|
|
assert "No languages specified" in caplog.text
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
|
|
|
|
class TestUnstructuredClientConfiguration:
|
|
"""Test that UnstructuredClient respects configuration."""
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_client_uses_default_strategy(self):
|
|
"""Test that client uses default strategy from environment."""
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
os.environ["UNSTRUCTURED_API_URL"] = "http://test:8000"
|
|
|
|
try:
|
|
_client = UnstructuredClient()
|
|
# The partition_document method should use get_unstructured_strategy() when strategy is None
|
|
# We can't test the actual call without a running API, but we can verify the config is read
|
|
assert get_unstructured_strategy() == "auto"
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_API_URL", None)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_client_uses_default_languages(self):
|
|
"""Test that client uses default languages from environment."""
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
os.environ["UNSTRUCTURED_API_URL"] = "http://test:8000"
|
|
|
|
try:
|
|
_client = UnstructuredClient()
|
|
# The partition_document method should use get_unstructured_languages() when languages is None
|
|
assert get_unstructured_languages() == ["eng", "deu"]
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_API_URL", None)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_client_uses_custom_configuration(self):
|
|
"""Test that client uses custom configuration from environment."""
|
|
os.environ["UNSTRUCTURED_STRATEGY"] = "hi_res"
|
|
os.environ["UNSTRUCTURED_LANGUAGES"] = "eng,fra,spa"
|
|
os.environ["UNSTRUCTURED_API_URL"] = "http://test:8000"
|
|
|
|
try:
|
|
_client = UnstructuredClient()
|
|
assert get_unstructured_strategy() == "hi_res"
|
|
assert get_unstructured_languages() == ["eng", "fra", "spa"]
|
|
finally:
|
|
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
|
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
|
os.environ.pop("UNSTRUCTURED_API_URL", None)
|