diff --git a/nextcloud_mcp_server/observability/logging_config.py b/nextcloud_mcp_server/observability/logging_config.py
index 0af7dfa..b1bd3f4 100644
--- a/nextcloud_mcp_server/observability/logging_config.py
+++ b/nextcloud_mcp_server/observability/logging_config.py
@@ -37,19 +37,18 @@ class HealthCheckFilter(logging.Filter):
         """
         # Check if the log message contains health check endpoints
         message = record.getMessage()
-        return (
-            not any(
-                endpoint in message
-                for endpoint in [
-                    "/health/live",
-                    "/health/ready",
-                    "/metrics",
-                    "/app/vector-sync/status",
-                ]
-            )
-            or "OpenCV not installed. Disabling OCR" in message
+        health_check = any(
+            endpoint in message
+            for endpoint in [
+                "/health/live",
+                "/health/ready",
+                "/metrics",
+                "/app/vector-sync/status",
+            ]
         )
 
+        return not health_check
+
 
 class TraceContextFormatter(JsonFormatter):
     """
diff --git a/nextcloud_mcp_server/vector/processor.py b/nextcloud_mcp_server/vector/processor.py
index ab2bc55..d3e8607 100644
--- a/nextcloud_mcp_server/vector/processor.py
+++ b/nextcloud_mcp_server/vector/processor.py
@@ -389,6 +389,7 @@ async def _index_document(
                         "user_id": doc_task.user_id,
                         "doc_id": doc_task.doc_id,
                         "doc_type": doc_task.doc_type,
+                        "is_placeholder": False,  # Real indexed document (not placeholder)
                         "title": title,
                         "excerpt": chunk.text[:200],
                         "indexed_at": indexed_at,
diff --git a/tools/parse-doc.py b/tools/parse-doc.py
new file mode 100644
index 0000000..78fd987
--- /dev/null
+++ b/tools/parse-doc.py
@@ -0,0 +1,41 @@
+import logging
+import pathlib
+
+import anyio
+import pymupdf
+import pymupdf.layout
+
+from nextcloud_mcp_server.client import NextcloudClient
+
+pymupdf.layout.activate()
+import pymupdf4llm  # noqa: E402
+
+client = NextcloudClient.from_env()
+logger = logging.getLogger(__name__)
+
+TMP_DIR = pathlib.Path("/tmp/tmp-images")
+TMP_DIR.mkdir(exist_ok=True, parents=True)
+
+
+async def print_markdown(filename):
+    content, _ = await client.webdav.read_file(filename)
+    doc = pymupdf.open("pdf", content)
+    md_text = pymupdf4llm.to_markdown(doc, write_images=True, image_path=str(TMP_DIR))
+    print(md_text)
+
+
+async def run1():
+    response = await client.webdav.find_by_type("application/pdf")
+    # print(response)
+    for file in response:
+        await print_markdown(file["path"])
+
+
+async def run():
+    tags = await client.tags.get_all_tags()
+    print(tags)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level="INFO")
+    anyio.run(run)