Files
Chris Coutinho 5a251a99e6 fix: Set is_placeholder=False in processor to fix search filtering
The processor was not setting is_placeholder field when writing real
document chunks to Qdrant. This caused the placeholder filter to exclude
all documents (since None != False), resulting in 0 search results.

Now explicitly sets is_placeholder: False in payload when writing real
indexed chunks, allowing search filters to correctly distinguish between
placeholders and real documents.
2025-11-20 17:15:19 +01:00

42 lines
941 B
Python

import logging
import pathlib
import anyio
import pymupdf
import pymupdf.layout
from nextcloud_mcp_server.client import NextcloudClient
# Activate the PyMuPDF layout extension before pymupdf4llm is imported; the
# import below is intentionally placed after this call (hence the E402
# suppression). NOTE(review): presumably pymupdf4llm checks for the layout
# extension at import time -- confirm against the PyMuPDF layout docs.
pymupdf.layout.activate()
import pymupdf4llm # noqa: E402
# Module-level client shared by every coroutine in this script.
# NOTE(review): from_env() presumably reads connection settings from
# environment variables -- confirm in nextcloud_mcp_server.client.
client = NextcloudClient.from_env()
logger = logging.getLogger(__name__)
# Directory handed to pymupdf4llm.to_markdown as image_path; it is created at
# import time (including missing parents) and reused if it already exists.
TMP_DIR = pathlib.Path("/tmp/tmp-images")
TMP_DIR.mkdir(exist_ok=True, parents=True)
async def print_markdown(filename):
    """Read a PDF through the client's WebDAV API and print it as Markdown.

    Args:
        filename: Path of the file, passed unchanged to
            ``client.webdav.read_file``.

    Side effects:
        Prints the generated Markdown to stdout. Because ``write_images=True``
        is passed, pymupdf4llm writes image files into ``TMP_DIR``.
    """
    # read_file returns a pair; only the first element (the file payload) is
    # used here.
    content, _ = await client.webdav.read_file(filename)
    # Open the in-memory payload explicitly as a PDF stream and use the
    # document as a context manager so it is closed even if conversion raises.
    with pymupdf.open(stream=content, filetype="pdf") as doc:
        md_text = pymupdf4llm.to_markdown(
            doc,
            write_images=True,
            image_path=str(TMP_DIR),
        )
    print(md_text)
async def run1():
    """Print Markdown for every entry found with type ``application/pdf``.

    Entries come from ``client.webdav.find_by_type`` and are processed one at
    a time, each via ``print_markdown`` using the entry's ``"path"`` value.
    """
    pdf_entries = await client.webdav.find_by_type("application/pdf")
    for entry in pdf_entries:
        pdf_path = entry["path"]
        await print_markdown(pdf_path)
async def run():
    """Fetch all tags via ``client.tags.get_all_tags`` and print the result."""
    print(await client.tags.get_all_tags())
if __name__ == "__main__":
    # Script entry point: set up INFO-level logging first, then drive the
    # ``run`` coroutine to completion with anyio.
    logging.basicConfig(level="INFO")
    anyio.run(run)