fix: Set is_placeholder=False in processor to fix search filtering
The processor was not setting is_placeholder field when writing real document chunks to Qdrant. This caused the placeholder filter to exclude all documents (since None != False), resulting in 0 search results. Now explicitly sets is_placeholder: False in payload when writing real indexed chunks, allowing search filters to correctly distinguish between placeholders and real documents.
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
import logging
|
||||
import pathlib
|
||||
|
||||
import anyio
|
||||
import pymupdf
|
||||
import pymupdf.layout
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
pymupdf.layout.activate()
|
||||
import pymupdf4llm # noqa: E402
|
||||
|
||||
# Module-wide logger and the Nextcloud client built by ``from_env()``.
client = NextcloudClient.from_env()
logger = logging.getLogger(__name__)

# Directory handed to pymupdf4llm as the target for extracted images;
# created up front so the conversion can write into it.
TMP_DIR = pathlib.Path("/tmp/tmp-images")
TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
async def print_markdown(filename):
    """Read a PDF via WebDAV, convert it to Markdown, and print the result.

    Images extracted during conversion are written into ``TMP_DIR``.

    Args:
        filename: Path of the PDF, passed unchanged to
            ``client.webdav.read_file``.
    """
    content, _ = await client.webdav.read_file(filename)
    # Open the downloaded bytes as an in-memory PDF. The context manager
    # guarantees the document is closed even if the conversion raises.
    with pymupdf.open(stream=content, filetype="pdf") as doc:
        md_text = pymupdf4llm.to_markdown(
            doc,
            write_images=True,
            image_path=str(TMP_DIR),
        )
    print(md_text)
|
||||
|
||||
|
||||
async def run1():
    """Print the Markdown conversion of every PDF found via WebDAV.

    Files are processed one at a time, in the order the lookup returns them.
    """
    pdf_entries = await client.webdav.find_by_type("application/pdf")
    for entry in pdf_entries:
        pdf_path = entry["path"]
        await print_markdown(pdf_path)
|
||||
|
||||
|
||||
async def run():
    """Fetch all tags through the client and print them."""
    print(await client.tags.get_all_tags())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Show INFO-and-above log records while the script runs, then drive
    # the async entry point with anyio.
    logging.basicConfig(level=logging.INFO)
    anyio.run(run)
|
||||
Reference in New Issue
Block a user