Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 626c4bf562 | |||
| a56b3f3d51 | |||
| 2896fa1dc9 | |||
| 04251401aa |
@@ -3,6 +3,10 @@ name: RAG Evaluation
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
manual_path:
|
||||
description: 'Path to Nextcloud User Manual PDF in Nextcloud'
|
||||
required: false
|
||||
default: 'Nextcloud Manual.pdf'
|
||||
embedding_model:
|
||||
description: 'OpenAI embedding model'
|
||||
required: false
|
||||
@@ -15,40 +19,15 @@ on:
|
||||
jobs:
|
||||
rag-evaluation:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 45
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
models: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
- name: Clone Nextcloud documentation
|
||||
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
|
||||
with:
|
||||
repository: 'nextcloud/documentation'
|
||||
path: 'nextcloud-docs'
|
||||
|
||||
- name: Install Sphinx and LaTeX dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y \
|
||||
python3-sphinx \
|
||||
python3-pip \
|
||||
latexmk \
|
||||
texlive-latex-recommended \
|
||||
texlive-latex-extra \
|
||||
texlive-fonts-recommended \
|
||||
texlive-fonts-extra
|
||||
|
||||
- name: Build User Manual PDF
|
||||
run: |
|
||||
cd nextcloud-docs/user_manual
|
||||
pip3 install -r ../requirements.txt
|
||||
make latexpdf
|
||||
ls -la _build/latex/
|
||||
cp _build/latex/NextcloudUserManual.pdf ../../Nextcloud_User_Manual.pdf
|
||||
echo "PDF built successfully"
|
||||
|
||||
###### Required to build OIDC App ######
|
||||
- name: Set up php 8.4
|
||||
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
|
||||
@@ -111,149 +90,12 @@ jobs:
|
||||
done
|
||||
echo "MCP server is ready."
|
||||
|
||||
- name: Upload User Manual PDF to Nextcloud
|
||||
run: |
|
||||
echo "Uploading Nextcloud_User_Manual.pdf to Nextcloud..."
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -u admin:admin \
|
||||
-X PUT \
|
||||
-T Nextcloud_User_Manual.pdf \
|
||||
"http://localhost:8080/remote.php/dav/files/admin/Nextcloud_User_Manual.pdf")
|
||||
|
||||
if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "204" ]; then
|
||||
echo "PDF uploaded successfully (HTTP $HTTP_CODE)"
|
||||
else
|
||||
echo "Failed to upload PDF (HTTP $HTTP_CODE)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Create vector-index tag
|
||||
id: create_tag
|
||||
run: |
|
||||
# Create the tag using OCS API
|
||||
echo "Creating vector-index tag..."
|
||||
RESPONSE=$(curl -s -u admin:admin \
|
||||
-X POST \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'OCS-APIRequest: true' \
|
||||
-d '{"name":"vector-index","userVisible":true,"userAssignable":true}' \
|
||||
"http://localhost:8080/ocs/v2.php/apps/systemtags/api/v1/tags")
|
||||
|
||||
echo "Create tag response: $RESPONSE"
|
||||
|
||||
# Get tag ID from response or lookup
|
||||
TAG_ID=$(echo "$RESPONSE" | grep -oP '(?<="id":)[0-9]+' | head -1 || echo "")
|
||||
|
||||
if [ -z "$TAG_ID" ]; then
|
||||
echo "Tag may already exist, looking it up..."
|
||||
TAG_ID=$(curl -s -u admin:admin \
|
||||
-X PROPFIND \
|
||||
-H 'Content-Type: application/xml' \
|
||||
-d '<?xml version="1.0"?><d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns"><d:prop><oc:id/><oc:display-name/></d:prop></d:propfind>' \
|
||||
http://localhost:8080/remote.php/dav/systemtags/ \
|
||||
| grep -B2 "vector-index" | grep -oP '(?<=<oc:id>)[0-9]+(?=</oc:id>)' | head -1 || echo "")
|
||||
fi
|
||||
|
||||
if [ -z "$TAG_ID" ]; then
|
||||
echo "ERROR: Could not create or find vector-index tag"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Tag ID: $TAG_ID"
|
||||
echo "tag_id=$TAG_ID" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Get file ID of uploaded PDF
|
||||
id: get_file_id
|
||||
run: |
|
||||
echo "Getting file ID for Nextcloud_User_Manual.pdf..."
|
||||
|
||||
# Get file ID using PROPFIND
|
||||
FILE_ID=$(curl -s -u admin:admin \
|
||||
-X PROPFIND \
|
||||
-H 'Content-Type: application/xml' \
|
||||
-d '<?xml version="1.0"?><d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns"><d:prop><oc:fileid/></d:prop></d:propfind>' \
|
||||
"http://localhost:8080/remote.php/dav/files/admin/Nextcloud_User_Manual.pdf" \
|
||||
| grep -oP '(?<=<oc:fileid>)[0-9]+(?=</oc:fileid>)' || echo "")
|
||||
|
||||
if [ -z "$FILE_ID" ]; then
|
||||
echo "ERROR: Could not find file ID"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Found file ID: $FILE_ID"
|
||||
echo "file_id=$FILE_ID" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Tag file with vector-index
|
||||
env:
|
||||
FILE_ID: ${{ steps.get_file_id.outputs.file_id }}
|
||||
TAG_ID: ${{ steps.create_tag.outputs.tag_id }}
|
||||
run: |
|
||||
echo "Tagging file $FILE_ID with tag $TAG_ID..."
|
||||
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -u admin:admin \
|
||||
-X PUT \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Content-Length: 0' \
|
||||
"http://localhost:8080/remote.php/dav/systemtags-relations/files/$FILE_ID/$TAG_ID")
|
||||
|
||||
if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "409" ]; then
|
||||
echo "File tagged successfully (HTTP $HTTP_CODE)"
|
||||
else
|
||||
echo "Failed to tag file (HTTP $HTTP_CODE)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Wait for vector sync to complete indexing
|
||||
env:
|
||||
NEXTCLOUD_HOST: "http://localhost:8080"
|
||||
NEXTCLOUD_USERNAME: "admin"
|
||||
NEXTCLOUD_PASSWORD: "admin"
|
||||
run: |
|
||||
echo "Waiting for vector sync to index the manual..."
|
||||
max_attempts=60
|
||||
attempt=0
|
||||
|
||||
# Wait for initial scan to pick up the file
|
||||
sleep 10
|
||||
|
||||
while [ $attempt -lt $max_attempts ]; do
|
||||
attempt=$((attempt + 1))
|
||||
|
||||
# Check vector sync status via MCP
|
||||
STATUS=$(curl -s http://localhost:8000/health || echo "{}")
|
||||
echo "Attempt $attempt/$max_attempts: $STATUS"
|
||||
|
||||
# Also check indexed count via semantic search
|
||||
# If we get results, indexing is done
|
||||
RESULT=$(curl -s -X POST http://localhost:8000/mcp \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"nc_get_vector_sync_status","arguments":{}}}' \
|
||||
2>/dev/null || echo "{}")
|
||||
|
||||
echo "Vector sync status: $RESULT"
|
||||
|
||||
# Check if pending is 0 and indexed > 0
|
||||
INDEXED=$(echo "$RESULT" | jq -r '.result.structuredContent.indexed // 0' 2>/dev/null || echo "0")
|
||||
PENDING=$(echo "$RESULT" | jq -r '.result.structuredContent.pending // 1' 2>/dev/null || echo "1")
|
||||
|
||||
echo "Indexed: $INDEXED, Pending: $PENDING"
|
||||
|
||||
if [ "$INDEXED" -gt "0" ] && [ "$PENDING" -eq "0" ]; then
|
||||
echo "Indexing complete! $INDEXED documents indexed."
|
||||
break
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
done
|
||||
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "WARNING: Indexing may not be complete, proceeding anyway..."
|
||||
fi
|
||||
|
||||
- name: Run RAG evaluation tests
|
||||
env:
|
||||
NEXTCLOUD_HOST: "http://localhost:8080"
|
||||
NEXTCLOUD_USERNAME: "admin"
|
||||
NEXTCLOUD_PASSWORD: "admin"
|
||||
RAG_MANUAL_PATH: ${{ inputs.manual_path }}
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: "https://models.github.ai/inference"
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
## v0.48.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add tag management methods to WebDAV client
|
||||
|
||||
## v0.47.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
```markdown
|
||||
<p align="center">
|
||||
<img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
|
||||
</p>
|
||||
|
||||
# Nextcloud MCP Server
|
||||
|
||||
[](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
[](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
|
||||
[](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
|
||||
**A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
|
||||
|
||||
@@ -224,4 +223,3 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo
|
||||
- [Model Context Protocol](https://github.com/modelcontextprotocol)
|
||||
- [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
|
||||
- [Nextcloud](https://nextcloud.com/)
|
||||
```
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.47.0
|
||||
appVersion: "0.47.0"
|
||||
version: 0.48.0
|
||||
appVersion: "0.48.0"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
|
||||
@@ -1295,3 +1295,233 @@ class WebDAVClient(BaseNextcloudClient):
|
||||
|
||||
logger.debug(f"Found {len(files)} files with tag ID {tag_id}")
|
||||
return files
|
||||
|
||||
async def get_file_info(self, path: str) -> dict[str, Any] | None:
    """Get file info including file ID via WebDAV PROPFIND.

    Sends a Depth-0 PROPFIND for a single resource and reads the first
    ``d:response/d:propstat/d:prop`` block of the multistatus reply.

    Args:
        path: Path to the file (relative to user's files directory)

    Returns:
        Dictionary with the keys ``id``, ``path``, ``name``, ``size``,
        ``content_type``, ``last_modified``, ``etag`` and ``is_directory``.
        A property that is missing or reported without a value falls back
        to a default: ``None`` for ``id``/``last_modified``/``etag``, ``0``
        for ``size``, ``""`` for ``content_type`` and the last path segment
        for ``name``.
        Returns None if the server answers 404 or the reply contains no
        property block.

    Raises:
        HTTPStatusError: For any error status other than 404.
    """
    webdav_path = f"{self._get_webdav_base_path()}/{path.lstrip('/')}"

    propfind_body = """<?xml version="1.0"?>
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
<d:prop>
<oc:fileid/>
<d:displayname/>
<d:getcontentlength/>
<d:getcontenttype/>
<d:getlastmodified/>
<d:getetag/>
<d:resourcetype/>
</d:prop>
</d:propfind>"""

    try:
        response = await self._client.request(
            "PROPFIND",
            webdav_path,
            headers={"Depth": "0"},
            content=propfind_body,
        )
        response.raise_for_status()
    except HTTPStatusError as e:
        if e.response.status_code == 404:
            logger.debug(f"File not found: {path}")
            return None
        raise

    ns = {
        "d": "DAV:",
        "oc": "http://owncloud.org/ns",
    }

    # Walk response -> propstat -> prop, taking the first child at each
    # level; a reply missing any level is treated as "no info available".
    node = ET.fromstring(response.content)
    for child_tag in ("d:response", "d:propstat", "d:prop"):
        node = node.find(child_tag, ns)
        if node is None:
            return None
    prop = node

    def _text(tag: str) -> str | None:
        """Return the text of a property, or None if missing or empty."""
        elem = prop.find(tag, ns)
        # ElementTree reports an empty element (<oc:fileid/>) as text=None,
        # so "present" does not imply "has a value".
        if elem is None or not elem.text:
            return None
        return elem.text

    fileid = _text("oc:fileid")
    contentlength = _text("d:getcontentlength")
    etag = _text("d:getetag")

    resourcetype_elem = prop.find("d:resourcetype", ns)
    is_directory = (
        resourcetype_elem is not None
        and resourcetype_elem.find("d:collection", ns) is not None
    )

    file_info = {
        "id": int(fileid) if fileid else None,
        "path": path,
        # rstrip so a directory path such as "Documents/" still yields a name.
        "name": _text("d:displayname") or path.rstrip("/").split("/")[-1],
        "size": int(contentlength) if contentlength else 0,
        "content_type": _text("d:getcontenttype") or "",
        "last_modified": _text("d:getlastmodified"),
        "etag": etag.strip('"') if etag else None,
        "is_directory": is_directory,
    }

    logger.debug(f"Got file info for '{path}': id={file_info['id']}")
    return file_info
|
||||
|
||||
async def create_tag(
    self,
    name: str,
    user_visible: bool = True,
    user_assignable: bool = True,
) -> dict[str, Any]:
    """Create a system tag via OCS API.

    Args:
        name: Name of the tag to create
        user_visible: Whether the tag is visible to users
        user_assignable: Whether users can assign this tag

    Returns:
        Tag dictionary with id, name, userVisible, userAssignable. Fields
        absent from the server reply fall back to the requested values
        (``id`` falls back to None).

    Raises:
        HTTPStatusError: If tag creation fails (409 if already exists)
    """
    response = await self._client.post(
        "/ocs/v2.php/apps/systemtags/api/v1/tags",
        headers={
            "OCS-APIRequest": "true",
            "Content-Type": "application/json",
            # The reply is parsed with response.json() below, so ask for
            # JSON explicitly; OCS endpoints answer in XML by default.
            "Accept": "application/json",
        },
        json={
            "name": name,
            "userVisible": user_visible,
            "userAssignable": user_assignable,
        },
    )
    response.raise_for_status()

    # Parse OCS response
    data = response.json()
    ocs_data = data.get("ocs", {}).get("data", {})
    if not isinstance(ocs_data, dict):
        # An empty OCS payload can be serialized as a list; treat it as
        # "no fields returned" instead of failing on .get().
        ocs_data = {}

    tag_info = {
        "id": ocs_data.get("id"),
        "name": ocs_data.get("name", name),
        "userVisible": ocs_data.get("userVisible", user_visible),
        "userAssignable": ocs_data.get("userAssignable", user_assignable),
    }

    logger.info(f"Created tag '{name}' with ID {tag_info['id']}")
    return tag_info
|
||||
|
||||
async def get_or_create_tag(
    self,
    name: str,
    user_visible: bool = True,
    user_assignable: bool = True,
) -> dict[str, Any]:
    """Return the tag called ``name``, creating it when it is absent.

    Args:
        name: Name of the tag
        user_visible: Visibility flag, only used if the tag is created
        user_assignable: Assignability flag, only used if the tag is created

    Returns:
        Tag dictionary with id, name, userVisible, userAssignable

    Raises:
        HTTPStatusError: If creation fails and the tag still cannot be
            looked up afterwards.
    """
    found = await self.get_tag_by_name(name)
    if found:
        logger.debug(f"Tag '{name}' already exists with ID {found['id']}")
        return found

    try:
        created = await self.create_tag(name, user_visible, user_assignable)
    except HTTPStatusError as err:
        if err.response.status_code != 409:
            raise
        # 409: someone else created the tag between the lookup and the
        # create call, so a second lookup should now succeed.
        raced = await self.get_tag_by_name(name)
        if not raced:
            raise
        return raced
    return created
|
||||
|
||||
async def assign_tag_to_file(self, file_id: int, tag_id: int) -> bool:
    """Attach a system tag to a file through the systemtags-relations endpoint.

    Args:
        file_id: Numeric file ID
        tag_id: Numeric tag ID

    Returns:
        True once the tag is assigned; an already existing assignment
        counts as success.

    Raises:
        HTTPStatusError: If the server answers with an error status other
            than 409.
    """
    relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
    result = await self._client.request(
        "PUT",
        relation_url,
        headers={"Content-Length": "0"},
        content=b"",
    )

    # 201 = relation created, 409 = relation already existed.
    if result.status_code not in (201, 409):
        result.raise_for_status()
        return True

    logger.info(f"Tagged file {file_id} with tag {tag_id}")
    return True
|
||||
|
||||
async def remove_tag_from_file(self, file_id: int, tag_id: int) -> bool:
    """Detach a system tag from a file through the systemtags-relations endpoint.

    Args:
        file_id: Numeric file ID
        tag_id: Numeric tag ID

    Returns:
        True once the tag is no longer assigned; a tag that was never
        assigned counts as success.

    Raises:
        HTTPStatusError: If the server answers with an error status other
            than 404.
    """
    relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
    result = await self._client.request("DELETE", relation_url)

    # 204 = relation deleted, 404 = there was no relation to delete.
    if result.status_code not in (204, 404):
        result.raise_for_status()
        return True

    logger.info(f"Removed tag {tag_id} from file {file_id}")
    return True
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.47.0"
|
||||
version = "0.48.0"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
|
||||
@@ -10,6 +10,7 @@ Environment Variables:
|
||||
OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
|
||||
OPENAI_GENERATION_MODEL: Generation model for sampling (default: "gpt-4o-mini")
|
||||
RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud Manual.pdf")
|
||||
|
||||
For GitHub CI, set:
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -18,15 +19,17 @@ For GitHub CI, set:
|
||||
OPENAI_GENERATION_MODEL: openai/gpt-4o-mini
|
||||
|
||||
Prerequisites:
|
||||
- Nextcloud User Manual indexed in Qdrant (via vector sync)
|
||||
- Nextcloud User Manual PDF uploaded to Nextcloud
|
||||
- VECTOR_SYNC_ENABLED=true on the MCP server
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, AsyncGenerator
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
from mcp import ClientSession
|
||||
|
||||
@@ -34,6 +37,11 @@ from nextcloud_mcp_server.providers.openai import OpenAIProvider
|
||||
from tests.conftest import create_mcp_client_session
|
||||
from tests.integration.sampling_support import create_openai_sampling_callback
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default path to the Nextcloud User Manual PDF
|
||||
DEFAULT_MANUAL_PATH = "Nextcloud Manual.pdf"
|
||||
|
||||
# Skip all tests if OpenAI API key not configured
|
||||
pytestmark = [
|
||||
pytest.mark.integration,
|
||||
@@ -58,6 +66,86 @@ def ground_truth_qa():
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def indexed_manual_pdf(nc_client, nc_mcp_client):
    """Tag the Nextcloud User Manual PDF for vector search and wait for indexing.

    Steps performed:
        1. Look up the manual PDF (the test is skipped when it is missing)
        2. Get or create the 'vector-index' tag
        3. Assign that tag to the PDF
        4. Poll ``nc_get_vector_sync_status`` until something is indexed and
           nothing is pending, or until the attempts run out

    Environment Variables:
        RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: Nextcloud Manual.pdf)

    Yields:
        Dictionary with ``path``, ``file_id`` and ``tag_id`` of the manual.
    """
    manual_path = os.getenv("RAG_MANUAL_PATH", DEFAULT_MANUAL_PATH)
    logger.info(f"Setting up indexed manual PDF: {manual_path}")

    # Without the file there is nothing to index, so skip instead of failing.
    file_info = await nc_client.webdav.get_file_info(manual_path)
    if not file_info:
        pytest.skip(f"Manual PDF not found at '{manual_path}'")
    file_id = file_info["id"]
    logger.info(f"Found manual PDF: {manual_path} (file_id={file_id})")

    tag = await nc_client.webdav.get_or_create_tag("vector-index")
    tag_id = tag["id"]
    logger.info(f"Using tag 'vector-index' (tag_id={tag_id})")

    await nc_client.webdav.assign_tag_to_file(file_id, tag_id)
    logger.info(f"Tagged file {file_id} with vector-index tag")

    max_attempts = 60
    poll_interval = 10
    indexing_done = False

    logger.info("Waiting for vector sync to index the manual...")

    for attempt in range(1, max_attempts + 1):
        try:
            # Query sync progress through the already open MCP session.
            result = await nc_mcp_client.call_tool(
                "nc_get_vector_sync_status",
                arguments={},
            )
            if not result.isError:
                status = result.structuredContent or {}
                indexed = status.get("indexed_count", 0)
                # Missing counter defaults to 1 so an empty reply never
                # looks like "finished".
                pending = status.get("pending_count", 1)

                logger.info(
                    f"Attempt {attempt}/{max_attempts}: "
                    f"indexed={indexed}, pending={pending}"
                )

                if indexed > 0 and pending == 0:
                    logger.info(
                        f"Vector indexing complete: {indexed} documents indexed"
                    )
                    indexing_done = True
        except Exception as e:
            # Polling is best effort: a failed status call is logged and
            # simply retried on the next attempt.
            logger.warning(f"Attempt {attempt}: Error checking status: {e}")

        if indexing_done:
            break

        # No point in sleeping after the final attempt.
        if attempt < max_attempts:
            await anyio.sleep(poll_interval)

    if not indexing_done:
        logger.warning(
            f"Vector indexing may not be complete after {max_attempts} attempts"
        )

    yield {
        "path": manual_path,
        "file_id": file_id,
        "tag_id": tag_id,
    }
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
async def openai_provider():
|
||||
"""OpenAI provider configured from environment (embeddings only)."""
|
||||
@@ -129,7 +217,9 @@ async def test_openai_embeddings_work(openai_provider: OpenAIProvider):
|
||||
assert len(embedding) in [1536, 3072]
|
||||
|
||||
|
||||
async def test_semantic_search_retrieval(nc_mcp_client, ground_truth_qa):
|
||||
async def test_semantic_search_retrieval(
|
||||
nc_mcp_client, ground_truth_qa, indexed_manual_pdf
|
||||
):
|
||||
"""Test that semantic search retrieves relevant documents from the manual.
|
||||
|
||||
This tests the retrieval component of RAG - ensuring that queries
|
||||
@@ -167,7 +257,7 @@ async def test_semantic_search_retrieval(nc_mcp_client, ground_truth_qa):
|
||||
|
||||
|
||||
async def test_semantic_search_answer_with_sampling(
|
||||
nc_mcp_client_with_sampling, ground_truth_qa
|
||||
nc_mcp_client_with_sampling, ground_truth_qa, indexed_manual_pdf
|
||||
):
|
||||
"""Test semantic search with MCP sampling for answer generation.
|
||||
|
||||
@@ -243,7 +333,7 @@ async def test_semantic_search_answer_with_sampling(
|
||||
],
|
||||
)
|
||||
async def test_retrieval_quality_all_queries(
|
||||
nc_mcp_client, ground_truth_qa, qa_index, min_expected_results
|
||||
nc_mcp_client, ground_truth_qa, indexed_manual_pdf, qa_index, min_expected_results
|
||||
):
|
||||
"""Test retrieval quality for all ground truth queries.
|
||||
|
||||
@@ -274,7 +364,7 @@ async def test_retrieval_quality_all_queries(
|
||||
)
|
||||
|
||||
|
||||
async def test_no_results_for_unrelated_query(nc_mcp_client):
|
||||
async def test_no_results_for_unrelated_query(nc_mcp_client, indexed_manual_pdf):
|
||||
"""Test that completely unrelated queries return low/no scores.
|
||||
|
||||
The Nextcloud manual shouldn't have relevant content for
|
||||
|
||||
@@ -117,3 +117,244 @@ def test_parse_search_response_with_empty_tags(mocker):
|
||||
assert len(results) == 1
|
||||
assert "tags" in results[0]
|
||||
assert results[0]["tags"] == []
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_get_file_info_returns_file_details(mocker):
    """get_file_info parses a 207 multistatus reply into a file info dict."""
    multistatus_xml = b"""<?xml version="1.0"?>
<d:multistatus xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
<d:response>
<d:href>/remote.php/dav/files/testuser/Documents/test.pdf</d:href>
<d:propstat>
<d:prop>
<oc:fileid>12345</oc:fileid>
<d:displayname>test.pdf</d:displayname>
<d:getcontentlength>1024</d:getcontentlength>
<d:getcontenttype>application/pdf</d:getcontenttype>
<d:getlastmodified>Sat, 01 Jan 2025 00:00:00 GMT</d:getlastmodified>
<d:getetag>"abc123"</d:getetag>
<d:resourcetype/>
</d:prop>
</d:propstat>
</d:response>
</d:multistatus>"""

    # Canned PROPFIND reply; raise_for_status is a plain no-op mock.
    propfind_reply = AsyncMock()
    propfind_reply.status_code = 207
    propfind_reply.content = multistatus_xml
    propfind_reply.raise_for_status = mocker.Mock()

    http = AsyncMock()
    http.request = AsyncMock(return_value=propfind_reply)
    webdav = WebDAVClient(http, "testuser")

    info = await webdav.get_file_info("Documents/test.pdf")

    assert info is not None
    expected_fields = {
        "id": 12345,
        "name": "test.pdf",
        "path": "Documents/test.pdf",
        "content_type": "application/pdf",
        "size": 1024,
        # Surrounding quotes of the ETag header value are stripped.
        "etag": "abc123",
    }
    for field, value in expected_fields.items():
        assert info[field] == value
    assert info["is_directory"] is False
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_get_file_info_returns_none_for_missing_file(mocker):
    """get_file_info maps an HTTP 404 from PROPFIND to a None result."""
    from httpx import HTTPStatusError, Response

    # Response stub carrying only the status code the client inspects.
    not_found = mocker.Mock(spec=Response)
    not_found.status_code = 404
    error = HTTPStatusError("Not Found", request=mocker.Mock(), response=not_found)

    http = AsyncMock()
    http.request = AsyncMock(side_effect=error)
    webdav = WebDAVClient(http, "testuser")

    info = await webdav.get_file_info("nonexistent.pdf")

    assert info is None
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_create_tag_creates_system_tag(mocker):
    """create_tag posts to the systemtags OCS endpoint and returns the tag."""
    ocs_payload = {
        "ocs": {
            "data": {
                "id": 42,
                "name": "vector-index",
                "userVisible": True,
                "userAssignable": True,
            }
        }
    }

    # json() and raise_for_status() are synchronous, hence plain mocks.
    ocs_reply = AsyncMock()
    ocs_reply.status_code = 200
    ocs_reply.json = mocker.Mock(return_value=ocs_payload)
    ocs_reply.raise_for_status = mocker.Mock()

    http = AsyncMock()
    http.post = AsyncMock(return_value=ocs_reply)
    webdav = WebDAVClient(http, "testuser")

    tag = await webdav.create_tag("vector-index")

    # Returned tag mirrors the OCS payload.
    assert tag["id"] == 42
    assert tag["name"] == "vector-index"
    assert tag["userVisible"] is True
    assert tag["userAssignable"] is True

    # Exactly one POST, to the tags endpoint, carrying the tag name.
    http.post.assert_called_once()
    post_call = http.post.call_args
    assert post_call.args[0] == "/ocs/v2.php/apps/systemtags/api/v1/tags"
    assert post_call.kwargs["json"]["name"] == "vector-index"
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_get_or_create_tag_returns_existing_tag(mocker):
    """get_or_create_tag hands back a tag that already exists and never creates one."""
    webdav = WebDAVClient(AsyncMock(), "testuser")

    # The lookup succeeds, so the create path must stay untouched.
    known_tag = {"id": 42, "name": "vector-index", "userVisible": True}
    mocker.patch.object(webdav, "get_tag_by_name", return_value=known_tag)
    create_spy = mocker.patch.object(webdav, "create_tag")

    tag = await webdav.get_or_create_tag("vector-index")

    assert tag["id"] == 42
    create_spy.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_get_or_create_tag_creates_new_tag(mocker):
    """get_or_create_tag falls through to create_tag when the lookup finds nothing."""
    webdav = WebDAVClient(AsyncMock(), "testuser")

    # Lookup reports "no such tag"; creation returns the new tag.
    mocker.patch.object(webdav, "get_tag_by_name", return_value=None)
    new_tag = {"id": 42, "name": "vector-index", "userVisible": True}
    create_spy = mocker.patch.object(webdav, "create_tag", return_value=new_tag)

    tag = await webdav.get_or_create_tag("vector-index")

    assert tag["id"] == 42
    # The default visibility/assignability flags are forwarded positionally.
    create_spy.assert_called_once_with("vector-index", True, True)
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_assign_tag_to_file_success(mocker):
    """assign_tag_to_file issues a PUT on the relation URL and reports success on 201."""
    created = AsyncMock()
    created.status_code = 201

    http = AsyncMock()
    http.request = AsyncMock(return_value=created)
    webdav = WebDAVClient(http, "testuser")

    outcome = await webdav.assign_tag_to_file(12345, 42)

    assert outcome is True

    # One request only: PUT .../systemtags-relations/files/<file>/<tag>
    http.request.assert_called_once()
    method, url = http.request.call_args.args[:2]
    assert method == "PUT"
    assert "/systemtags-relations/files/12345/42" in url
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_assign_tag_to_file_already_assigned(mocker):
    """assign_tag_to_file treats a 409 Conflict (tag already assigned) as success."""
    conflict = AsyncMock()
    conflict.status_code = 409

    http = AsyncMock()
    http.request = AsyncMock(return_value=conflict)
    webdav = WebDAVClient(http, "testuser")

    outcome = await webdav.assign_tag_to_file(12345, 42)

    # A 409 must not surface as an error.
    assert outcome is True
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_remove_tag_from_file_success(mocker):
    """remove_tag_from_file issues a DELETE on the relation URL and reports success on 204."""
    no_content = AsyncMock()
    no_content.status_code = 204

    http = AsyncMock()
    http.request = AsyncMock(return_value=no_content)
    webdav = WebDAVClient(http, "testuser")

    outcome = await webdav.remove_tag_from_file(12345, 42)

    assert outcome is True

    # One request only: DELETE .../systemtags-relations/files/<file>/<tag>
    http.request.assert_called_once()
    method, url = http.request.call_args.args[:2]
    assert method == "DELETE"
    assert "/systemtags-relations/files/12345/42" in url
|
||||
|
||||
|
||||
@pytest.mark.unit
async def test_remove_tag_from_file_not_assigned(mocker):
    """remove_tag_from_file treats a 404 Not Found (tag was not assigned) as success."""
    missing = AsyncMock()
    missing.status_code = 404

    http = AsyncMock()
    http.request = AsyncMock(return_value=missing)
    webdav = WebDAVClient(http, "testuser")

    outcome = await webdav.remove_tag_from_file(12345, 42)

    # A 404 must not surface as an error.
    assert outcome is True
|
||||
|
||||
Reference in New Issue
Block a user