diff --git a/.github/workflows/rag-evaluation.yml b/.github/workflows/rag-evaluation.yml
new file mode 100644
index 0000000..9804881
--- /dev/null
+++ b/.github/workflows/rag-evaluation.yml
@@ -0,0 +1,271 @@
+# Manually triggered workflow (workflow_dispatch is the only trigger).
+name: RAG Evaluation
+
+on:
+  workflow_dispatch:
+    # Both inputs are optional; the defaults below apply when they are omitted.
+    inputs:
+      embedding_model:
+        description: 'OpenAI embedding model'
+        default: 'openai/text-embedding-3-small'
+        required: false
+      generation_model:
+        description: 'OpenAI generation model'
+        default: 'openai/gpt-4o-mini'
+        required: false
+
+jobs:
+  rag-evaluation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    # Steps in this job hand secrets.GITHUB_TOKEN to the GitHub Models
+    # inference endpoint (https://models.github.ai/inference) as the API key.
+    # The token is only accepted there when the job requests `models: read`.
+    # Declaring a permissions block drops every scope not listed, so
+    # `contents: read` is kept for the checkout steps.
+    permissions:
+      contents: read
+      models: read
+
+    steps:
+      # Check out this repository, including its git submodules.
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3  # v6.0.0
+        with:
+          submodules: 'true'
+
+      # Second checkout: fetch the nextcloud/documentation repository into
+      # ./nextcloud-docs so the user manual can be built from it.
+      - name: Clone Nextcloud documentation
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3  # v6.0.0
+        with:
+          path: 'nextcloud-docs'
+          repository: 'nextcloud/documentation'
+
+      - name: Install Sphinx and LaTeX dependencies
+        run: |
+          # System packages for the Sphinx/LaTeX toolchain, installed via apt.
+          packages=(
+            python3-sphinx
+            python3-pip
+            latexmk
+            texlive-latex-recommended
+            texlive-latex-extra
+            texlive-fonts-recommended
+            texlive-fonts-extra
+          )
+          sudo apt-get update
+          sudo apt-get install -y "${packages[@]}"
+
+      - name: Build User Manual PDF
+        # Run inside the user manual sources of the cloned documentation repo.
+        working-directory: nextcloud-docs/user_manual
+        run: |
+          # Python requirements live one level up, at the documentation repo root.
+          pip3 install -r ../requirements.txt
+          make latexpdf
+          # List the LaTeX output directory so its contents appear in the job log.
+          ls -la _build/latex/
+          # Copy the PDF two levels up (the workspace root) under the name the
+          # upload step expects.
+          cp _build/latex/NextcloudUserManual.pdf ../../Nextcloud_User_Manual.pdf
+          echo "PDF built successfully"
+
+      ###### Required to build OIDC App ######
+      - name: Set up php 8.4
+        uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f  # v2
+        with:
+          # Quoted so YAML treats the version as a string, not a float; an
+          # unquoted value such as 8.10 would silently collapse to 8.1.
+          php-version: '8.4'
+          # No coverage driver is requested.
+          coverage: none
+
+      - name: Install OIDC app composer dependencies
+        # The OIDC app sources are expected under third_party/oidc.
+        working-directory: third_party/oidc
+        run: |
+          # --no-dev: skip development-only packages.
+          composer install --no-dev
+      ###### Required to build OIDC App ######
+
+      - name: Run docker compose with vector sync
+        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72  # v2.4.1
+        with:
+          compose-file: './docker-compose.yml'
+          # Rebuild images as part of `up`.
+          up-flags: '--build'
+        env:
+          # Environment exported to the compose action: OpenAI settings plus
+          # vector sync. NOTE(review): presumably docker-compose.yml forwards
+          # these variables to the MCP container; confirm against that file.
+          VECTOR_SYNC_ENABLED: 'true'
+          VECTOR_SYNC_SCAN_INTERVAL: '30'
+          # The workflow token is used as the API key for the endpoint below.
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: 'https://models.github.ai/inference'
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+
+      # Provides the `uv` command used by the test step at the end of the job.
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244  # v7.1.4
+
+      - name: Wait for Nextcloud to be ready
+        run: |
+          # Poll the serverinfo OCS endpoint without credentials. An HTTP 401
+          # is taken as "ready". Give up after 60 polls, 5 seconds apart.
+          echo "Waiting for Nextcloud..."
+          limit=60
+          tries=0
+          while ! curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+            tries=$((tries + 1))
+            if [ "$tries" -ge "$limit" ]; then
+              echo "Service did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $tries/$limit: Service not ready, sleeping for 5 seconds..."
+            sleep 5
+          done
+          echo "Nextcloud is ready."
+
+      - name: Wait for MCP server to be ready
+        run: |
+          # Poll the MCP /health endpoint until it answers HTTP 200.
+          # Give up after 30 polls, 2 seconds apart.
+          echo "Waiting for MCP server..."
+          limit=30
+          tries=0
+          while ! curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health | grep -q "200"; do
+            tries=$((tries + 1))
+            if [ "$tries" -ge "$limit" ]; then
+              echo "MCP server did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $tries/$limit: MCP not ready, sleeping for 2 seconds..."
+            sleep 2
+          done
+          echo "MCP server is ready."
+
+      - name: Upload User Manual PDF to Nextcloud
+        run: |
+          # PUT the PDF into the admin user's WebDAV root and capture only the
+          # HTTP status code of the response.
+          echo "Uploading Nextcloud_User_Manual.pdf to Nextcloud..."
+          status=$(curl -s -o /dev/null -w "%{http_code}" -u admin:admin \
+            -X PUT \
+            -T Nextcloud_User_Manual.pdf \
+            "http://localhost:8080/remote.php/dav/files/admin/Nextcloud_User_Manual.pdf")
+
+          # 201 and 204 are accepted as success; anything else fails the step.
+          case "$status" in
+            201|204)
+              echo "PDF uploaded successfully (HTTP $status)"
+              ;;
+            *)
+              echo "Failed to upload PDF (HTTP $status)"
+              exit 1
+              ;;
+          esac
+
+      - name: Create vector-index tag
+        id: create_tag
+        run: |
+          # Create the "vector-index" system tag and expose its numeric ID as
+          # the step output `tag_id`. If the create response carries no ID,
+          # fall back to looking the tag up over WebDAV.
+          # NOTE(review): confirm that this OCS route exists on the deployed
+          # Nextcloud version; if it does not, the lookup below is the only
+          # path that can yield an ID.
+          echo "Creating vector-index tag..."
+          RESPONSE=$(curl -s -u admin:admin \
+            -X POST \
+            -H 'Content-Type: application/json' \
+            -H 'OCS-APIRequest: true' \
+            -d '{"name":"vector-index","userVisible":true,"userAssignable":true}' \
+            "http://localhost:8080/ocs/v2.php/apps/systemtags/api/v1/tags")
+
+          echo "Create tag response: $RESPONSE"
+
+          # Take the first `"id":<digits>` occurrence from the create response.
+          TAG_ID=$(echo "$RESPONSE" | grep -oP '(?<="id":)[0-9]+' | head -1 || echo "")
+
+          if [ -z "$TAG_ID" ]; then
+            echo "Tag may already exist, looking it up..."
+            # Ask explicitly for each tag's id and display name. The response
+            # is normalised to one <d:response> element per line so the id is
+            # read from the same element whose display name is exactly
+            # "vector-index", rather than from whichever tag is listed first.
+            TAG_ID=$(curl -s -u admin:admin \
+              -X PROPFIND \
+              -H 'Content-Type: application/xml' \
+              -H 'Depth: 1' \
+              -d '<?xml version="1.0"?><d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns"><d:prop><oc:id/><oc:display-name/></d:prop></d:propfind>' \
+              http://localhost:8080/remote.php/dav/systemtags/ \
+              | tr -d '\n' \
+              | sed 's#</d:response>#&\n#g' \
+              | grep -F '>vector-index<' \
+              | grep -oP '(?<=<oc:id>)[0-9]+(?=</oc:id>)' | head -1 || echo "")
+          fi
+
+          if [ -z "$TAG_ID" ]; then
+            echo "ERROR: Could not create or find vector-index tag"
+            exit 1
+          fi
+
+          echo "Tag ID: $TAG_ID"
+          echo "tag_id=$TAG_ID" >> "$GITHUB_OUTPUT"
+
+      - name: Get file ID of uploaded PDF
+        id: get_file_id
+        run: |
+          # Resolve the numeric file ID of the uploaded PDF and expose it as
+          # the step output `file_id`.
+          echo "Getting file ID for Nextcloud_User_Manual.pdf..."
+
+          # PROPFIND for the oc:fileid property only. The lookarounds anchor
+          # the match to the <oc:fileid> element, and `head -1` guarantees a
+          # single value even if the element appears more than once.
+          FILE_ID=$(curl -s -u admin:admin \
+            -X PROPFIND \
+            -H 'Content-Type: application/xml' \
+            -H 'Depth: 0' \
+            -d '<?xml version="1.0"?><d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns"><d:prop><oc:fileid/></d:prop></d:propfind>' \
+            "http://localhost:8080/remote.php/dav/files/admin/Nextcloud_User_Manual.pdf" \
+            | grep -oP '(?<=<oc:fileid>)[0-9]+(?=</oc:fileid>)' | head -1 || echo "")
+
+          if [ -z "$FILE_ID" ]; then
+            echo "ERROR: Could not find file ID"
+            exit 1
+          fi
+
+          echo "Found file ID: $FILE_ID"
+          echo "file_id=$FILE_ID" >> "$GITHUB_OUTPUT"
+
+      - name: Tag file with vector-index
+        env:
+          # IDs produced by the two preceding lookup steps.
+          TAG_ID: ${{ steps.create_tag.outputs.tag_id }}
+          FILE_ID: ${{ steps.get_file_id.outputs.file_id }}
+        run: |
+          echo "Tagging file $FILE_ID with tag $TAG_ID..."
+
+          # Body-less PUT on the systemtags-relations resource; only the HTTP
+          # status code is captured.
+          status=$(curl -s -o /dev/null -w "%{http_code}" -u admin:admin \
+            -X PUT \
+            -H 'Content-Type: application/json' \
+            -H 'Content-Length: 0' \
+            "http://localhost:8080/remote.php/dav/systemtags-relations/files/$FILE_ID/$TAG_ID")
+
+          # 201 and 409 are both treated as success; anything else fails the
+          # step.
+          case "$status" in
+            201|409)
+              echo "File tagged successfully (HTTP $status)"
+              ;;
+            *)
+              echo "Failed to tag file (HTTP $status)"
+              exit 1
+              ;;
+          esac
+
+      - name: Wait for vector sync to complete indexing
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+        run: |
+          # Poll the MCP server until vector sync reports at least one indexed
+          # document and nothing pending. This is best-effort: on timeout the
+          # step only prints a warning and the job continues.
+          echo "Waiting for vector sync to index the manual..."
+          max_attempts=60
+          attempt=0
+          # Tracks success explicitly so that finishing on the very last
+          # attempt is not mistaken for a timeout after the loop.
+          indexing_done=false
+
+          # Wait for initial scan to pick up the file
+          sleep 10
+
+          while [ "$attempt" -lt "$max_attempts" ]; do
+            attempt=$((attempt + 1))
+
+            # Log the MCP health payload for diagnostics.
+            STATUS=$(curl -s http://localhost:8000/health || echo "{}")
+            echo "Attempt $attempt/$max_attempts: $STATUS"
+
+            # Ask the MCP server for its sync counters via the
+            # nc_get_vector_sync_status tool.
+            # NOTE(review): this is a bare JSON-RPC POST with no Accept header
+            # or session setup; confirm the server accepts that on /mcp.
+            RESULT=$(curl -s -X POST http://localhost:8000/mcp \
+              -H "Content-Type: application/json" \
+              -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"nc_get_vector_sync_status","arguments":{}}}' \
+              2>/dev/null || echo "{}")
+
+            echo "Vector sync status: $RESULT"
+
+            # Missing or unparsable counters fall back to the "not done yet"
+            # values (indexed=0, pending=1).
+            INDEXED=$(echo "$RESULT" | jq -r '.result.structuredContent.indexed // 0' 2>/dev/null || echo "0")
+            PENDING=$(echo "$RESULT" | jq -r '.result.structuredContent.pending // 1' 2>/dev/null || echo "1")
+
+            # Guard the integer comparisons below against non-numeric output.
+            case "$INDEXED" in
+              ''|*[!0-9]*) INDEXED=0 ;;
+            esac
+            case "$PENDING" in
+              ''|*[!0-9]*) PENDING=1 ;;
+            esac
+
+            echo "Indexed: $INDEXED, Pending: $PENDING"
+
+            if [ "$INDEXED" -gt 0 ] && [ "$PENDING" -eq 0 ]; then
+              echo "Indexing complete! $INDEXED documents indexed."
+              indexing_done=true
+              break
+            fi
+
+            sleep 10
+          done
+
+          if [ "$indexing_done" != "true" ]; then
+            echo "WARNING: Indexing may not be complete, proceeding anyway..."
+          fi
+
+      - name: Run RAG evaluation tests
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+          # The workflow token is used as the API key for the endpoint below.
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+        run: |
+          # --junitxml writes the report that the following step uploads as
+          # pytest-results.xml; without it there is nothing to upload.
+          uv run pytest tests/integration/test_rag_openai.py -v \
+            --log-cli-level=INFO \
+            --junitxml=pytest-results.xml
+
+      - name: Upload test results
+        # Run even when an earlier step failed, so results from a failing run
+        # are still published.
+        if: always()
+        # Pinned to a commit SHA like every other action in this workflow.
+        # NOTE(review): confirm the SHA against the v4.6.2 release before merging.
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
+        with:
+          name: rag-evaluation-results
+          path: |
+            pytest-results.xml
+          retention-days: 30