# nextcloud-mcp-server/.github/workflows/rag-evaluation.yml

name: RAG Evaluation

# Manual trigger only. Every input is optional and falls back to its default.
on:
  workflow_dispatch:
    inputs:
      # Forwarded to the test step as RAG_MANUAL_PATH.
      manual_path:
        description: 'Path to Nextcloud User Manual PDF in Nextcloud'
        default: 'Nextcloud Manual.pdf'
        required: false
      # Forwarded to the compose and test steps as OPENAI_EMBEDDING_MODEL.
      embedding_model:
        description: 'OpenAI embedding model'
        default: 'openai/text-embedding-3-small'
        required: false
      # Forwarded to the compose and test steps as OPENAI_GENERATION_MODEL.
      generation_model:
        description: 'OpenAI generation model'
        default: 'openai/gpt-4o-mini'
        required: false

jobs:
  rag-evaluation:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Listing any permission sets every unlisted scope to "none", so the
    # scopes this job relies on are spelled out explicitly:
    #   contents: read - lets actions/checkout fetch the repository (needed
    #                    when the repository is not public)
    #   models: read   - lets GITHUB_TOKEN authenticate against the
    #                    models.github.ai inference endpoint used by the steps
    permissions:
      contents: read
      models: read

    steps:
      # Check out the repository. No later step in this job performs an
      # authenticated git operation, so the token is not kept in the
      # checkout's git configuration.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          persist-credentials: false

      - name: Run docker compose with vector sync
        uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
        # Variables exported here are the ones passed to docker-compose.ci.yml.
        env:
          OPENAI_BASE_URL: "https://models.github.ai/inference"
          OPENAI_API_KEY: "${{ secrets.GITHUB_TOKEN }}"
          OPENAI_EMBEDDING_MODEL: "${{ inputs.embedding_model }}"
          OPENAI_GENERATION_MODEL: "${{ inputs.generation_model }}"
          VECTOR_SYNC_SCAN_INTERVAL: "5"
        with:
          # Images are rebuilt when the stack is brought up.
          up-flags: "--build"
          # Base compose file first, then the CI file.
          compose-file: |
            ./docker-compose.yml
            ./docker-compose.ci.yml

      # uv is needed by the later `uv run pytest` step.
      - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
        name: Install the latest version of uv

      - name: Wait for Nextcloud to be ready
        run: |
          # Poll the serverinfo OCS endpoint without credentials. An HTTP 401
          # reply is taken as the signal that Nextcloud is serving requests.
          echo "Waiting for Nextcloud..."
          url="http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info"
          max_attempts=60
          attempt=0
          # Each probe is time-boxed so a stalled connection cannot hang the
          # step; a probe that gets no response prints 000 and is retried.
          until curl -o /dev/null -s -w "%{http_code}\n" \
              --connect-timeout 5 --max-time 10 "$url" | grep -q "401"; do
            attempt=$((attempt + 1))
            if [ "$attempt" -ge "$max_attempts" ]; then
              echo "Service did not become ready in time."
              exit 1
            fi
            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
            sleep 5
          done
          echo "Nextcloud is ready."

      - name: Wait for MCP server to be ready
        run: |
          # Poll the MCP liveness endpoint until it answers with HTTP 200.
          echo "Waiting for MCP server..."
          url="http://localhost:8000/health/live"
          max_attempts=30
          attempt=0
          # Each probe is time-boxed so a stalled connection cannot hang the
          # step; a probe that gets no response prints 000 and is retried.
          until curl -o /dev/null -s -w "%{http_code}\n" \
              --connect-timeout 2 --max-time 5 "$url" | grep -q "200"; do
            attempt=$((attempt + 1))
            if [ "$attempt" -ge "$max_attempts" ]; then
              echo "MCP server did not become ready in time."
              exit 1
            fi
            echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
            sleep 2
          done
          echo "MCP server is ready."

      - name: Run RAG evaluation tests
        env:
          # Nextcloud instance and credentials the tests connect with.
          NEXTCLOUD_HOST: "http://localhost:8080"
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
          RAG_MANUAL_PATH: ${{ inputs.manual_path }}
          # Same endpoint, token and models that the compose step receives.
          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
          OPENAI_BASE_URL: "https://models.github.ai/inference"
          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
        run: |
          # --junitxml writes pytest-results.xml, the file that the
          # "Upload test results" step publishes as an artifact.
          uv run pytest tests/integration/test_rag.py \
            -v --log-cli-level=INFO --provider openai \
            --junitxml=pytest-results.xml

      - name: Capture MCP container logs
        # always(): the logs are printed even when an earlier step failed.
        if: always()
        run: |
          echo "=== MCP Container Logs ==="
          # Only the last 500 lines of the mcp service are shown.
          docker compose logs --tail=500 mcp

      - name: Upload test results
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        # always(): the upload is attempted even when an earlier step failed.
        if: always()
        with:
          name: rag-evaluation-results
          retention-days: 30
          path: pytest-results.xml