bump: version 0.48.3 → 0.48.4

fix: Add rate limit retry logic to OpenAI provider
Add exponential backoff retry handling for OpenAI API rate limits (429 errors). This is needed for GitHub Models API which has stricter rate limits than standard OpenAI API. - Add retry_on_rate_limit decorator with exponential backoff - Max 5 attempts with delays between them: 2s → 4s → 8s → 16s - Apply to embed(), _embed_batch_request(), and generate() methods 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 16:40:06 +00:00 · 2025-11-23 17:24:48 +01:00 · 2025-11-23 16:33:39 +01:00 · 2025-11-23 16:12:37 +01:00 · 2025-11-23 15:43:45 +01:00 · 2025-11-23 12:43:43 +01:00
8 changed files with 100 additions and 20 deletions
@@ -29,16 +29,17 @@ jobs:
      - name: Run docker compose with vector sync
        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
        with:
-          compose-file: "./docker-compose.yml"
+          compose-file: |
+            ./docker-compose.yml
+            ./docker-compose.ci.yml
          up-flags: "--build"
        env:
-          # Override MCP container environment for OpenAI + vector sync
-          VECTOR_SYNC_ENABLED: "true"
-          VECTOR_SYNC_SCAN_INTERVAL: "5"
+          # Environment variables passed to docker-compose.ci.yml
          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
          OPENAI_BASE_URL: "https://models.github.ai/inference"
          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+          VECTOR_SYNC_SCAN_INTERVAL: "5"

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
@@ -86,11 +87,17 @@ jobs:
          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
        run: |
-          uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO --provider openai
+          uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai
+
+      - name: Capture MCP container logs
+        if: always()
+        run: |
+          echo "=== MCP Container Logs ==="
+          docker compose logs mcp --tail=500

      - name: Upload test results
        if: always()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5
        with:
          name: rag-evaluation-results
          path: |
@@ -1,3 +1,9 @@
+## v0.48.4 (2025-11-23)
+
+### Fix
+
+- Add rate limit retry logic to OpenAI provider
+
 ## v0.48.3 (2025-11-23)

 ### Fix
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.48.3
-appVersion: "0.48.3"
+version: 0.48.4
+appVersion: "0.48.4"
 keywords:
  - nextcloud
  - mcp
@@ -0,0 +1,25 @@
+# CI-specific overrides for RAG evaluation pipeline
+# This file is used by the rag-evaluation.yml workflow to configure the MCP
+# container with OpenAI/GitHub Models API for vector embeddings.
+#
+# Usage:
+#   docker compose -f docker-compose.yml -f docker-compose.ci.yml up
+#
+# Environment variables (set in CI workflow):
+#   OPENAI_API_KEY - API key for embeddings (GitHub Models uses GITHUB_TOKEN)
+#   OPENAI_BASE_URL - API endpoint (e.g., https://models.github.ai/inference)
+#   OPENAI_EMBEDDING_MODEL - Model name (e.g., openai/text-embedding-3-small)
+#   OPENAI_GENERATION_MODEL - Model name for generation (e.g., openai/gpt-4o-mini)
+
+services:
+  mcp:
+    environment:
+      # OpenAI provider configuration (required for CI vector sync)
+      # OPENAI_API_KEY has no fallback here; the other OPENAI_* values fall
+      # back to the defaults after ":-" when the variable is unset or empty.
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://models.github.ai/inference}
+      - OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-openai/text-embedding-3-small}
+      - OPENAI_GENERATION_MODEL=${OPENAI_GENERATION_MODEL:-openai/gpt-4o-mini}
+      # Faster sync for CI
+      - VECTOR_SYNC_SCAN_INTERVAL=${VECTOR_SYNC_SCAN_INTERVAL:-5}
+      # Enable document processing for PDF parsing
+      - ENABLE_DOCUMENT_PROCESSING=true
@@ -34,7 +34,7 @@ services:
      - ./app-hooks:/docker-entrypoint-hooks.d:ro
      # Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
      # The post-installation hook will register /opt/apps as an additional app directory
-      - ./third_party:/opt/apps:ro
+      #- ./third_party:/opt/apps:ro
    environment:
      - NEXTCLOUD_TRUSTED_DOMAINS=app
      - NEXTCLOUD_ADMIN_USER=admin
@@ -7,13 +7,48 @@ Supports:
 """

 import logging
+from functools import wraps

-from openai import AsyncOpenAI
+import anyio
+from openai import AsyncOpenAI, RateLimitError

 from .base import Provider

 logger = logging.getLogger(__name__)

+# Rate limit retry configuration
+MAX_RETRIES = 5
+INITIAL_RETRY_DELAY = 2.0  # seconds
+MAX_RETRY_DELAY = 60.0  # seconds
+
+
+def retry_on_rate_limit(func):
+    """Decorator to retry on OpenAI rate limit errors with exponential backoff.
+
+    Wraps an async callable so that a ``RateLimitError`` raised by it triggers
+    another call, up to ``MAX_RETRIES`` calls in total. The wait before each
+    new call starts at ``INITIAL_RETRY_DELAY`` seconds and doubles every time,
+    capped at ``MAX_RETRY_DELAY``. With the module defaults (5 attempts, 2.0s
+    initial delay) the waits are 2s, 4s, 8s and 16s; no wait follows the
+    final attempt.
+
+    Any other exception propagates immediately without a retry.
+
+    Args:
+        func: Async callable to wrap.
+
+    Returns:
+        Async wrapper that forwards all arguments to ``func``.
+
+    Raises:
+        RateLimitError: The error from the last attempt, once every attempt
+            has been rate limited.
+    """
+
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        retry_delay = INITIAL_RETRY_DELAY
+        last_error: Exception | None = None
+
+        # Attempts are numbered from 1 so the log message reads "attempt 1/5"
+        for attempt in range(1, MAX_RETRIES + 1):
+            try:
+                return await func(*args, **kwargs)
+            except RateLimitError as e:
+                last_error = e
+                # Only wait when another attempt will follow
+                if attempt < MAX_RETRIES:
+                    logger.warning(
+                        f"Rate limit hit (attempt {attempt}/{MAX_RETRIES}), "
+                        f"retrying in {retry_delay:.1f}s..."
+                    )
+                    await anyio.sleep(retry_delay)
+                    retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY)
+
+        # Every attempt was rate limited: surface the most recent error
+        logger.error(f"Rate limit exceeded after {MAX_RETRIES} attempts")
+        raise last_error  # type: ignore[misc]
+
+    return wrapper
+
+
 # Well-known embedding dimensions for OpenAI models
 OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = {
    "text-embedding-3-small": 1536,
@@ -86,6 +121,7 @@ class OpenAIProvider(Provider):
        """Whether this provider supports text generation."""
        return self.generation_model is not None

+    @retry_on_rate_limit
    async def embed(self, text: str) -> list[float]:
        """
        Generate embedding vector for text.
@@ -151,14 +187,8 @@ class OpenAIProvider(Provider):
        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]

-            response = await self.client.embeddings.create(
-                input=batch,
-                model=self.embedding_model,
-            )
-
-            # Sort by index to maintain order
-            sorted_data = sorted(response.data, key=lambda x: x.index)
-            batch_embeddings = [item.embedding for item in sorted_data]
+            # Use helper method with retry logic for each batch
+            batch_embeddings = await self._embed_batch_request(batch)
            all_embeddings.extend(batch_embeddings)

            # Update dimension if not set
@@ -171,6 +201,17 @@ class OpenAIProvider(Provider):

        return all_embeddings

+    @retry_on_rate_limit
+    async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]:
+        """
+        Make a single batch embedding request with retry logic.
+
+        Args:
+            batch: Texts to embed in one API call
+
+        Returns:
+            One embedding per item in the response, ordered by the
+            response's ``index`` field
+
+        Raises:
+            RateLimitError: If the request is still rate limited after the
+                retries performed by ``retry_on_rate_limit``
+        """
+        response = await self.client.embeddings.create(
+            input=batch,
+            model=self.embedding_model,
+        )
+        # Sort by index to maintain order
+        sorted_data = sorted(response.data, key=lambda x: x.index)
+        return [item.embedding for item in sorted_data]
+
    def get_dimension(self) -> int:
        """
        Get embedding dimension.
@@ -194,6 +235,7 @@ class OpenAIProvider(Provider):
            )
        return self._dimension

+    @retry_on_rate_limit
    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
        """
        Generate text from a prompt.
@@ -1,6 +1,6 @@
 [project]
 name = "nextcloud-mcp-server"
-version = "0.48.3"
+version = "0.48.4"
 description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
 authors = [
    {name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -1936,7 +1936,7 @@ wheels = [

 [[package]]
 name = "nextcloud-mcp-server"
-version = "0.48.3"
+version = "0.48.4"
 source = { editable = "." }
 dependencies = [
    { name = "aiosqlite" },
Author	SHA1	Message	Date
github-actions[bot]	e3a6894904	bump: version 0.48.3 → 0.48.4	2025-11-23 16:40:06 +00:00
Chris Coutinho	92b97bda00	fix: Add rate limit retry logic to OpenAI provider Add exponential backoff retry handling for OpenAI API rate limits (429 errors). This is needed for GitHub Models API which has stricter rate limits than standard OpenAI API. - Add retry_on_rate_limit decorator with exponential backoff - Max 5 attempts with delays between them: 2s → 4s → 8s → 16s - Apply to embed(), _embed_batch_request(), and generate() methods 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-23 17:24:48 +01:00
Chris Coutinho	d5c6039296	ci: Update rag pipeline	2025-11-23 16:33:39 +01:00
Chris Coutinho	3fa13c8bfd	ci: Update rag pipeline	2025-11-23 16:12:37 +01:00
Chris Coutinho	9d306b71fa	ci: Fix pytest path	2025-11-23 15:43:45 +01:00
Chris Coutinho	38a936c120	Merge pull request #352 from cbcoutinho/renovate/major-github-artifact-actions chore(deps): update actions/upload-artifact action to v5	2025-11-23 12:43:43 +01:00
renovate-bot-cbcoutinho[bot]	0b2d449ffa	chore(deps): update actions/upload-artifact action to v5	2025-11-23 05:04:36 +00:00
Chris Coutinho	d881373dce	ci: Remove third_party from app mounts	2025-11-23 05:48:17 +01:00