From 92b97bda0076b2388429d6449d2577eae99d09da Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Sun, 23 Nov 2025 17:24:48 +0100 Subject: [PATCH] fix: Add rate limit retry logic to OpenAI provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add exponential backoff retry handling for OpenAI API rate limits (429 errors). This is needed for GitHub Models API which has stricter rate limits than standard OpenAI API. - Add retry_on_rate_limit decorator with exponential backoff - Max 5 attempts (4 retries) with delays: 2s → 4s → 8s → 16s, doubling each time and capped at 60s - Apply to embed(), _embed_batch_request(), and generate() methods 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- nextcloud_mcp_server/providers/openai.py | 60 ++++++++++++++++++++---- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/nextcloud_mcp_server/providers/openai.py b/nextcloud_mcp_server/providers/openai.py index 786736c..294f261 100644 --- a/nextcloud_mcp_server/providers/openai.py +++ b/nextcloud_mcp_server/providers/openai.py @@ -7,13 +7,48 @@ Supports: """ import logging +from functools import wraps -from openai import AsyncOpenAI +import anyio +from openai import AsyncOpenAI, RateLimitError from .base import Provider logger = logging.getLogger(__name__) +# Rate limit retry configuration +MAX_RETRIES = 5 +INITIAL_RETRY_DELAY = 2.0 # seconds +MAX_RETRY_DELAY = 60.0 # seconds + + +def retry_on_rate_limit(func): + """Decorator to retry on OpenAI rate limit errors with exponential backoff.""" + + @wraps(func) + async def wrapper(*args, **kwargs): + retry_delay = INITIAL_RETRY_DELAY + last_error: Exception | None = None + + for attempt in range(1, MAX_RETRIES + 1): + try: + return await func(*args, **kwargs) + except RateLimitError as e: + last_error = e + if attempt < MAX_RETRIES: + logger.warning( + f"Rate limit hit (attempt {attempt}/{MAX_RETRIES}), " + f"retrying in {retry_delay:.1f}s..." 
+ ) + await anyio.sleep(retry_delay) + retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY) + + logger.error(f"Rate limit exceeded after {MAX_RETRIES} attempts") + raise last_error # type: ignore[misc] + + return wrapper + + # Well-known embedding dimensions for OpenAI models OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = { "text-embedding-3-small": 1536, @@ -86,6 +121,7 @@ class OpenAIProvider(Provider): """Whether this provider supports text generation.""" return self.generation_model is not None + @retry_on_rate_limit async def embed(self, text: str) -> list[float]: """ Generate embedding vector for text. @@ -151,14 +187,8 @@ class OpenAIProvider(Provider): for i in range(0, len(texts), batch_size): batch = texts[i : i + batch_size] - response = await self.client.embeddings.create( - input=batch, - model=self.embedding_model, - ) - - # Sort by index to maintain order - sorted_data = sorted(response.data, key=lambda x: x.index) - batch_embeddings = [item.embedding for item in sorted_data] + # Use helper method with retry logic for each batch + batch_embeddings = await self._embed_batch_request(batch) all_embeddings.extend(batch_embeddings) # Update dimension if not set @@ -171,6 +201,17 @@ class OpenAIProvider(Provider): return all_embeddings + @retry_on_rate_limit + async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]: + """Make a single batch embedding request with retry logic.""" + response = await self.client.embeddings.create( + input=batch, + model=self.embedding_model, + ) + # Sort by index to maintain order + sorted_data = sorted(response.data, key=lambda x: x.index) + return [item.embedding for item in sorted_data] + def get_dimension(self) -> int: """ Get embedding dimension. @@ -194,6 +235,7 @@ class OpenAIProvider(Provider): ) return self._dimension + @retry_on_rate_limit async def generate(self, prompt: str, max_tokens: int = 500) -> str: """ Generate text from a prompt.