From 25ef33de7f137212f195b4b12b5930f6809f42cf Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Thu, 20 Nov 2025 16:50:13 +0100 Subject: [PATCH] feat: Use Ollama native batch API in embed_batch() - Switch from sequential loop to /api/embed batch endpoint - Use 'input' array parameter instead of individual 'prompt' requests - Process in chunks of 32 to avoid quality degradation (issue #6262) - Reduces HTTP overhead: 128 texts = 4 requests instead of 128 - Maintains backward compatibility with embed() for single embeddings Ref: ollama/ollama#6262 --- nextcloud_mcp_server/providers/ollama.py | 29 +++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/nextcloud_mcp_server/providers/ollama.py b/nextcloud_mcp_server/providers/ollama.py index c97e7b3..5fd37d0 100644 --- a/nextcloud_mcp_server/providers/ollama.py +++ b/nextcloud_mcp_server/providers/ollama.py @@ -92,14 +92,21 @@ class OllamaProvider(Provider): response.raise_for_status() return response.json()["embedding"] - async def embed_batch(self, texts: list[str]) -> list[list[float]]: + async def embed_batch( + self, texts: list[str], batch_size: int = 32 + ) -> list[list[float]]: """ - Generate embeddings for multiple texts (batched requests). + Generate embeddings for multiple texts using Ollama's batch API. - Note: Ollama doesn't have native batch API, so we send requests sequentially. + Uses /api/embed endpoint with array input for efficient batch processing. + Conservative batch size (32) prevents quality degradation observed in + Ollama issue #6262 with larger batches. + + Note: Ollama processes batches serially, not in parallel. Args: texts: List of texts to embed + batch_size: Maximum texts per batch (default: 32) Returns: List of vector embeddings @@ -112,11 +119,17 @@ class OllamaProvider(Provider): "Embedding not supported - no embedding_model configured" ) - embeddings = [] - for text in texts: - embedding = await self.embed(text) - embeddings.append(embedding) - return embeddings + all_embeddings = [] + for i in range(0, len(texts), batch_size): + batch = texts[i : i + batch_size] + response = await self.client.post( + f"{self.base_url}/api/embed", + json={"model": self.embedding_model, "input": batch}, + ) + response.raise_for_status() + all_embeddings.extend(response.json()["embeddings"]) + + return all_embeddings async def _detect_dimension(self): """