From 704b8a9207d3e6258af309d732ebc50d2c5720be Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Tue, 13 Jan 2026 15:16:32 -0600 Subject: [PATCH] fix(backend): use AsyncOpenAI to prevent blocking event loop Critical async fix: - Changed from sync OpenAI client to AsyncOpenAI - Added await to embeddings.create() call - Prevents blocking the event loop during API calls Impact: - Before: API calls blocked entire event loop (200-500ms per embedding) - After: Non-blocking concurrent request handling - Aligns with async patterns used elsewhere (llm.py, codex.py, chat/service.py) Location: backend/api/features/store/embeddings.py:15, 31, 93 Testing: - Verify embeddings still generate correctly - Check concurrent request handling improves --- .../backend/backend/api/features/store/embeddings.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/autogpt_platform/backend/backend/api/features/store/embeddings.py b/autogpt_platform/backend/backend/api/features/store/embeddings.py index e9d7345bb2..8eaaaa1f4b 100644 --- a/autogpt_platform/backend/backend/api/features/store/embeddings.py +++ b/autogpt_platform/backend/backend/api/features/store/embeddings.py @@ -12,7 +12,7 @@ from functools import cache from typing import Any import prisma -from openai import OpenAI +from openai import AsyncOpenAI from prisma.enums import ContentType from backend.data.db import execute_raw_with_schema, query_raw_with_schema @@ -28,9 +28,9 @@ EMBEDDING_DIM = 1536 @cache -def get_openai_client() -> OpenAI | None: +def get_openai_client() -> AsyncOpenAI | None: """ - Get or create a singleton OpenAI client for connection reuse. + Get or create a singleton async OpenAI client for connection reuse. Returns None if API key is not configured. """ @@ -38,7 +38,7 @@ def get_openai_client() -> OpenAI | None: api_key = settings.secrets.openai_internal_api_key if not api_key: return None - return OpenAI(api_key=api_key) + return AsyncOpenAI(api_key=api_key) def build_searchable_text( @@ -90,7 +90,7 @@ async def generate_embedding(text: str) -> list[float] | None: truncated_text = text[:32000] start_time = time.time() - response = client.embeddings.create( + response = await client.embeddings.create( model=EMBEDDING_MODEL, input=truncated_text, )