fix(backend): use AsyncOpenAI to prevent blocking event loop

Critical async fix:
- Changed from sync OpenAI client to AsyncOpenAI
- Added `await` to the embeddings.create() call
- Prevents blocking the event loop during API calls

Impact:
- Before: API calls blocked the entire event loop (200-500ms per embedding)
- After: Non-blocking concurrent request handling
- Aligns with async patterns used elsewhere (llm.py, codex.py, chat/service.py)

Location: backend/api/features/store/embeddings.py (lines 15, 31, 93)

Testing:
- Verify that embeddings are still generated correctly
- Check that concurrent request handling improves
This commit is contained in:
Zamil Majdy
2026-01-13 15:16:32 -06:00
parent 1a5abcc36a
commit 704b8a9207

View File

@@ -12,7 +12,7 @@ from functools import cache
from typing import Any
import prisma
from openai import OpenAI
from openai import AsyncOpenAI
from prisma.enums import ContentType
from backend.data.db import execute_raw_with_schema, query_raw_with_schema
@@ -28,9 +28,9 @@ EMBEDDING_DIM = 1536
@cache
def get_openai_client() -> OpenAI | None:
def get_openai_client() -> AsyncOpenAI | None:
"""
Get or create a singleton OpenAI client for connection reuse.
Get or create a singleton async OpenAI client for connection reuse.
Returns None if API key is not configured.
"""
@@ -38,7 +38,7 @@ def get_openai_client() -> OpenAI | None:
api_key = settings.secrets.openai_internal_api_key
if not api_key:
return None
return OpenAI(api_key=api_key)
return AsyncOpenAI(api_key=api_key)
def build_searchable_text(
@@ -90,7 +90,7 @@ async def generate_embedding(text: str) -> list[float] | None:
truncated_text = text[:32000]
start_time = time.time()
response = client.embeddings.create(
response = await client.embeddings.create(
model=EMBEDDING_MODEL,
input=truncated_text,
)