zmail pr comments and add retry logic

This commit is contained in:
Swifty
2026-01-14 15:23:38 +01:00
parent 94ec749e4e
commit aaa52c12e2
3 changed files with 222 additions and 136 deletions

View File

@@ -1,5 +1,6 @@
"""Database operations for chat sessions."""
import asyncio
import logging
from datetime import UTC, datetime
from typing import Any, cast
@@ -10,6 +11,7 @@ from prisma.types import (
ChatMessageCreateInput,
ChatSessionCreateInput,
ChatSessionUpdateInput,
ChatSessionWhereInput,
)
from backend.data.db import transaction
@@ -25,7 +27,8 @@ async def get_chat_session(session_id: str) -> PrismaChatSession | None:
include={"Messages": True},
)
if session and session.Messages:
# Sort messages by sequence in Python since Prisma doesn't support order_by in include
# Sort messages by sequence in Python - Prisma Python client doesn't support
# order_by in include clauses (unlike Prisma JS), so we sort after fetching
session.Messages.sort(key=lambda m: m.sequence)
return session
@@ -79,6 +82,7 @@ async def update_chat_session(
include={"Messages": True},
)
if session and session.Messages:
# Sort in Python - Prisma Python doesn't support order_by in include clauses
session.Messages.sort(key=lambda m: m.sequence)
return session
@@ -95,9 +99,9 @@ async def add_chat_message(
function_call: dict[str, Any] | None = None,
) -> PrismaChatMessage:
"""Add a message to a chat session."""
# Build the input dict dynamically - only include optional fields when they
# have values, as Prisma TypedDict validation fails when optional fields
# are explicitly set to None
# Build input dict dynamically rather than using ChatMessageCreateInput directly
# because Prisma's TypedDict validation rejects optional fields set to None.
# We only include fields that have values, then cast at the end.
data: dict[str, Any] = {
"Session": {"connect": {"id": session_id}},
"role": role,
@@ -120,15 +124,15 @@ async def add_chat_message(
if function_call is not None:
data["functionCall"] = SafeJson(function_call)
# Update session's updatedAt timestamp
await PrismaChatSession.prisma().update(
where={"id": session_id},
data={"updatedAt": datetime.now(UTC)},
)
return await PrismaChatMessage.prisma().create(
data=cast(ChatMessageCreateInput, data)
# Run message create and session timestamp update in parallel for lower latency
_, message = await asyncio.gather(
PrismaChatSession.prisma().update(
where={"id": session_id},
data={"updatedAt": datetime.now(UTC)},
),
PrismaChatMessage.prisma().create(data=cast(ChatMessageCreateInput, data)),
)
return message
async def add_chat_messages_batch(
@@ -148,9 +152,9 @@ async def add_chat_messages_batch(
async with transaction() as tx:
for i, msg in enumerate(messages):
# Build the input dict dynamically - only include optional JSON fields
# when they have values, as Prisma TypedDict validation fails when
# optional fields are explicitly set to None
# Build input dict dynamically rather than using ChatMessageCreateInput
# directly because Prisma's TypedDict validation rejects optional fields
# set to None. We only include fields that have values, then cast.
data: dict[str, Any] = {
"Session": {"connect": {"id": session_id}},
"role": msg["role"],
@@ -178,7 +182,9 @@ async def add_chat_messages_batch(
)
created_messages.append(created)
# Update session's updatedAt timestamp within the same transaction
# Update session's updatedAt timestamp within the same transaction.
# Note: Token usage (total_prompt_tokens, total_completion_tokens) is updated
# separately via update_chat_session() after streaming completes.
await PrismaChatSession.prisma(tx).update(
where={"id": session_id},
data={"updatedAt": datetime.now(UTC)},
@@ -219,8 +225,8 @@ async def delete_chat_session(session_id: str, user_id: str | None = None) -> bo
True if deleted successfully, False otherwise.
"""
try:
# Build where clause with optional user_id validation
where_clause: dict[str, Any] = {"id": session_id}
# Build typed where clause with optional user_id validation
where_clause: ChatSessionWhereInput = {"id": session_id}
if user_id is not None:
where_clause["userId"] = user_id

View File

@@ -32,9 +32,20 @@ from .config import ChatConfig
logger = logging.getLogger(__name__)
config = ChatConfig()
# Redis cache key prefix for chat sessions
CHAT_SESSION_CACHE_PREFIX = "chat:session:"
def _get_session_cache_key(session_id: str) -> str:
"""Get the Redis cache key for a chat session."""
return f"{CHAT_SESSION_CACHE_PREFIX}{session_id}"
# Session-level locks to prevent race conditions during concurrent upserts.
# Uses WeakValueDictionary to automatically garbage collect locks when no longer referenced,
# preventing unbounded memory growth while maintaining lock semantics for active sessions.
# Invalidation: Locks are auto-removed by GC when no coroutine holds a reference (after
# async with lock: completes). Explicit cleanup also occurs in delete_chat_session().
_session_locks: WeakValueDictionary[str, asyncio.Lock] = WeakValueDictionary()
_session_locks_mutex = asyncio.Lock()
@@ -265,7 +276,7 @@ class ChatSession(BaseModel):
async def _get_session_from_cache(session_id: str) -> ChatSession | None:
"""Get a chat session from Redis cache."""
redis_key = f"chat:session:{session_id}"
redis_key = _get_session_cache_key(session_id)
async_redis = await get_redis_async()
raw_session: bytes | None = await async_redis.get(redis_key)
@@ -287,7 +298,7 @@ async def _get_session_from_cache(session_id: str) -> ChatSession | None:
async def _cache_session(session: ChatSession) -> None:
"""Cache a chat session in Redis."""
redis_key = f"chat:session:{session.session_id}"
redis_key = _get_session_cache_key(session.session_id)
async_redis = await get_redis_async()
await async_redis.setex(redis_key, config.session_ttl, session.model_dump_json())
@@ -547,7 +558,7 @@ async def delete_chat_session(session_id: str, user_id: str | None = None) -> bo
# Only invalidate cache and clean up lock after DB confirms deletion
try:
redis_key = f"chat:session:{session_id}"
redis_key = _get_session_cache_key(session_id)
async_redis = await get_redis_async()
await async_redis.delete(redis_key)
except Exception as e:
@@ -582,7 +593,7 @@ async def update_session_title(session_id: str, title: str) -> bool:
# Invalidate cache so next fetch gets updated title
try:
redis_key = f"chat:session:{session_id}"
redis_key = _get_session_cache_key(session_id)
async_redis = await get_redis_async()
await async_redis.delete(redis_key)
except Exception as e:

View File

@@ -1,10 +1,17 @@
import asyncio
import logging
from collections.abc import AsyncGenerator
from typing import Any
import orjson
from langfuse import Langfuse
from openai import AsyncOpenAI
from openai import (
APIConnectionError,
APIError,
APIStatusError,
AsyncOpenAI,
RateLimitError,
)
from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam
from backend.data.understanding import (
@@ -655,6 +662,31 @@ async def stream_chat_completion(
logger.warning(f"Failed to end Langfuse trace: {e}")
# Retry configuration for OpenAI API calls
MAX_RETRIES = 3
BASE_DELAY_SECONDS = 1.0
MAX_DELAY_SECONDS = 30.0
def _is_retryable_error(error: Exception) -> bool:
"""Determine if an error is retryable."""
if isinstance(error, RateLimitError):
return True
if isinstance(error, APIConnectionError):
return True
if isinstance(error, APIStatusError):
# APIStatusError has a response with status_code
# Retry on 5xx status codes (server errors)
if error.response.status_code >= 500:
return True
if isinstance(error, APIError):
# Retry on overloaded errors or 500 errors (may not have status code)
error_message = str(error).lower()
if "overloaded" in error_message or "internal server error" in error_message:
return True
return False
async def _stream_chat_chunks(
session: ChatSession,
tools: list[ChatCompletionToolParam],
@@ -665,6 +697,7 @@ async def _stream_chat_chunks(
Pure streaming function for OpenAI chat completions with tool calling.
This function is database-agnostic and focuses only on streaming logic.
Implements exponential backoff retry for transient API errors.
Args:
session: Chat session with conversation history
@@ -692,136 +725,172 @@ async def _stream_chat_chunks(
# Loop to handle tool calls and continue conversation
while True:
try:
logger.info("Creating OpenAI chat completion stream...")
retry_count = 0
last_error: Exception | None = None
# Create the stream with proper types
stream = await client.chat.completions.create(
model=model,
messages=messages,
tools=tools,
tool_choice="auto",
stream=True,
stream_options={"include_usage": True},
)
while retry_count <= MAX_RETRIES:
try:
logger.info(
f"Creating OpenAI chat completion stream..."
f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
)
# Variables to accumulate tool calls
tool_calls: list[dict[str, Any]] = []
active_tool_call_idx: int | None = None
finish_reason: str | None = None
# Track which tool call indices have had their start event emitted
emitted_start_for_idx: set[int] = set()
# Create the stream with proper types
stream = await client.chat.completions.create(
model=model,
messages=messages,
tools=tools,
tool_choice="auto",
stream=True,
stream_options={"include_usage": True},
)
# Track if we've started the text block
text_started = False
# Variables to accumulate tool calls
tool_calls: list[dict[str, Any]] = []
active_tool_call_idx: int | None = None
finish_reason: str | None = None
# Track which tool call indices have had their start event emitted
emitted_start_for_idx: set[int] = set()
# Process the stream
chunk: ChatCompletionChunk
async for chunk in stream:
if chunk.usage:
yield StreamUsage(
promptTokens=chunk.usage.prompt_tokens,
completionTokens=chunk.usage.completion_tokens,
totalTokens=chunk.usage.total_tokens,
)
# Track if we've started the text block
text_started = False
if chunk.choices:
choice = chunk.choices[0]
delta = choice.delta
# Capture finish reason
if choice.finish_reason:
finish_reason = choice.finish_reason
logger.info(f"Finish reason: {finish_reason}")
# Handle content streaming
if delta.content:
# Emit text-start on first text content
if not text_started and text_block_id:
yield StreamTextStart(id=text_block_id)
text_started = True
# Stream the text delta
text_response = StreamTextDelta(
id=text_block_id or "",
delta=delta.content,
# Process the stream
chunk: ChatCompletionChunk
async for chunk in stream:
if chunk.usage:
yield StreamUsage(
promptTokens=chunk.usage.prompt_tokens,
completionTokens=chunk.usage.completion_tokens,
totalTokens=chunk.usage.total_tokens,
)
yield text_response
# Handle tool calls
if delta.tool_calls:
for tc_chunk in delta.tool_calls:
idx = tc_chunk.index
if chunk.choices:
choice = chunk.choices[0]
delta = choice.delta
# Update active tool call index if needed
if (
active_tool_call_idx is None
or active_tool_call_idx != idx
):
active_tool_call_idx = idx
# Capture finish reason
if choice.finish_reason:
finish_reason = choice.finish_reason
logger.info(f"Finish reason: {finish_reason}")
# Ensure we have a tool call object at this index
while len(tool_calls) <= idx:
tool_calls.append(
{
"id": "",
"type": "function",
"function": {
"name": "",
"arguments": "",
# Handle content streaming
if delta.content:
# Emit text-start on first text content
if not text_started and text_block_id:
yield StreamTextStart(id=text_block_id)
text_started = True
# Stream the text delta
text_response = StreamTextDelta(
id=text_block_id or "",
delta=delta.content,
)
yield text_response
# Handle tool calls
if delta.tool_calls:
for tc_chunk in delta.tool_calls:
idx = tc_chunk.index
# Update active tool call index if needed
if (
active_tool_call_idx is None
or active_tool_call_idx != idx
):
active_tool_call_idx = idx
# Ensure we have a tool call object at this index
while len(tool_calls) <= idx:
tool_calls.append(
{
"id": "",
"type": "function",
"function": {
"name": "",
"arguments": "",
},
},
},
)
)
# Accumulate the tool call data
if tc_chunk.id:
tool_calls[idx]["id"] = tc_chunk.id
if tc_chunk.function:
if tc_chunk.function.name:
tool_calls[idx]["function"][
"name"
] = tc_chunk.function.name
if tc_chunk.function.arguments:
tool_calls[idx]["function"][
"arguments"
] += tc_chunk.function.arguments
# Accumulate the tool call data
if tc_chunk.id:
tool_calls[idx]["id"] = tc_chunk.id
if tc_chunk.function:
if tc_chunk.function.name:
tool_calls[idx]["function"][
"name"
] = tc_chunk.function.name
if tc_chunk.function.arguments:
tool_calls[idx]["function"][
"arguments"
] += tc_chunk.function.arguments
# Emit StreamToolInputStart only after we have the tool call ID
if (
idx not in emitted_start_for_idx
and tool_calls[idx]["id"]
and tool_calls[idx]["function"]["name"]
):
yield StreamToolInputStart(
toolCallId=tool_calls[idx]["id"],
toolName=tool_calls[idx]["function"]["name"],
)
emitted_start_for_idx.add(idx)
logger.info(f"Stream complete. Finish reason: {finish_reason}")
# Emit StreamToolInputStart only after we have the tool call ID
if (
idx not in emitted_start_for_idx
and tool_calls[idx]["id"]
and tool_calls[idx]["function"]["name"]
):
yield StreamToolInputStart(
toolCallId=tool_calls[idx]["id"],
toolName=tool_calls[idx]["function"]["name"],
)
emitted_start_for_idx.add(idx)
logger.info(f"Stream complete. Finish reason: {finish_reason}")
# Yield all accumulated tool calls after the stream is complete
# This ensures all tool call arguments have been fully received
for idx, tool_call in enumerate(tool_calls):
try:
async for tc in _yield_tool_call(tool_calls, idx, session):
yield tc
except (orjson.JSONDecodeError, KeyError, TypeError) as e:
# Yield all accumulated tool calls after the stream is complete
# This ensures all tool call arguments have been fully received
for idx, tool_call in enumerate(tool_calls):
try:
async for tc in _yield_tool_call(tool_calls, idx, session):
yield tc
except (orjson.JSONDecodeError, KeyError, TypeError) as e:
logger.error(
f"Failed to parse tool call {idx}: {e}",
exc_info=True,
extra={"tool_call": tool_call},
)
yield StreamError(
errorText=f"Invalid tool call arguments for tool {tool_call.get('function', {}).get('name', 'unknown')}: {e}",
)
# Re-raise to trigger retry logic in the parent function
raise
yield StreamFinish()
return
except Exception as e:
last_error = e
if _is_retryable_error(e) and retry_count < MAX_RETRIES:
retry_count += 1
# Calculate delay with exponential backoff
delay = min(
BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
MAX_DELAY_SECONDS,
)
logger.warning(
f"Retryable error in stream: {e!s}. "
f"Retrying in {delay:.1f}s (attempt {retry_count}/{MAX_RETRIES})"
)
await asyncio.sleep(delay)
continue # Retry the stream
else:
# Non-retryable error or max retries exceeded
logger.error(
f"Failed to parse tool call {idx}: {e}",
f"Error in stream (not retrying): {e!s}",
exc_info=True,
extra={"tool_call": tool_call},
)
yield StreamError(
errorText=f"Invalid tool call arguments for tool {tool_call.get('function', {}).get('name', 'unknown')}: {e}",
)
# Re-raise to trigger retry logic in the parent function
raise
error_response = StreamError(errorText=str(e))
yield error_response
yield StreamFinish()
return
yield StreamFinish()
return
except Exception as e:
logger.error(f"Error in stream: {e!s}", exc_info=True)
error_response = StreamError(errorText=str(e))
yield error_response
# If we exit the retry loop without returning, it means we exhausted retries
if last_error:
logger.error(
f"Max retries ({MAX_RETRIES}) exceeded. Last error: {last_error!s}",
exc_info=True,
)
yield StreamError(errorText=f"Max retries exceeded: {last_error!s}")
yield StreamFinish()
return