mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-01-26 15:38:14 -05:00
Compare commits
2 Commits
dev
...
bently/sec
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
90dfed68af | ||
|
|
2c84ab1d55 |
@@ -673,6 +673,69 @@ def _is_region_blocked_error(error: Exception) -> bool:
|
||||
return "not available in your region" in str(error).lower()
|
||||
|
||||
|
||||
async def _summarize_messages(
    messages: list,
    model: str = "openai/gpt-4o-mini",
    api_key: str | None = None,
    base_url: str | None = None,
    timeout: float = 30.0,
) -> str:
    """Summarize a list of messages into concise context.

    Args:
        messages: List of message dicts to summarize.
        model: Model to use for summarization (default: gpt-4o-mini).
        api_key: API key for OpenAI client.
        base_url: Base URL for OpenAI client.
        timeout: Request timeout in seconds (default: 30.0).

    Returns:
        Summarized text, or "No summary available." if the model
        returns an empty completion.
    """
    # Format messages for summarization: only user/assistant turns with
    # non-empty content are included; other roles are skipped.
    conversation = []
    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content", "")
        if content and role in ("user", "assistant"):
            conversation.append(f"{role.upper()}: {content}")

    conversation_text = "\n\n".join(conversation)

    # Truncate conversation to fit within summarization model's context.
    # gpt-4o-mini has 128k context, but we limit to ~25k tokens
    # (~100k chars) for safety.
    MAX_CHARS = 100_000
    if len(conversation_text) > MAX_CHARS:
        conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]"

    # Call LLM to summarize. Imported lazily so module import does not
    # require the openai package when summarization is never used.
    import openai

    # Fix: the original constructed the client but never closed it,
    # leaking the underlying HTTP connection pool on every call.
    # `async with` closes it deterministically once the request finishes.
    async with openai.AsyncOpenAI(
        api_key=api_key, base_url=base_url, timeout=timeout
    ) as summarization_client:
        response = await summarization_client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Summarize this conversation history concisely. "
                        "Preserve key facts, decisions, and context. "
                        "Format as 2-3 short paragraphs."
                    ),
                },
                {"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
            ],
            max_tokens=500,
            temperature=0.3,
        )

    summary = response.choices[0].message.content
    return summary or "No summary available."
|
||||
|
||||
|
||||
async def _stream_chat_chunks(
|
||||
session: ChatSession,
|
||||
tools: list[ChatCompletionToolParam],
|
||||
@@ -709,6 +772,89 @@ async def _stream_chat_chunks(
|
||||
)
|
||||
messages = [system_message] + messages
|
||||
|
||||
# Apply context window management
|
||||
try:
|
||||
from backend.util.prompt import estimate_token_count
|
||||
|
||||
# Convert to dict for token counting
|
||||
# OpenAI message types are TypedDicts, so they're already dict-like
|
||||
messages_dict = []
|
||||
for msg in messages:
|
||||
# TypedDict objects are already dicts, just filter None values
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {k: v for k, v in msg.items() if v is not None}
|
||||
else:
|
||||
# Fallback for unexpected types
|
||||
msg_dict = dict(msg)
|
||||
messages_dict.append(msg_dict)
|
||||
|
||||
# Estimate tokens
|
||||
token_count = estimate_token_count(messages_dict, model="gpt-4o")
|
||||
|
||||
# If over threshold, summarize old messages
|
||||
if token_count > 120_000:
|
||||
KEEP_RECENT = 15
|
||||
MIN_MESSAGES_TO_SUMMARIZE = 5 # Don't summarize if too few old messages
|
||||
|
||||
# Check if we have a system prompt at the start
|
||||
has_system_prompt = (
|
||||
len(messages) > 0 and messages[0].get("role") == "system"
|
||||
)
|
||||
|
||||
if len(messages) > KEEP_RECENT:
|
||||
# Split messages based on whether system prompt exists
|
||||
recent_messages = messages[-KEEP_RECENT:]
|
||||
|
||||
if has_system_prompt:
|
||||
# Keep system prompt separate, summarize everything between system and recent
|
||||
system_msg = messages[0]
|
||||
old_messages_dict = messages_dict[1:-KEEP_RECENT]
|
||||
else:
|
||||
# No system prompt, summarize everything except recent
|
||||
system_msg = None
|
||||
old_messages_dict = messages_dict[:-KEEP_RECENT]
|
||||
|
||||
# Only summarize if we have enough old messages
|
||||
if len(old_messages_dict) >= MIN_MESSAGES_TO_SUMMARIZE:
|
||||
# Summarize old messages
|
||||
summary_text = await _summarize_messages(
|
||||
old_messages_dict,
|
||||
model="openai/gpt-4o-mini",
|
||||
api_key=config.api_key,
|
||||
base_url=config.base_url,
|
||||
)
|
||||
|
||||
# Build new message list
|
||||
from openai.types.chat import ChatCompletionSystemMessageParam
|
||||
|
||||
summary_msg = ChatCompletionSystemMessageParam(
|
||||
role="system",
|
||||
content=f"[Previous conversation summary]: {summary_text}",
|
||||
)
|
||||
|
||||
# Rebuild messages based on whether we have a system prompt
|
||||
if has_system_prompt:
|
||||
# system_prompt + summary + recent_messages
|
||||
messages = [system_msg, summary_msg] + recent_messages
|
||||
else:
|
||||
# summary + recent_messages (no original system prompt)
|
||||
messages = [summary_msg] + recent_messages
|
||||
|
||||
logger.info(
|
||||
f"Context summarized: {token_count} tokens, "
|
||||
f"summarized {len(old_messages_dict)} old messages, "
|
||||
f"kept last {KEEP_RECENT} messages"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Skipping summarization: only {len(old_messages_dict)} old messages "
|
||||
f"(minimum {MIN_MESSAGES_TO_SUMMARIZE} required)"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Context summarization failed: {e}", exc_info=True)
|
||||
# Continue with original messages (fallback)
|
||||
|
||||
# Loop to handle tool calls and continue conversation
|
||||
while True:
|
||||
retry_count = 0
|
||||
|
||||
Reference in New Issue
Block a user