mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-02-03 11:24:57 -05:00
Compare commits
20 Commits
ntindle/fi
...
claude/fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f7bb8a1f93 | ||
|
|
7dc53071e8 | ||
|
|
4878665c66 | ||
|
|
678ddde751 | ||
|
|
aef6f57cfd | ||
|
|
14cee1670a | ||
|
|
d81d1ce024 | ||
|
|
2dd341c369 | ||
|
|
f7350c797a | ||
|
|
1081590384 | ||
|
|
2abbb7fbc8 | ||
|
|
05b60db554 | ||
|
|
cc4839bedb | ||
|
|
dbbff04616 | ||
|
|
e6438b9a76 | ||
|
|
e10ff8d37f | ||
|
|
9538992eaf | ||
|
|
27b72062f2 | ||
|
|
9a79a8d257 | ||
|
|
a9bf08748b |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -180,3 +180,4 @@ autogpt_platform/backend/settings.py
|
||||
.claude/settings.local.json
|
||||
CLAUDE.local.md
|
||||
/autogpt_platform/backend/logs
|
||||
.next
|
||||
@@ -3,9 +3,13 @@ import logging
|
||||
import time
|
||||
from asyncio import CancelledError
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
import openai
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from backend.util.prompt import CompressResult
|
||||
|
||||
import orjson
|
||||
from langfuse import get_client
|
||||
from openai import (
|
||||
@@ -15,7 +19,13 @@ from openai import (
|
||||
PermissionDeniedError,
|
||||
RateLimitError,
|
||||
)
|
||||
from openai.types.chat import ChatCompletionChunk, ChatCompletionToolParam
|
||||
from openai.types.chat import (
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionStreamOptionsParam,
|
||||
ChatCompletionSystemMessageParam,
|
||||
ChatCompletionToolParam,
|
||||
)
|
||||
|
||||
from backend.data.redis_client import get_redis_async
|
||||
from backend.data.understanding import (
|
||||
@@ -794,207 +804,58 @@ def _is_region_blocked_error(error: Exception) -> bool:
|
||||
return "not available in your region" in str(error).lower()
|
||||
|
||||
|
||||
async def _summarize_messages(
|
||||
async def _manage_context_window(
|
||||
messages: list,
|
||||
model: str,
|
||||
api_key: str | None = None,
|
||||
base_url: str | None = None,
|
||||
timeout: float = 30.0,
|
||||
) -> str:
|
||||
"""Summarize a list of messages into concise context.
|
||||
) -> "CompressResult":
|
||||
"""
|
||||
Manage context window using the unified compress_context function.
|
||||
|
||||
Uses the same model as the chat for higher quality summaries.
|
||||
This is a thin wrapper that creates an OpenAI client for summarization
|
||||
and delegates to the shared compression logic in prompt.py.
|
||||
|
||||
Args:
|
||||
messages: List of message dicts to summarize
|
||||
model: Model to use for summarization (same as chat model)
|
||||
api_key: API key for OpenAI client
|
||||
base_url: Base URL for OpenAI client
|
||||
timeout: Request timeout in seconds (default: 30.0)
|
||||
messages: List of messages in OpenAI format
|
||||
model: Model name for token counting and summarization
|
||||
api_key: API key for summarization calls
|
||||
base_url: Base URL for summarization calls
|
||||
|
||||
Returns:
|
||||
Summarized text
|
||||
CompressResult with compacted messages and metadata
|
||||
"""
|
||||
# Format messages for summarization
|
||||
conversation = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content", "")
|
||||
# Include user, assistant, and tool messages (tool outputs are important context)
|
||||
if content and role in ("user", "assistant", "tool"):
|
||||
conversation.append(f"{role.upper()}: {content}")
|
||||
|
||||
conversation_text = "\n\n".join(conversation)
|
||||
|
||||
# Handle empty conversation
|
||||
if not conversation_text:
|
||||
return "No conversation history available."
|
||||
|
||||
# Truncate conversation to fit within summarization model's context
|
||||
# gpt-4o-mini has 128k context, but we limit to ~25k tokens (~100k chars) for safety
|
||||
MAX_CHARS = 100_000
|
||||
if len(conversation_text) > MAX_CHARS:
|
||||
conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]"
|
||||
|
||||
# Call LLM to summarize
|
||||
import openai
|
||||
|
||||
summarization_client = openai.AsyncOpenAI(
|
||||
api_key=api_key, base_url=base_url, timeout=timeout
|
||||
)
|
||||
from backend.util.prompt import compress_context
|
||||
|
||||
response = await summarization_client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"Create a detailed summary of the conversation so far. "
|
||||
"This summary will be used as context when continuing the conversation.\n\n"
|
||||
"Before writing the summary, analyze each message chronologically to identify:\n"
|
||||
"- User requests and their explicit goals\n"
|
||||
"- Your approach and key decisions made\n"
|
||||
"- Technical specifics (file names, tool outputs, function signatures)\n"
|
||||
"- Errors encountered and resolutions applied\n\n"
|
||||
"You MUST include ALL of the following sections:\n\n"
|
||||
"## 1. Primary Request and Intent\n"
|
||||
"The user's explicit goals and what they are trying to accomplish.\n\n"
|
||||
"## 2. Key Technical Concepts\n"
|
||||
"Technologies, frameworks, tools, and patterns being used or discussed.\n\n"
|
||||
"## 3. Files and Resources Involved\n"
|
||||
"Specific files examined or modified, with relevant snippets and identifiers.\n\n"
|
||||
"## 4. Errors and Fixes\n"
|
||||
"Problems encountered, error messages, and their resolutions. "
|
||||
"Include any user feedback on fixes.\n\n"
|
||||
"## 5. Problem Solving\n"
|
||||
"Issues that have been resolved and how they were addressed.\n\n"
|
||||
"## 6. All User Messages\n"
|
||||
"A complete list of all user inputs (excluding tool outputs) to preserve their exact requests.\n\n"
|
||||
"## 7. Pending Tasks\n"
|
||||
"Work items the user explicitly requested that have not yet been completed.\n\n"
|
||||
"## 8. Current Work\n"
|
||||
"Precise description of what was being worked on most recently, including relevant context.\n\n"
|
||||
"## 9. Next Steps\n"
|
||||
"What should happen next, aligned with the user's most recent requests. "
|
||||
"Include verbatim quotes of recent instructions if relevant."
|
||||
),
|
||||
},
|
||||
{"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
|
||||
],
|
||||
max_tokens=1500,
|
||||
temperature=0.3,
|
||||
)
|
||||
# Convert messages to dict format
|
||||
messages_dict = []
|
||||
for msg in messages:
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {k: v for k, v in msg.items() if v is not None}
|
||||
else:
|
||||
msg_dict = dict(msg)
|
||||
messages_dict.append(msg_dict)
|
||||
|
||||
summary = response.choices[0].message.content
|
||||
return summary or "No summary available."
|
||||
|
||||
|
||||
def _ensure_tool_pairs_intact(
|
||||
recent_messages: list[dict],
|
||||
all_messages: list[dict],
|
||||
start_index: int,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Ensure tool_call/tool_response pairs stay together after slicing.
|
||||
|
||||
When slicing messages for context compaction, a naive slice can separate
|
||||
an assistant message containing tool_calls from its corresponding tool
|
||||
response messages. This causes API validation errors (e.g., Anthropic's
|
||||
"unexpected tool_use_id found in tool_result blocks").
|
||||
|
||||
This function checks for orphan tool responses in the slice and extends
|
||||
backwards to include their corresponding assistant messages.
|
||||
|
||||
Args:
|
||||
recent_messages: The sliced messages to validate
|
||||
all_messages: The complete message list (for looking up missing assistants)
|
||||
start_index: The index in all_messages where recent_messages begins
|
||||
|
||||
Returns:
|
||||
A potentially extended list of messages with tool pairs intact
|
||||
"""
|
||||
if not recent_messages:
|
||||
return recent_messages
|
||||
|
||||
# Collect all tool_call_ids from assistant messages in the slice
|
||||
available_tool_call_ids: set[str] = set()
|
||||
for msg in recent_messages:
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
tc_id = tc.get("id")
|
||||
if tc_id:
|
||||
available_tool_call_ids.add(tc_id)
|
||||
|
||||
# Find orphan tool responses (tool messages whose tool_call_id is missing)
|
||||
orphan_tool_call_ids: set[str] = set()
|
||||
for msg in recent_messages:
|
||||
if msg.get("role") == "tool":
|
||||
tc_id = msg.get("tool_call_id")
|
||||
if tc_id and tc_id not in available_tool_call_ids:
|
||||
orphan_tool_call_ids.add(tc_id)
|
||||
|
||||
if not orphan_tool_call_ids:
|
||||
# No orphans, slice is valid
|
||||
return recent_messages
|
||||
|
||||
# Find the assistant messages that contain the orphan tool_call_ids
|
||||
# Search backwards from start_index in all_messages
|
||||
messages_to_prepend: list[dict] = []
|
||||
for i in range(start_index - 1, -1, -1):
|
||||
msg = all_messages[i]
|
||||
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
||||
msg_tool_ids = {tc.get("id") for tc in msg["tool_calls"] if tc.get("id")}
|
||||
if msg_tool_ids & orphan_tool_call_ids:
|
||||
# This assistant message has tool_calls we need
|
||||
# Also collect its contiguous tool responses that follow it
|
||||
assistant_and_responses: list[dict] = [msg]
|
||||
|
||||
# Scan forward from this assistant to collect tool responses
|
||||
for j in range(i + 1, start_index):
|
||||
following_msg = all_messages[j]
|
||||
if following_msg.get("role") == "tool":
|
||||
tool_id = following_msg.get("tool_call_id")
|
||||
if tool_id and tool_id in msg_tool_ids:
|
||||
assistant_and_responses.append(following_msg)
|
||||
else:
|
||||
# Stop at first non-tool message
|
||||
break
|
||||
|
||||
# Prepend the assistant and its tool responses (maintain order)
|
||||
messages_to_prepend = assistant_and_responses + messages_to_prepend
|
||||
# Mark these as found
|
||||
orphan_tool_call_ids -= msg_tool_ids
|
||||
# Also add this assistant's tool_call_ids to available set
|
||||
available_tool_call_ids |= msg_tool_ids
|
||||
|
||||
if not orphan_tool_call_ids:
|
||||
# Found all missing assistants
|
||||
break
|
||||
|
||||
if orphan_tool_call_ids:
|
||||
# Some tool_call_ids couldn't be resolved - remove those tool responses
|
||||
# This shouldn't happen in normal operation but handles edge cases
|
||||
logger.warning(
|
||||
f"Could not find assistant messages for tool_call_ids: {orphan_tool_call_ids}. "
|
||||
"Removing orphan tool responses."
|
||||
)
|
||||
recent_messages = [
|
||||
msg
|
||||
for msg in recent_messages
|
||||
if not (
|
||||
msg.get("role") == "tool"
|
||||
and msg.get("tool_call_id") in orphan_tool_call_ids
|
||||
# Only create client if api_key is provided (enables summarization)
|
||||
# Use context manager to avoid socket leaks
|
||||
if api_key:
|
||||
async with openai.AsyncOpenAI(
|
||||
api_key=api_key, base_url=base_url, timeout=30.0
|
||||
) as client:
|
||||
return await compress_context(
|
||||
messages=messages_dict,
|
||||
model=model,
|
||||
client=client,
|
||||
)
|
||||
]
|
||||
|
||||
if messages_to_prepend:
|
||||
logger.info(
|
||||
f"Extended recent messages by {len(messages_to_prepend)} to preserve "
|
||||
f"tool_call/tool_response pairs"
|
||||
else:
|
||||
# No API key - use truncation-only mode
|
||||
return await compress_context(
|
||||
messages=messages_dict,
|
||||
model=model,
|
||||
client=None,
|
||||
)
|
||||
return messages_to_prepend + recent_messages
|
||||
|
||||
return recent_messages
|
||||
|
||||
|
||||
async def _stream_chat_chunks(
|
||||
@@ -1022,11 +883,8 @@ async def _stream_chat_chunks(
|
||||
|
||||
logger.info("Starting pure chat stream")
|
||||
|
||||
# Build messages with system prompt prepended
|
||||
messages = session.to_openai_messages()
|
||||
if system_prompt:
|
||||
from openai.types.chat import ChatCompletionSystemMessageParam
|
||||
|
||||
system_message = ChatCompletionSystemMessageParam(
|
||||
role="system",
|
||||
content=system_prompt,
|
||||
@@ -1034,314 +892,38 @@ async def _stream_chat_chunks(
|
||||
messages = [system_message] + messages
|
||||
|
||||
# Apply context window management
|
||||
token_count = 0 # Initialize for exception handler
|
||||
try:
|
||||
from backend.util.prompt import estimate_token_count
|
||||
context_result = await _manage_context_window(
|
||||
messages=messages,
|
||||
model=model,
|
||||
api_key=config.api_key,
|
||||
base_url=config.base_url,
|
||||
)
|
||||
|
||||
# Convert to dict for token counting
|
||||
# OpenAI message types are TypedDicts, so they're already dict-like
|
||||
messages_dict = []
|
||||
for msg in messages:
|
||||
# TypedDict objects are already dicts, just filter None values
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {k: v for k, v in msg.items() if v is not None}
|
||||
else:
|
||||
# Fallback for unexpected types
|
||||
msg_dict = dict(msg)
|
||||
messages_dict.append(msg_dict)
|
||||
|
||||
# Estimate tokens using appropriate tokenizer
|
||||
# Normalize model name for token counting (tiktoken only supports OpenAI models)
|
||||
token_count_model = model
|
||||
if "/" in model:
|
||||
# Strip provider prefix (e.g., "anthropic/claude-opus-4.5" -> "claude-opus-4.5")
|
||||
token_count_model = model.split("/")[-1]
|
||||
|
||||
# For Claude and other non-OpenAI models, approximate with gpt-4o tokenizer
|
||||
# Most modern LLMs have similar tokenization (~1 token per 4 chars)
|
||||
if "claude" in token_count_model.lower() or not any(
|
||||
known in token_count_model.lower()
|
||||
for known in ["gpt", "o1", "chatgpt", "text-"]
|
||||
):
|
||||
token_count_model = "gpt-4o"
|
||||
|
||||
# Attempt token counting with error handling
|
||||
try:
|
||||
token_count = estimate_token_count(messages_dict, model=token_count_model)
|
||||
except Exception as token_error:
|
||||
# If token counting fails, use gpt-4o as fallback approximation
|
||||
logger.warning(
|
||||
f"Token counting failed for model {token_count_model}: {token_error}. "
|
||||
"Using gpt-4o approximation."
|
||||
)
|
||||
token_count = estimate_token_count(messages_dict, model="gpt-4o")
|
||||
|
||||
# If over threshold, summarize old messages
|
||||
if token_count > 120_000:
|
||||
KEEP_RECENT = 15
|
||||
|
||||
# Check if we have a system prompt at the start
|
||||
has_system_prompt = (
|
||||
len(messages) > 0 and messages[0].get("role") == "system"
|
||||
)
|
||||
|
||||
# Always attempt mitigation when over limit, even with few messages
|
||||
if messages:
|
||||
# Split messages based on whether system prompt exists
|
||||
# Calculate start index for the slice
|
||||
slice_start = max(0, len(messages_dict) - KEEP_RECENT)
|
||||
recent_messages = messages_dict[-KEEP_RECENT:]
|
||||
|
||||
# Ensure tool_call/tool_response pairs stay together
|
||||
# This prevents API errors from orphan tool responses
|
||||
recent_messages = _ensure_tool_pairs_intact(
|
||||
recent_messages, messages_dict, slice_start
|
||||
)
|
||||
|
||||
if has_system_prompt:
|
||||
# Keep system prompt separate, summarize everything between system and recent
|
||||
system_msg = messages[0]
|
||||
old_messages_dict = messages_dict[1:-KEEP_RECENT]
|
||||
else:
|
||||
# No system prompt, summarize everything except recent
|
||||
system_msg = None
|
||||
old_messages_dict = messages_dict[:-KEEP_RECENT]
|
||||
|
||||
# Summarize any non-empty old messages (no minimum threshold)
|
||||
# If we're over the token limit, we need to compress whatever we can
|
||||
if old_messages_dict:
|
||||
# Summarize old messages using the same model as chat
|
||||
summary_text = await _summarize_messages(
|
||||
old_messages_dict,
|
||||
model=model,
|
||||
api_key=config.api_key,
|
||||
base_url=config.base_url,
|
||||
)
|
||||
|
||||
# Build new message list
|
||||
# Use assistant role (not system) to prevent privilege escalation
|
||||
# of user-influenced content to instruction-level authority
|
||||
from openai.types.chat import ChatCompletionAssistantMessageParam
|
||||
|
||||
summary_msg = ChatCompletionAssistantMessageParam(
|
||||
role="assistant",
|
||||
content=(
|
||||
"[Previous conversation summary — for context only]: "
|
||||
f"{summary_text}"
|
||||
),
|
||||
)
|
||||
|
||||
# Rebuild messages based on whether we have a system prompt
|
||||
if has_system_prompt:
|
||||
# system_prompt + summary + recent_messages
|
||||
messages = [system_msg, summary_msg] + recent_messages
|
||||
else:
|
||||
# summary + recent_messages (no original system prompt)
|
||||
messages = [summary_msg] + recent_messages
|
||||
|
||||
logger.info(
|
||||
f"Context summarized: {token_count} tokens, "
|
||||
f"summarized {len(old_messages_dict)} old messages, "
|
||||
f"kept last {KEEP_RECENT} messages"
|
||||
)
|
||||
|
||||
# Fallback: If still over limit after summarization, progressively drop recent messages
|
||||
# This handles edge cases where recent messages are extremely large
|
||||
new_messages_dict = []
|
||||
for msg in messages:
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {k: v for k, v in msg.items() if v is not None}
|
||||
else:
|
||||
msg_dict = dict(msg)
|
||||
new_messages_dict.append(msg_dict)
|
||||
|
||||
new_token_count = estimate_token_count(
|
||||
new_messages_dict, model=token_count_model
|
||||
)
|
||||
|
||||
if new_token_count > 120_000:
|
||||
# Still over limit - progressively reduce KEEP_RECENT
|
||||
logger.warning(
|
||||
f"Still over limit after summarization: {new_token_count} tokens. "
|
||||
"Reducing number of recent messages kept."
|
||||
)
|
||||
|
||||
for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]:
|
||||
if keep_count == 0:
|
||||
# Try with just system prompt + summary (no recent messages)
|
||||
if has_system_prompt:
|
||||
messages = [system_msg, summary_msg]
|
||||
else:
|
||||
messages = [summary_msg]
|
||||
logger.info(
|
||||
"Trying with 0 recent messages (system + summary only)"
|
||||
)
|
||||
else:
|
||||
# Slice from ORIGINAL recent_messages to avoid duplicating summary
|
||||
reduced_recent = (
|
||||
recent_messages[-keep_count:]
|
||||
if len(recent_messages) >= keep_count
|
||||
else recent_messages
|
||||
)
|
||||
# Ensure tool pairs stay intact in the reduced slice
|
||||
reduced_slice_start = max(
|
||||
0, len(recent_messages) - keep_count
|
||||
)
|
||||
reduced_recent = _ensure_tool_pairs_intact(
|
||||
reduced_recent, recent_messages, reduced_slice_start
|
||||
)
|
||||
if has_system_prompt:
|
||||
messages = [
|
||||
system_msg,
|
||||
summary_msg,
|
||||
] + reduced_recent
|
||||
else:
|
||||
messages = [summary_msg] + reduced_recent
|
||||
|
||||
new_messages_dict = []
|
||||
for msg in messages:
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {
|
||||
k: v for k, v in msg.items() if v is not None
|
||||
}
|
||||
else:
|
||||
msg_dict = dict(msg)
|
||||
new_messages_dict.append(msg_dict)
|
||||
|
||||
new_token_count = estimate_token_count(
|
||||
new_messages_dict, model=token_count_model
|
||||
)
|
||||
|
||||
if new_token_count <= 120_000:
|
||||
logger.info(
|
||||
f"Reduced to {keep_count} recent messages, "
|
||||
f"now {new_token_count} tokens"
|
||||
)
|
||||
break
|
||||
else:
|
||||
logger.error(
|
||||
f"Unable to reduce token count below threshold even with 0 messages. "
|
||||
f"Final count: {new_token_count} tokens"
|
||||
)
|
||||
# ABSOLUTE LAST RESORT: Drop system prompt
|
||||
# This should only happen if summary itself is massive
|
||||
if has_system_prompt and len(messages) > 1:
|
||||
messages = messages[1:] # Drop system prompt
|
||||
logger.critical(
|
||||
"CRITICAL: Dropped system prompt as absolute last resort. "
|
||||
"Behavioral consistency may be affected."
|
||||
)
|
||||
# Yield error to user
|
||||
yield StreamError(
|
||||
errorText=(
|
||||
"Warning: System prompt dropped due to size constraints. "
|
||||
"Assistant behavior may be affected."
|
||||
)
|
||||
)
|
||||
else:
|
||||
# No old messages to summarize - all messages are "recent"
|
||||
# Apply progressive truncation to reduce token count
|
||||
logger.warning(
|
||||
f"Token count {token_count} exceeds threshold but no old messages to summarize. "
|
||||
f"Applying progressive truncation to recent messages."
|
||||
)
|
||||
|
||||
# Create a base list excluding system prompt to avoid duplication
|
||||
# This is the pool of messages we'll slice from in the loop
|
||||
# Use messages_dict for type consistency with _ensure_tool_pairs_intact
|
||||
base_msgs = (
|
||||
messages_dict[1:] if has_system_prompt else messages_dict
|
||||
)
|
||||
|
||||
# Try progressively smaller keep counts
|
||||
new_token_count = token_count # Initialize with current count
|
||||
for keep_count in [12, 10, 8, 5, 3, 2, 1, 0]:
|
||||
if keep_count == 0:
|
||||
# Try with just system prompt (no recent messages)
|
||||
if has_system_prompt:
|
||||
messages = [system_msg]
|
||||
logger.info(
|
||||
"Trying with 0 recent messages (system prompt only)"
|
||||
)
|
||||
else:
|
||||
# No system prompt and no recent messages = empty messages list
|
||||
# This is invalid, skip this iteration
|
||||
continue
|
||||
else:
|
||||
if len(base_msgs) < keep_count:
|
||||
continue # Skip if we don't have enough messages
|
||||
|
||||
# Slice from base_msgs to get recent messages (without system prompt)
|
||||
recent_messages = base_msgs[-keep_count:]
|
||||
|
||||
# Ensure tool pairs stay intact in the reduced slice
|
||||
reduced_slice_start = max(0, len(base_msgs) - keep_count)
|
||||
recent_messages = _ensure_tool_pairs_intact(
|
||||
recent_messages, base_msgs, reduced_slice_start
|
||||
)
|
||||
|
||||
if has_system_prompt:
|
||||
messages = [system_msg] + recent_messages
|
||||
else:
|
||||
messages = recent_messages
|
||||
|
||||
new_messages_dict = []
|
||||
for msg in messages:
|
||||
if msg is None:
|
||||
continue # Skip None messages (type safety)
|
||||
if isinstance(msg, dict):
|
||||
msg_dict = {
|
||||
k: v for k, v in msg.items() if v is not None
|
||||
}
|
||||
else:
|
||||
msg_dict = dict(msg)
|
||||
new_messages_dict.append(msg_dict)
|
||||
|
||||
new_token_count = estimate_token_count(
|
||||
new_messages_dict, model=token_count_model
|
||||
)
|
||||
|
||||
if new_token_count <= 120_000:
|
||||
logger.info(
|
||||
f"Reduced to {keep_count} recent messages, "
|
||||
f"now {new_token_count} tokens"
|
||||
)
|
||||
break
|
||||
else:
|
||||
# Even with 0 messages still over limit
|
||||
logger.error(
|
||||
f"Unable to reduce token count below threshold even with 0 messages. "
|
||||
f"Final count: {new_token_count} tokens. Messages may be extremely large."
|
||||
)
|
||||
# ABSOLUTE LAST RESORT: Drop system prompt
|
||||
if has_system_prompt and len(messages) > 1:
|
||||
messages = messages[1:] # Drop system prompt
|
||||
logger.critical(
|
||||
"CRITICAL: Dropped system prompt as absolute last resort. "
|
||||
"Behavioral consistency may be affected."
|
||||
)
|
||||
# Yield error to user
|
||||
yield StreamError(
|
||||
errorText=(
|
||||
"Warning: System prompt dropped due to size constraints. "
|
||||
"Assistant behavior may be affected."
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Context summarization failed: {e}", exc_info=True)
|
||||
# If we were over the token limit, yield error to user
|
||||
# Don't silently continue with oversized messages that will fail
|
||||
if token_count > 120_000:
|
||||
if context_result.error:
|
||||
if "System prompt dropped" in context_result.error:
|
||||
# Warning only - continue with reduced context
|
||||
yield StreamError(
|
||||
errorText=(
|
||||
f"Unable to manage context window (token limit exceeded: {token_count} tokens). "
|
||||
"Context summarization failed. Please start a new conversation."
|
||||
"Warning: System prompt dropped due to size constraints. "
|
||||
"Assistant behavior may be affected."
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Any other error - abort to prevent failed LLM calls
|
||||
yield StreamError(
|
||||
errorText=(
|
||||
f"Context window management failed: {context_result.error}. "
|
||||
"Please start a new conversation."
|
||||
)
|
||||
)
|
||||
yield StreamFinish()
|
||||
return
|
||||
# Otherwise, continue with original messages (under limit)
|
||||
|
||||
messages = context_result.messages
|
||||
if context_result.was_compacted:
|
||||
logger.info(
|
||||
f"Context compacted for streaming: {context_result.token_count} tokens"
|
||||
)
|
||||
|
||||
# Loop to handle tool calls and continue conversation
|
||||
while True:
|
||||
@@ -1369,14 +951,6 @@ async def _stream_chat_chunks(
|
||||
:128
|
||||
] # OpenRouter limit
|
||||
|
||||
# Create the stream with proper types
|
||||
from typing import cast
|
||||
|
||||
from openai.types.chat import (
|
||||
ChatCompletionMessageParam,
|
||||
ChatCompletionStreamOptionsParam,
|
||||
)
|
||||
|
||||
stream = await client.chat.completions.create(
|
||||
model=model,
|
||||
messages=cast(list[ChatCompletionMessageParam], messages),
|
||||
@@ -1900,17 +1474,36 @@ async def _generate_llm_continuation(
|
||||
# Build system prompt
|
||||
system_prompt, _ = await _build_system_prompt(user_id)
|
||||
|
||||
# Build messages in OpenAI format
|
||||
messages = session.to_openai_messages()
|
||||
if system_prompt:
|
||||
from openai.types.chat import ChatCompletionSystemMessageParam
|
||||
|
||||
system_message = ChatCompletionSystemMessageParam(
|
||||
role="system",
|
||||
content=system_prompt,
|
||||
)
|
||||
messages = [system_message] + messages
|
||||
|
||||
# Apply context window management to prevent oversized requests
|
||||
context_result = await _manage_context_window(
|
||||
messages=messages,
|
||||
model=config.model,
|
||||
api_key=config.api_key,
|
||||
base_url=config.base_url,
|
||||
)
|
||||
|
||||
if context_result.error and "System prompt dropped" not in context_result.error:
|
||||
logger.error(
|
||||
f"Context window management failed for session {session_id}: "
|
||||
f"{context_result.error} (tokens={context_result.token_count})"
|
||||
)
|
||||
return
|
||||
|
||||
messages = context_result.messages
|
||||
if context_result.was_compacted:
|
||||
logger.info(
|
||||
f"Context compacted for LLM continuation: "
|
||||
f"{context_result.token_count} tokens"
|
||||
)
|
||||
|
||||
# Build extra_body for tracing
|
||||
extra_body: dict[str, Any] = {
|
||||
"posthogProperties": {
|
||||
@@ -1923,19 +1516,54 @@ async def _generate_llm_continuation(
|
||||
if session_id:
|
||||
extra_body["session_id"] = session_id[:128]
|
||||
|
||||
# Make non-streaming LLM call (no tools - just text response)
|
||||
from typing import cast
|
||||
retry_count = 0
|
||||
last_error: Exception | None = None
|
||||
response = None
|
||||
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
while retry_count <= MAX_RETRIES:
|
||||
try:
|
||||
logger.info(
|
||||
f"Generating LLM continuation for session {session_id}"
|
||||
f"{f' (retry {retry_count}/{MAX_RETRIES})' if retry_count > 0 else ''}"
|
||||
)
|
||||
|
||||
# No tools parameter = text-only response (no tool calls)
|
||||
response = await client.chat.completions.create(
|
||||
model=config.model,
|
||||
messages=cast(list[ChatCompletionMessageParam], messages),
|
||||
extra_body=extra_body,
|
||||
)
|
||||
response = await client.chat.completions.create(
|
||||
model=config.model,
|
||||
messages=cast(list[ChatCompletionMessageParam], messages),
|
||||
extra_body=extra_body,
|
||||
)
|
||||
last_error = None # Clear any previous error on success
|
||||
break # Success, exit retry loop
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
if _is_retryable_error(e) and retry_count < MAX_RETRIES:
|
||||
retry_count += 1
|
||||
delay = min(
|
||||
BASE_DELAY_SECONDS * (2 ** (retry_count - 1)),
|
||||
MAX_DELAY_SECONDS,
|
||||
)
|
||||
logger.warning(
|
||||
f"Retryable error in LLM continuation: {e!s}. "
|
||||
f"Retrying in {delay:.1f}s (attempt {retry_count}/{MAX_RETRIES})"
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
continue
|
||||
else:
|
||||
# Non-retryable error - log and exit gracefully
|
||||
logger.error(
|
||||
f"Non-retryable error in LLM continuation: {e!s}",
|
||||
exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
if response.choices and response.choices[0].message.content:
|
||||
if last_error:
|
||||
logger.error(
|
||||
f"Max retries ({MAX_RETRIES}) exceeded for LLM continuation. "
|
||||
f"Last error: {last_error!s}"
|
||||
)
|
||||
return
|
||||
|
||||
if response and response.choices and response.choices[0].message.content:
|
||||
assistant_content = response.choices[0].message.content
|
||||
|
||||
# Reload session from DB to avoid race condition with user messages
|
||||
|
||||
@@ -139,11 +139,10 @@ async def decompose_goal_external(
|
||||
"""
|
||||
client = _get_client()
|
||||
|
||||
# Build the request payload
|
||||
payload: dict[str, Any] = {"description": description}
|
||||
if context:
|
||||
# The external service uses user_instruction for additional context
|
||||
payload["user_instruction"] = context
|
||||
description = f"{description}\n\nAdditional context from user:\n{context}"
|
||||
|
||||
payload: dict[str, Any] = {"description": description}
|
||||
if library_agents:
|
||||
payload["library_agents"] = library_agents
|
||||
|
||||
|
||||
@@ -38,6 +38,8 @@ class ResponseType(str, Enum):
|
||||
OPERATION_STARTED = "operation_started"
|
||||
OPERATION_PENDING = "operation_pending"
|
||||
OPERATION_IN_PROGRESS = "operation_in_progress"
|
||||
# Input validation
|
||||
INPUT_VALIDATION_ERROR = "input_validation_error"
|
||||
|
||||
|
||||
# Base response model
|
||||
@@ -68,6 +70,10 @@ class AgentInfo(BaseModel):
|
||||
has_external_trigger: bool | None = None
|
||||
new_output: bool | None = None
|
||||
graph_id: str | None = None
|
||||
inputs: dict[str, Any] | None = Field(
|
||||
default=None,
|
||||
description="Input schema for the agent, including field names, types, and defaults",
|
||||
)
|
||||
|
||||
|
||||
class AgentsFoundResponse(ToolResponseBase):
|
||||
@@ -194,6 +200,20 @@ class ErrorResponse(ToolResponseBase):
|
||||
details: dict[str, Any] | None = None
|
||||
|
||||
|
||||
class InputValidationErrorResponse(ToolResponseBase):
|
||||
"""Response when run_agent receives unknown input fields."""
|
||||
|
||||
type: ResponseType = ResponseType.INPUT_VALIDATION_ERROR
|
||||
unrecognized_fields: list[str] = Field(
|
||||
description="List of input field names that were not recognized"
|
||||
)
|
||||
inputs: dict[str, Any] = Field(
|
||||
description="The agent's valid input schema for reference"
|
||||
)
|
||||
graph_id: str | None = None
|
||||
graph_version: int | None = None
|
||||
|
||||
|
||||
# Agent output models
|
||||
class ExecutionOutputInfo(BaseModel):
|
||||
"""Summary of a single execution's outputs."""
|
||||
|
||||
@@ -30,6 +30,7 @@ from .models import (
|
||||
ErrorResponse,
|
||||
ExecutionOptions,
|
||||
ExecutionStartedResponse,
|
||||
InputValidationErrorResponse,
|
||||
SetupInfo,
|
||||
SetupRequirementsResponse,
|
||||
ToolResponseBase,
|
||||
@@ -273,6 +274,22 @@ class RunAgentTool(BaseTool):
|
||||
input_properties = graph.input_schema.get("properties", {})
|
||||
required_fields = set(graph.input_schema.get("required", []))
|
||||
provided_inputs = set(params.inputs.keys())
|
||||
valid_fields = set(input_properties.keys())
|
||||
|
||||
# Check for unknown input fields
|
||||
unrecognized_fields = provided_inputs - valid_fields
|
||||
if unrecognized_fields:
|
||||
return InputValidationErrorResponse(
|
||||
message=(
|
||||
f"Unknown input field(s) provided: {', '.join(sorted(unrecognized_fields))}. "
|
||||
f"Agent was not executed. Please use the correct field names from the schema."
|
||||
),
|
||||
session_id=session_id,
|
||||
unrecognized_fields=sorted(unrecognized_fields),
|
||||
inputs=graph.input_schema,
|
||||
graph_id=graph.id,
|
||||
graph_version=graph.version,
|
||||
)
|
||||
|
||||
# If agent has inputs but none were provided AND use_defaults is not set,
|
||||
# always show what's available first so user can decide
|
||||
|
||||
@@ -402,3 +402,42 @@ async def test_run_agent_schedule_without_name(setup_test_data):
|
||||
# Should return error about missing schedule_name
|
||||
assert result_data.get("type") == "error"
|
||||
assert "schedule_name" in result_data["message"].lower()
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="session")
|
||||
async def test_run_agent_rejects_unknown_input_fields(setup_test_data):
|
||||
"""Test that run_agent returns input_validation_error for unknown input fields."""
|
||||
user = setup_test_data["user"]
|
||||
store_submission = setup_test_data["store_submission"]
|
||||
|
||||
tool = RunAgentTool()
|
||||
agent_marketplace_id = f"{user.email.split('@')[0]}/{store_submission.slug}"
|
||||
session = make_session(user_id=user.id)
|
||||
|
||||
# Execute with unknown input field names
|
||||
response = await tool.execute(
|
||||
user_id=user.id,
|
||||
session_id=str(uuid.uuid4()),
|
||||
tool_call_id=str(uuid.uuid4()),
|
||||
username_agent_slug=agent_marketplace_id,
|
||||
inputs={
|
||||
"unknown_field": "some value",
|
||||
"another_unknown": "another value",
|
||||
},
|
||||
session=session,
|
||||
)
|
||||
|
||||
assert response is not None
|
||||
assert hasattr(response, "output")
|
||||
assert isinstance(response.output, str)
|
||||
result_data = orjson.loads(response.output)
|
||||
|
||||
# Should return input_validation_error type with unrecognized fields
|
||||
assert result_data.get("type") == "input_validation_error"
|
||||
assert "unrecognized_fields" in result_data
|
||||
assert set(result_data["unrecognized_fields"]) == {
|
||||
"another_unknown",
|
||||
"unknown_field",
|
||||
}
|
||||
assert "inputs" in result_data # Contains the valid schema
|
||||
assert "Agent was not executed" in result_data["message"]
|
||||
|
||||
@@ -5,6 +5,8 @@ import uuid
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
from pydantic_core import PydanticUndefined
|
||||
|
||||
from backend.api.features.chat.model import ChatSession
|
||||
from backend.data.block import get_block
|
||||
from backend.data.execution import ExecutionContext
|
||||
@@ -75,15 +77,22 @@ class RunBlockTool(BaseTool):
|
||||
self,
|
||||
user_id: str,
|
||||
block: Any,
|
||||
input_data: dict[str, Any] | None = None,
|
||||
) -> tuple[dict[str, CredentialsMetaInput], list[CredentialsMetaInput]]:
|
||||
"""
|
||||
Check if user has required credentials for a block.
|
||||
|
||||
Args:
|
||||
user_id: User ID
|
||||
block: Block to check credentials for
|
||||
input_data: Input data for the block (used to determine provider via discriminator)
|
||||
|
||||
Returns:
|
||||
tuple[matched_credentials, missing_credentials]
|
||||
"""
|
||||
matched_credentials: dict[str, CredentialsMetaInput] = {}
|
||||
missing_credentials: list[CredentialsMetaInput] = []
|
||||
input_data = input_data or {}
|
||||
|
||||
# Get credential field info from block's input schema
|
||||
credentials_fields_info = block.input_schema.get_credentials_fields_info()
|
||||
@@ -96,14 +105,33 @@ class RunBlockTool(BaseTool):
|
||||
available_creds = await creds_manager.store.get_all_creds(user_id)
|
||||
|
||||
for field_name, field_info in credentials_fields_info.items():
|
||||
# field_info.provider is a frozenset of acceptable providers
|
||||
# field_info.supported_types is a frozenset of acceptable types
|
||||
effective_field_info = field_info
|
||||
if field_info.discriminator and field_info.discriminator_mapping:
|
||||
# Get discriminator from input, falling back to schema default
|
||||
discriminator_value = input_data.get(field_info.discriminator)
|
||||
if discriminator_value is None:
|
||||
field = block.input_schema.model_fields.get(
|
||||
field_info.discriminator
|
||||
)
|
||||
if field and field.default is not PydanticUndefined:
|
||||
discriminator_value = field.default
|
||||
|
||||
if (
|
||||
discriminator_value
|
||||
and discriminator_value in field_info.discriminator_mapping
|
||||
):
|
||||
effective_field_info = field_info.discriminate(discriminator_value)
|
||||
logger.debug(
|
||||
f"Discriminated provider for {field_name}: "
|
||||
f"{discriminator_value} -> {effective_field_info.provider}"
|
||||
)
|
||||
|
||||
matching_cred = next(
|
||||
(
|
||||
cred
|
||||
for cred in available_creds
|
||||
if cred.provider in field_info.provider
|
||||
and cred.type in field_info.supported_types
|
||||
if cred.provider in effective_field_info.provider
|
||||
and cred.type in effective_field_info.supported_types
|
||||
),
|
||||
None,
|
||||
)
|
||||
@@ -117,8 +145,8 @@ class RunBlockTool(BaseTool):
|
||||
)
|
||||
else:
|
||||
# Create a placeholder for the missing credential
|
||||
provider = next(iter(field_info.provider), "unknown")
|
||||
cred_type = next(iter(field_info.supported_types), "api_key")
|
||||
provider = next(iter(effective_field_info.provider), "unknown")
|
||||
cred_type = next(iter(effective_field_info.supported_types), "api_key")
|
||||
missing_credentials.append(
|
||||
CredentialsMetaInput(
|
||||
id=field_name,
|
||||
@@ -186,10 +214,9 @@ class RunBlockTool(BaseTool):
|
||||
|
||||
logger.info(f"Executing block {block.name} ({block_id}) for user {user_id}")
|
||||
|
||||
# Check credentials
|
||||
creds_manager = IntegrationCredentialsManager()
|
||||
matched_credentials, missing_credentials = await self._check_block_credentials(
|
||||
user_id, block
|
||||
user_id, block, input_data
|
||||
)
|
||||
|
||||
if missing_credentials:
|
||||
|
||||
@@ -104,60 +104,18 @@ async def list_library_agents(
|
||||
order_by = {"updatedAt": "desc"}
|
||||
|
||||
try:
|
||||
# For LAST_EXECUTED sorting, we need to fetch execution data and sort in Python
|
||||
# since Prisma doesn't support sorting by nested relations
|
||||
if sort_by == library_model.LibraryAgentSort.LAST_EXECUTED:
|
||||
# TODO: This fetches all agents into memory for sorting, which may cause
|
||||
# performance issues for users with many agents. Prisma doesn't support
|
||||
# sorting by nested relations, so a dedicated lastExecutedAt column or
|
||||
# raw SQL query would be needed for database-level pagination.
|
||||
library_agents = await prisma.models.LibraryAgent.prisma().find_many(
|
||||
where=where_clause,
|
||||
include=library_agent_include(
|
||||
user_id,
|
||||
include_nodes=False,
|
||||
include_executions=True,
|
||||
execution_limit=1,
|
||||
),
|
||||
)
|
||||
|
||||
def get_sort_key(
|
||||
agent: prisma.models.LibraryAgent,
|
||||
) -> tuple[int, float]:
|
||||
"""
|
||||
Returns a tuple for sorting: (has_no_executions, -timestamp).
|
||||
|
||||
Agents WITH executions come first (sorted by most recent execution),
|
||||
agents WITHOUT executions come last (sorted by creation date).
|
||||
"""
|
||||
graph = agent.AgentGraph
|
||||
if graph and graph.Executions and len(graph.Executions) > 0:
|
||||
execution = graph.Executions[0]
|
||||
timestamp = execution.updatedAt or execution.createdAt
|
||||
return (0, -timestamp.timestamp())
|
||||
return (1, -agent.createdAt.timestamp())
|
||||
|
||||
library_agents.sort(key=get_sort_key)
|
||||
|
||||
# Apply pagination after sorting
|
||||
agent_count = len(library_agents)
|
||||
start_idx = (page - 1) * page_size
|
||||
end_idx = start_idx + page_size
|
||||
library_agents = library_agents[start_idx:end_idx]
|
||||
else:
|
||||
# Standard sorting via database
|
||||
library_agents = await prisma.models.LibraryAgent.prisma().find_many(
|
||||
where=where_clause,
|
||||
include=library_agent_include(
|
||||
user_id, include_nodes=False, include_executions=False
|
||||
),
|
||||
order=order_by,
|
||||
skip=(page - 1) * page_size,
|
||||
take=page_size,
|
||||
)
|
||||
agent_count = await prisma.models.LibraryAgent.prisma().count(
|
||||
where=where_clause
|
||||
)
|
||||
library_agents = await prisma.models.LibraryAgent.prisma().find_many(
|
||||
where=where_clause,
|
||||
include=library_agent_include(
|
||||
user_id, include_nodes=False, include_executions=include_executions
|
||||
),
|
||||
order=order_by,
|
||||
skip=(page - 1) * page_size,
|
||||
take=page_size,
|
||||
)
|
||||
agent_count = await prisma.models.LibraryAgent.prisma().count(
|
||||
where=where_clause
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Retrieved {len(library_agents)} library agents for user #{user_id}"
|
||||
@@ -387,20 +345,6 @@ async def get_library_agent_by_graph_id(
|
||||
graph_id: str,
|
||||
graph_version: Optional[int] = None,
|
||||
) -> library_model.LibraryAgent | None:
|
||||
"""
|
||||
Retrieves a library agent by its graph ID for a given user.
|
||||
|
||||
Args:
|
||||
user_id: The ID of the user who owns the library agent.
|
||||
graph_id: The ID of the agent graph to look up.
|
||||
graph_version: Optional specific version of the graph to retrieve.
|
||||
|
||||
Returns:
|
||||
The LibraryAgent if found, otherwise None.
|
||||
|
||||
Raises:
|
||||
DatabaseError: If there's an error during retrieval.
|
||||
"""
|
||||
try:
|
||||
filter: prisma.types.LibraryAgentWhereInput = {
|
||||
"agentGraphId": graph_id,
|
||||
@@ -684,17 +628,6 @@ async def update_library_agent(
|
||||
async def delete_library_agent(
|
||||
library_agent_id: str, user_id: str, soft_delete: bool = True
|
||||
) -> None:
|
||||
"""
|
||||
Deletes a library agent and cleans up associated schedules and webhooks.
|
||||
|
||||
Args:
|
||||
library_agent_id: The ID of the library agent to delete.
|
||||
user_id: The ID of the user who owns the library agent.
|
||||
soft_delete: If True, marks the agent as deleted; if False, permanently removes it.
|
||||
|
||||
Raises:
|
||||
NotFoundError: If the library agent is not found or doesn't belong to the user.
|
||||
"""
|
||||
# First get the agent to find the graph_id for cleanup
|
||||
library_agent = await prisma.models.LibraryAgent.prisma().find_unique(
|
||||
where={"id": library_agent_id}, include={"AgentGraph": True}
|
||||
@@ -1188,20 +1121,6 @@ async def update_preset(
|
||||
async def set_preset_webhook(
|
||||
user_id: str, preset_id: str, webhook_id: str | None
|
||||
) -> library_model.LibraryAgentPreset:
|
||||
"""
|
||||
Sets or removes a webhook connection for a preset.
|
||||
|
||||
Args:
|
||||
user_id: The ID of the user who owns the preset.
|
||||
preset_id: The ID of the preset to update.
|
||||
webhook_id: The ID of the webhook to connect, or None to disconnect.
|
||||
|
||||
Returns:
|
||||
The updated LibraryAgentPreset.
|
||||
|
||||
Raises:
|
||||
NotFoundError: If the preset is not found or doesn't belong to the user.
|
||||
"""
|
||||
current = await prisma.models.AgentPreset.prisma().find_unique(
|
||||
where={"id": preset_id},
|
||||
include=AGENT_PRESET_INCLUDE,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime
|
||||
|
||||
import prisma.enums
|
||||
import prisma.models
|
||||
@@ -9,7 +9,6 @@ from backend.data.db import connect
|
||||
from backend.data.includes import library_agent_include
|
||||
|
||||
from . import db
|
||||
from . import model as library_model
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -226,183 +225,3 @@ async def test_add_agent_to_library_not_found(mocker):
|
||||
mock_store_listing_version.return_value.find_unique.assert_called_once_with(
|
||||
where={"id": "version123"}, include={"AgentGraph": True}
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_library_agents_sort_by_last_executed(mocker):
|
||||
"""
|
||||
Test LAST_EXECUTED sorting behavior:
|
||||
- Agents WITH executions come first, sorted by most recent execution (updatedAt)
|
||||
- Agents WITHOUT executions come last, sorted by creation date
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Agent 1: Has execution that finished 1 hour ago
|
||||
agent1_execution = prisma.models.AgentGraphExecution(
|
||||
id="exec1",
|
||||
agentGraphId="agent1",
|
||||
agentGraphVersion=1,
|
||||
userId="test-user",
|
||||
createdAt=now - timedelta(hours=2),
|
||||
updatedAt=now - timedelta(hours=1), # Finished 1 hour ago
|
||||
executionStatus=prisma.enums.AgentExecutionStatus.COMPLETED,
|
||||
isDeleted=False,
|
||||
isShared=False,
|
||||
)
|
||||
agent1_graph = prisma.models.AgentGraph(
|
||||
id="agent1",
|
||||
version=1,
|
||||
name="Agent With Recent Execution",
|
||||
description="Has execution finished 1 hour ago",
|
||||
userId="test-user",
|
||||
isActive=True,
|
||||
createdAt=now - timedelta(days=5),
|
||||
Executions=[agent1_execution],
|
||||
)
|
||||
library_agent1 = prisma.models.LibraryAgent(
|
||||
id="lib1",
|
||||
userId="test-user",
|
||||
agentGraphId="agent1",
|
||||
agentGraphVersion=1,
|
||||
settings="{}", # type: ignore
|
||||
isCreatedByUser=True,
|
||||
isDeleted=False,
|
||||
isArchived=False,
|
||||
createdAt=now - timedelta(days=5),
|
||||
updatedAt=now - timedelta(days=5),
|
||||
isFavorite=False,
|
||||
useGraphIsActiveVersion=True,
|
||||
AgentGraph=agent1_graph,
|
||||
)
|
||||
|
||||
# Agent 2: Has execution that finished 3 hours ago
|
||||
agent2_execution = prisma.models.AgentGraphExecution(
|
||||
id="exec2",
|
||||
agentGraphId="agent2",
|
||||
agentGraphVersion=1,
|
||||
userId="test-user",
|
||||
createdAt=now - timedelta(hours=5),
|
||||
updatedAt=now - timedelta(hours=3), # Finished 3 hours ago
|
||||
executionStatus=prisma.enums.AgentExecutionStatus.COMPLETED,
|
||||
isDeleted=False,
|
||||
isShared=False,
|
||||
)
|
||||
agent2_graph = prisma.models.AgentGraph(
|
||||
id="agent2",
|
||||
version=1,
|
||||
name="Agent With Older Execution",
|
||||
description="Has execution finished 3 hours ago",
|
||||
userId="test-user",
|
||||
isActive=True,
|
||||
createdAt=now - timedelta(days=3),
|
||||
Executions=[agent2_execution],
|
||||
)
|
||||
library_agent2 = prisma.models.LibraryAgent(
|
||||
id="lib2",
|
||||
userId="test-user",
|
||||
agentGraphId="agent2",
|
||||
agentGraphVersion=1,
|
||||
settings="{}", # type: ignore
|
||||
isCreatedByUser=True,
|
||||
isDeleted=False,
|
||||
isArchived=False,
|
||||
createdAt=now - timedelta(days=3),
|
||||
updatedAt=now - timedelta(days=3),
|
||||
isFavorite=False,
|
||||
useGraphIsActiveVersion=True,
|
||||
AgentGraph=agent2_graph,
|
||||
)
|
||||
|
||||
# Agent 3: No executions, created 1 day ago (should come after agents with executions)
|
||||
agent3_graph = prisma.models.AgentGraph(
|
||||
id="agent3",
|
||||
version=1,
|
||||
name="Agent Without Executions (Newer)",
|
||||
description="No executions, created 1 day ago",
|
||||
userId="test-user",
|
||||
isActive=True,
|
||||
createdAt=now - timedelta(days=1),
|
||||
Executions=[],
|
||||
)
|
||||
library_agent3 = prisma.models.LibraryAgent(
|
||||
id="lib3",
|
||||
userId="test-user",
|
||||
agentGraphId="agent3",
|
||||
agentGraphVersion=1,
|
||||
settings="{}", # type: ignore
|
||||
isCreatedByUser=True,
|
||||
isDeleted=False,
|
||||
isArchived=False,
|
||||
createdAt=now - timedelta(days=1),
|
||||
updatedAt=now - timedelta(days=1),
|
||||
isFavorite=False,
|
||||
useGraphIsActiveVersion=True,
|
||||
AgentGraph=agent3_graph,
|
||||
)
|
||||
|
||||
# Agent 4: No executions, created 2 days ago
|
||||
agent4_graph = prisma.models.AgentGraph(
|
||||
id="agent4",
|
||||
version=1,
|
||||
name="Agent Without Executions (Older)",
|
||||
description="No executions, created 2 days ago",
|
||||
userId="test-user",
|
||||
isActive=True,
|
||||
createdAt=now - timedelta(days=2),
|
||||
Executions=[],
|
||||
)
|
||||
library_agent4 = prisma.models.LibraryAgent(
|
||||
id="lib4",
|
||||
userId="test-user",
|
||||
agentGraphId="agent4",
|
||||
agentGraphVersion=1,
|
||||
settings="{}", # type: ignore
|
||||
isCreatedByUser=True,
|
||||
isDeleted=False,
|
||||
isArchived=False,
|
||||
createdAt=now - timedelta(days=2),
|
||||
updatedAt=now - timedelta(days=2),
|
||||
isFavorite=False,
|
||||
useGraphIsActiveVersion=True,
|
||||
AgentGraph=agent4_graph,
|
||||
)
|
||||
|
||||
# Return agents in random order to verify sorting works
|
||||
mock_library_agents = [
|
||||
library_agent3,
|
||||
library_agent1,
|
||||
library_agent4,
|
||||
library_agent2,
|
||||
]
|
||||
|
||||
# Mock prisma calls
|
||||
mock_agent_graph = mocker.patch("prisma.models.AgentGraph.prisma")
|
||||
mock_agent_graph.return_value.find_many = mocker.AsyncMock(return_value=[])
|
||||
|
||||
mock_library_agent = mocker.patch("prisma.models.LibraryAgent.prisma")
|
||||
mock_library_agent.return_value.find_many = mocker.AsyncMock(
|
||||
return_value=mock_library_agents
|
||||
)
|
||||
|
||||
# Call function with LAST_EXECUTED sort
|
||||
result = await db.list_library_agents(
|
||||
"test-user",
|
||||
sort_by=library_model.LibraryAgentSort.LAST_EXECUTED,
|
||||
)
|
||||
|
||||
# Verify sorting order:
|
||||
# 1. Agent 1 (execution finished 1 hour ago) - most recent execution
|
||||
# 2. Agent 2 (execution finished 3 hours ago) - older execution
|
||||
# 3. Agent 3 (no executions, created 1 day ago) - newer creation
|
||||
# 4. Agent 4 (no executions, created 2 days ago) - older creation
|
||||
assert len(result.agents) == 4
|
||||
assert (
|
||||
result.agents[0].id == "lib1"
|
||||
), "Agent with most recent execution should be first"
|
||||
assert result.agents[1].id == "lib2", "Agent with older execution should be second"
|
||||
assert (
|
||||
result.agents[2].id == "lib3"
|
||||
), "Agent without executions (newer) should be third"
|
||||
assert (
|
||||
result.agents[3].id == "lib4"
|
||||
), "Agent without executions (older) should be last"
|
||||
|
||||
@@ -442,7 +442,6 @@ class LibraryAgentSort(str, Enum):
|
||||
|
||||
CREATED_AT = "createdAt"
|
||||
UPDATED_AT = "updatedAt"
|
||||
LAST_EXECUTED = "lastExecuted"
|
||||
|
||||
|
||||
class LibraryAgentUpdateRequest(pydantic.BaseModel):
|
||||
|
||||
@@ -28,7 +28,7 @@ async def list_library_agents(
|
||||
None, description="Search term to filter agents"
|
||||
),
|
||||
sort_by: library_model.LibraryAgentSort = Query(
|
||||
library_model.LibraryAgentSort.LAST_EXECUTED,
|
||||
library_model.LibraryAgentSort.UPDATED_AT,
|
||||
description="Criteria to sort results by",
|
||||
),
|
||||
page: int = Query(
|
||||
|
||||
@@ -112,7 +112,7 @@ async def test_get_library_agents_success(
|
||||
mock_db_call.assert_called_once_with(
|
||||
user_id=test_user_id,
|
||||
search_term="test",
|
||||
sort_by=library_model.LibraryAgentSort.LAST_EXECUTED,
|
||||
sort_by=library_model.LibraryAgentSort.UPDATED_AT,
|
||||
page=1,
|
||||
page_size=15,
|
||||
)
|
||||
|
||||
@@ -66,18 +66,24 @@ async def event_broadcaster(manager: ConnectionManager):
|
||||
execution_bus = AsyncRedisExecutionEventBus()
|
||||
notification_bus = AsyncRedisNotificationEventBus()
|
||||
|
||||
async def execution_worker():
|
||||
async for event in execution_bus.listen("*"):
|
||||
await manager.send_execution_update(event)
|
||||
try:
|
||||
|
||||
async def notification_worker():
|
||||
async for notification in notification_bus.listen("*"):
|
||||
await manager.send_notification(
|
||||
user_id=notification.user_id,
|
||||
payload=notification.payload,
|
||||
)
|
||||
async def execution_worker():
|
||||
async for event in execution_bus.listen("*"):
|
||||
await manager.send_execution_update(event)
|
||||
|
||||
await asyncio.gather(execution_worker(), notification_worker())
|
||||
async def notification_worker():
|
||||
async for notification in notification_bus.listen("*"):
|
||||
await manager.send_notification(
|
||||
user_id=notification.user_id,
|
||||
payload=notification.payload,
|
||||
)
|
||||
|
||||
await asyncio.gather(execution_worker(), notification_worker())
|
||||
finally:
|
||||
# Ensure PubSub connections are closed on any exit to prevent leaks
|
||||
await execution_bus.close()
|
||||
await notification_bus.close()
|
||||
|
||||
|
||||
async def authenticate_websocket(websocket: WebSocket) -> str:
|
||||
|
||||
@@ -32,7 +32,7 @@ from backend.data.model import (
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.util import json
|
||||
from backend.util.logging import TruncatedLogger
|
||||
from backend.util.prompt import compress_prompt, estimate_token_count
|
||||
from backend.util.prompt import compress_context, estimate_token_count
|
||||
from backend.util.text import TextFormatter
|
||||
|
||||
logger = TruncatedLogger(logging.getLogger(__name__), "[LLM-Block]")
|
||||
@@ -634,11 +634,18 @@ async def llm_call(
|
||||
context_window = llm_model.context_window
|
||||
|
||||
if compress_prompt_to_fit:
|
||||
prompt = compress_prompt(
|
||||
result = await compress_context(
|
||||
messages=prompt,
|
||||
target_tokens=llm_model.context_window // 2,
|
||||
lossy_ok=True,
|
||||
client=None, # Truncation-only, no LLM summarization
|
||||
reserve=0, # Caller handles response token budget separately
|
||||
)
|
||||
if result.error:
|
||||
logger.warning(
|
||||
f"Prompt compression did not meet target: {result.error}. "
|
||||
f"Proceeding with {result.token_count} tokens."
|
||||
)
|
||||
prompt = result.messages
|
||||
|
||||
# Calculate available tokens based on context window and input length
|
||||
estimated_input_tokens = estimate_token_count(prompt)
|
||||
|
||||
@@ -873,14 +873,13 @@ def is_block_auth_configured(
|
||||
|
||||
|
||||
async def initialize_blocks() -> None:
|
||||
# First, sync all provider costs to blocks
|
||||
# Imported here to avoid circular import
|
||||
from backend.sdk.cost_integration import sync_all_provider_costs
|
||||
from backend.util.retry import func_retry
|
||||
|
||||
sync_all_provider_costs()
|
||||
|
||||
for cls in get_blocks().values():
|
||||
block = cls()
|
||||
@func_retry
|
||||
async def sync_block_to_db(block: Block) -> None:
|
||||
existing_block = await AgentBlock.prisma().find_first(
|
||||
where={"OR": [{"id": block.id}, {"name": block.name}]}
|
||||
)
|
||||
@@ -893,7 +892,7 @@ async def initialize_blocks() -> None:
|
||||
outputSchema=json.dumps(block.output_schema.jsonschema()),
|
||||
)
|
||||
)
|
||||
continue
|
||||
return
|
||||
|
||||
input_schema = json.dumps(block.input_schema.jsonschema())
|
||||
output_schema = json.dumps(block.output_schema.jsonschema())
|
||||
@@ -913,6 +912,25 @@ async def initialize_blocks() -> None:
|
||||
},
|
||||
)
|
||||
|
||||
failed_blocks: list[str] = []
|
||||
for cls in get_blocks().values():
|
||||
block = cls()
|
||||
try:
|
||||
await sync_block_to_db(block)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to sync block {block.name} to database: {e}. "
|
||||
"Block is still available in memory.",
|
||||
exc_info=True,
|
||||
)
|
||||
failed_blocks.append(block.name)
|
||||
|
||||
if failed_blocks:
|
||||
logger.error(
|
||||
f"Failed to sync {len(failed_blocks)} block(s) to database: "
|
||||
f"{', '.join(failed_blocks)}. These blocks are still available in memory."
|
||||
)
|
||||
|
||||
|
||||
# Note on the return type annotation: https://github.com/microsoft/pyright/issues/10281
|
||||
def get_block(block_id: str) -> AnyBlockSchema | None:
|
||||
|
||||
@@ -133,10 +133,23 @@ class RedisEventBus(BaseRedisEventBus[M], ABC):
|
||||
|
||||
|
||||
class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
|
||||
def __init__(self):
|
||||
self._pubsub: AsyncPubSub | None = None
|
||||
|
||||
@property
|
||||
async def connection(self) -> redis.AsyncRedis:
|
||||
return await redis.get_redis_async()
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the PubSub connection if it exists."""
|
||||
if self._pubsub is not None:
|
||||
try:
|
||||
await self._pubsub.close()
|
||||
except Exception:
|
||||
logger.warning("Failed to close PubSub connection", exc_info=True)
|
||||
finally:
|
||||
self._pubsub = None
|
||||
|
||||
async def publish_event(self, event: M, channel_key: str):
|
||||
"""
|
||||
Publish an event to Redis. Gracefully handles connection failures
|
||||
@@ -157,6 +170,7 @@ class AsyncRedisEventBus(BaseRedisEventBus[M], ABC):
|
||||
await self.connection, channel_key
|
||||
)
|
||||
assert isinstance(pubsub, AsyncPubSub)
|
||||
self._pubsub = pubsub
|
||||
|
||||
if "*" in channel_key:
|
||||
await pubsub.psubscribe(full_channel_name)
|
||||
|
||||
@@ -119,9 +119,7 @@ def library_agent_include(
|
||||
if include_executions:
|
||||
agent_graph_include["Executions"] = {
|
||||
"where": {"userId": user_id},
|
||||
"order_by": {
|
||||
"updatedAt": "desc"
|
||||
}, # Uses updatedAt because it reflects when the executioncompleted or last progressed
|
||||
"order_by": {"createdAt": "desc"},
|
||||
"take": execution_limit,
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ from backend.data.analytics import (
|
||||
get_accuracy_trends_and_alerts,
|
||||
get_marketplace_graphs_for_monitoring,
|
||||
)
|
||||
from backend.data.auth.oauth import cleanup_expired_oauth_tokens
|
||||
from backend.data.credit import UsageTransactionMetadata, get_user_credit_model
|
||||
from backend.data.execution import (
|
||||
create_graph_execution,
|
||||
@@ -219,6 +220,9 @@ class DatabaseManager(AppService):
|
||||
# Onboarding
|
||||
increment_onboarding_runs = _(increment_onboarding_runs)
|
||||
|
||||
# OAuth
|
||||
cleanup_expired_oauth_tokens = _(cleanup_expired_oauth_tokens)
|
||||
|
||||
# Store
|
||||
get_store_agents = _(get_store_agents)
|
||||
get_store_agent_details = _(get_store_agent_details)
|
||||
@@ -349,6 +353,9 @@ class DatabaseManagerAsyncClient(AppServiceClient):
|
||||
# Onboarding
|
||||
increment_onboarding_runs = d.increment_onboarding_runs
|
||||
|
||||
# OAuth
|
||||
cleanup_expired_oauth_tokens = d.cleanup_expired_oauth_tokens
|
||||
|
||||
# Store
|
||||
get_store_agents = d.get_store_agents
|
||||
get_store_agent_details = d.get_store_agent_details
|
||||
|
||||
@@ -24,11 +24,9 @@ from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field, ValidationError
|
||||
from sqlalchemy import MetaData, create_engine
|
||||
|
||||
from backend.data.auth.oauth import cleanup_expired_oauth_tokens
|
||||
from backend.data.block import BlockInput
|
||||
from backend.data.execution import GraphExecutionWithNodes
|
||||
from backend.data.model import CredentialsMetaInput
|
||||
from backend.data.onboarding import increment_onboarding_runs
|
||||
from backend.executor import utils as execution_utils
|
||||
from backend.monitoring import (
|
||||
NotificationJobArgs,
|
||||
@@ -38,7 +36,11 @@ from backend.monitoring import (
|
||||
report_execution_accuracy_alerts,
|
||||
report_late_executions,
|
||||
)
|
||||
from backend.util.clients import get_database_manager_client, get_scheduler_client
|
||||
from backend.util.clients import (
|
||||
get_database_manager_async_client,
|
||||
get_database_manager_client,
|
||||
get_scheduler_client,
|
||||
)
|
||||
from backend.util.cloud_storage import cleanup_expired_files_async
|
||||
from backend.util.exceptions import (
|
||||
GraphNotFoundError,
|
||||
@@ -148,6 +150,7 @@ def execute_graph(**kwargs):
|
||||
async def _execute_graph(**kwargs):
|
||||
args = GraphExecutionJobArgs(**kwargs)
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
db = get_database_manager_async_client()
|
||||
try:
|
||||
logger.info(f"Executing recurring job for graph #{args.graph_id}")
|
||||
graph_exec: GraphExecutionWithNodes = await execution_utils.add_graph_execution(
|
||||
@@ -157,7 +160,7 @@ async def _execute_graph(**kwargs):
|
||||
inputs=args.input_data,
|
||||
graph_credentials_inputs=args.input_credentials,
|
||||
)
|
||||
await increment_onboarding_runs(args.user_id)
|
||||
await db.increment_onboarding_runs(args.user_id)
|
||||
elapsed = asyncio.get_event_loop().time() - start_time
|
||||
logger.info(
|
||||
f"Graph execution started with ID {graph_exec.id} for graph {args.graph_id} "
|
||||
@@ -246,8 +249,13 @@ def cleanup_expired_files():
|
||||
|
||||
def cleanup_oauth_tokens():
|
||||
"""Clean up expired OAuth tokens from the database."""
|
||||
|
||||
# Wait for completion
|
||||
run_async(cleanup_expired_oauth_tokens())
|
||||
async def _cleanup():
|
||||
db = get_database_manager_async_client()
|
||||
return await db.cleanup_expired_oauth_tokens()
|
||||
|
||||
run_async(_cleanup())
|
||||
|
||||
|
||||
def execution_accuracy_alerts():
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import fastapi
|
||||
from fastapi.routing import APIRoute
|
||||
|
||||
from backend.api.features.integrations.router import router as integrations_router
|
||||
from backend.integrations.providers import ProviderName
|
||||
from backend.integrations.webhooks import utils as webhooks_utils
|
||||
|
||||
|
||||
def test_webhook_ingress_url_matches_route(monkeypatch) -> None:
|
||||
app = fastapi.FastAPI()
|
||||
app.include_router(integrations_router, prefix="/api/integrations")
|
||||
|
||||
provider = ProviderName.GITHUB
|
||||
webhook_id = "webhook_123"
|
||||
base_url = "https://example.com"
|
||||
|
||||
monkeypatch.setattr(webhooks_utils.app_config, "platform_base_url", base_url)
|
||||
|
||||
route = next(
|
||||
route
|
||||
for route in integrations_router.routes
|
||||
if isinstance(route, APIRoute)
|
||||
and route.path == "/{provider}/webhooks/{webhook_id}/ingress"
|
||||
and "POST" in route.methods
|
||||
)
|
||||
expected_path = f"/api/integrations{route.path}".format(
|
||||
provider=provider.value,
|
||||
webhook_id=webhook_id,
|
||||
)
|
||||
actual_url = urlparse(webhooks_utils.webhook_ingress_url(provider, webhook_id))
|
||||
expected_base = urlparse(base_url)
|
||||
|
||||
assert (actual_url.scheme, actual_url.netloc) == (
|
||||
expected_base.scheme,
|
||||
expected_base.netloc,
|
||||
)
|
||||
assert actual_url.path == expected_path
|
||||
@@ -1,10 +1,19 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
from typing import Any
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from tiktoken import encoding_for_model
|
||||
|
||||
from backend.util import json
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------#
|
||||
# CONSTANTS #
|
||||
# ---------------------------------------------------------------------------#
|
||||
@@ -100,9 +109,17 @@ def _is_objective_message(msg: dict) -> bool:
|
||||
def _truncate_tool_message_content(msg: dict, enc, max_tokens: int) -> None:
|
||||
"""
|
||||
Carefully truncate tool message content while preserving tool structure.
|
||||
Only truncates tool_result content, leaves tool_use intact.
|
||||
Handles both Anthropic-style (list content) and OpenAI-style (string content) tool messages.
|
||||
"""
|
||||
content = msg.get("content")
|
||||
|
||||
# OpenAI-style tool message: role="tool" with string content
|
||||
if msg.get("role") == "tool" and isinstance(content, str):
|
||||
if _tok_len(content, enc) > max_tokens:
|
||||
msg["content"] = _truncate_middle_tokens(content, enc, max_tokens)
|
||||
return
|
||||
|
||||
# Anthropic-style: list content with tool_result items
|
||||
if not isinstance(content, list):
|
||||
return
|
||||
|
||||
@@ -140,141 +157,6 @@ def _truncate_middle_tokens(text: str, enc, max_tok: int) -> str:
|
||||
# ---------------------------------------------------------------------------#
|
||||
|
||||
|
||||
def compress_prompt(
|
||||
messages: list[dict],
|
||||
target_tokens: int,
|
||||
*,
|
||||
model: str = "gpt-4o",
|
||||
reserve: int = 2_048,
|
||||
start_cap: int = 8_192,
|
||||
floor_cap: int = 128,
|
||||
lossy_ok: bool = True,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Shrink *messages* so that::
|
||||
|
||||
token_count(prompt) + reserve ≤ target_tokens
|
||||
|
||||
Strategy
|
||||
--------
|
||||
1. **Token-aware truncation** – progressively halve a per-message cap
|
||||
(`start_cap`, `start_cap/2`, … `floor_cap`) and apply it to the
|
||||
*content* of every message except the first and last. Tool shells
|
||||
are included: we keep the envelope but shorten huge payloads.
|
||||
2. **Middle-out deletion** – if still over the limit, delete whole
|
||||
messages working outward from the centre, **skipping** any message
|
||||
that contains ``tool_calls`` or has ``role == "tool"``.
|
||||
3. **Last-chance trim** – if still too big, truncate the *first* and
|
||||
*last* message bodies down to `floor_cap` tokens.
|
||||
4. If the prompt is *still* too large:
|
||||
• raise ``ValueError`` when ``lossy_ok == False`` (default)
|
||||
• return the partially-trimmed prompt when ``lossy_ok == True``
|
||||
|
||||
Parameters
|
||||
----------
|
||||
messages Complete chat history (will be deep-copied).
|
||||
model Model name; passed to tiktoken to pick the right
|
||||
tokenizer (gpt-4o → 'o200k_base', others fallback).
|
||||
target_tokens Hard ceiling for prompt size **excluding** the model's
|
||||
forthcoming answer.
|
||||
reserve How many tokens you want to leave available for that
|
||||
answer (`max_tokens` in your subsequent completion call).
|
||||
start_cap Initial per-message truncation ceiling (tokens).
|
||||
floor_cap Lowest cap we'll accept before moving to deletions.
|
||||
lossy_ok If *True* return best-effort prompt instead of raising
|
||||
after all trim passes have been exhausted.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list[dict] – A *new* messages list that abides by the rules above.
|
||||
"""
|
||||
enc = encoding_for_model(model) # best-match tokenizer
|
||||
msgs = deepcopy(messages) # never mutate caller
|
||||
|
||||
def total_tokens() -> int:
|
||||
"""Current size of *msgs* in tokens."""
|
||||
return sum(_msg_tokens(m, enc) for m in msgs)
|
||||
|
||||
original_token_count = total_tokens()
|
||||
|
||||
if original_token_count + reserve <= target_tokens:
|
||||
return msgs
|
||||
|
||||
# ---- STEP 0 : normalise content --------------------------------------
|
||||
# Convert non-string payloads to strings so token counting is coherent.
|
||||
for i, m in enumerate(msgs):
|
||||
if not isinstance(m.get("content"), str) and m.get("content") is not None:
|
||||
if _is_tool_message(m):
|
||||
continue
|
||||
|
||||
# Keep first and last messages intact (unless they're tool messages)
|
||||
if i == 0 or i == len(msgs) - 1:
|
||||
continue
|
||||
|
||||
# Reasonable 20k-char ceiling prevents pathological blobs
|
||||
content_str = json.dumps(m["content"], separators=(",", ":"))
|
||||
if len(content_str) > 20_000:
|
||||
content_str = _truncate_middle_tokens(content_str, enc, 20_000)
|
||||
m["content"] = content_str
|
||||
|
||||
# ---- STEP 1 : token-aware truncation ---------------------------------
|
||||
cap = start_cap
|
||||
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
|
||||
for m in msgs[1:-1]: # keep first & last intact
|
||||
if _is_tool_message(m):
|
||||
# For tool messages, only truncate tool result content, preserve structure
|
||||
_truncate_tool_message_content(m, enc, cap)
|
||||
continue
|
||||
|
||||
if _is_objective_message(m):
|
||||
# Never truncate objective messages - they contain the core task
|
||||
continue
|
||||
|
||||
content = m.get("content") or ""
|
||||
if _tok_len(content, enc) > cap:
|
||||
m["content"] = _truncate_middle_tokens(content, enc, cap)
|
||||
cap //= 2 # tighten the screw
|
||||
|
||||
# ---- STEP 2 : middle-out deletion -----------------------------------
|
||||
while total_tokens() + reserve > target_tokens and len(msgs) > 2:
|
||||
# Identify all deletable messages (not first/last, not tool messages, not objective messages)
|
||||
deletable_indices = []
|
||||
for i in range(1, len(msgs) - 1): # Skip first and last
|
||||
if not _is_tool_message(msgs[i]) and not _is_objective_message(msgs[i]):
|
||||
deletable_indices.append(i)
|
||||
|
||||
if not deletable_indices:
|
||||
break # nothing more we can drop
|
||||
|
||||
# Delete from center outward - find the index closest to center
|
||||
centre = len(msgs) // 2
|
||||
to_delete = min(deletable_indices, key=lambda i: abs(i - centre))
|
||||
del msgs[to_delete]
|
||||
|
||||
# ---- STEP 3 : final safety-net trim on first & last ------------------
|
||||
cap = start_cap
|
||||
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
|
||||
for idx in (0, -1): # first and last
|
||||
if _is_tool_message(msgs[idx]):
|
||||
# For tool messages at first/last position, truncate tool result content only
|
||||
_truncate_tool_message_content(msgs[idx], enc, cap)
|
||||
continue
|
||||
|
||||
text = msgs[idx].get("content") or ""
|
||||
if _tok_len(text, enc) > cap:
|
||||
msgs[idx]["content"] = _truncate_middle_tokens(text, enc, cap)
|
||||
cap //= 2 # tighten the screw
|
||||
|
||||
# ---- STEP 4 : success or fail-gracefully -----------------------------
|
||||
if total_tokens() + reserve > target_tokens and not lossy_ok:
|
||||
raise ValueError(
|
||||
"compress_prompt: prompt still exceeds budget "
|
||||
f"({total_tokens() + reserve} > {target_tokens})."
|
||||
)
|
||||
|
||||
return msgs
|
||||
|
||||
|
||||
def estimate_token_count(
|
||||
messages: list[dict],
|
||||
*,
|
||||
@@ -293,7 +175,8 @@ def estimate_token_count(
|
||||
-------
|
||||
int – Token count.
|
||||
"""
|
||||
enc = encoding_for_model(model) # best-match tokenizer
|
||||
token_model = _normalize_model_for_tokenizer(model)
|
||||
enc = encoding_for_model(token_model)
|
||||
return sum(_msg_tokens(m, enc) for m in messages)
|
||||
|
||||
|
||||
@@ -315,6 +198,543 @@ def estimate_token_count_str(
|
||||
-------
|
||||
int – Token count.
|
||||
"""
|
||||
enc = encoding_for_model(model) # best-match tokenizer
|
||||
token_model = _normalize_model_for_tokenizer(model)
|
||||
enc = encoding_for_model(token_model)
|
||||
text = json.dumps(text) if not isinstance(text, str) else text
|
||||
return _tok_len(text, enc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------#
|
||||
# UNIFIED CONTEXT COMPRESSION #
|
||||
# ---------------------------------------------------------------------------#
|
||||
|
||||
# Default thresholds
|
||||
DEFAULT_TOKEN_THRESHOLD = 120_000
|
||||
DEFAULT_KEEP_RECENT = 15
|
||||
|
||||
|
||||
@dataclass
|
||||
class CompressResult:
|
||||
"""Result of context compression."""
|
||||
|
||||
messages: list[dict]
|
||||
token_count: int
|
||||
was_compacted: bool
|
||||
error: str | None = None
|
||||
original_token_count: int = 0
|
||||
messages_summarized: int = 0
|
||||
messages_dropped: int = 0
|
||||
|
||||
|
||||
def _normalize_model_for_tokenizer(model: str) -> str:
|
||||
"""Normalize model name for tiktoken tokenizer selection."""
|
||||
if "/" in model:
|
||||
model = model.split("/")[-1]
|
||||
if "claude" in model.lower() or not any(
|
||||
known in model.lower() for known in ["gpt", "o1", "chatgpt", "text-"]
|
||||
):
|
||||
return "gpt-4o"
|
||||
return model
|
||||
|
||||
|
||||
def _extract_tool_call_ids_from_message(msg: dict) -> set[str]:
|
||||
"""
|
||||
Extract tool_call IDs from an assistant message.
|
||||
|
||||
Supports both formats:
|
||||
- OpenAI: {"role": "assistant", "tool_calls": [{"id": "..."}]}
|
||||
- Anthropic: {"role": "assistant", "content": [{"type": "tool_use", "id": "..."}]}
|
||||
|
||||
Returns:
|
||||
Set of tool_call IDs found in the message.
|
||||
"""
|
||||
ids: set[str] = set()
|
||||
if msg.get("role") != "assistant":
|
||||
return ids
|
||||
|
||||
# OpenAI format: tool_calls array
|
||||
if msg.get("tool_calls"):
|
||||
for tc in msg["tool_calls"]:
|
||||
tc_id = tc.get("id")
|
||||
if tc_id:
|
||||
ids.add(tc_id)
|
||||
|
||||
# Anthropic format: content list with tool_use blocks
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict) and block.get("type") == "tool_use":
|
||||
tc_id = block.get("id")
|
||||
if tc_id:
|
||||
ids.add(tc_id)
|
||||
|
||||
return ids
|
||||
|
||||
|
||||
def _extract_tool_response_ids_from_message(msg: dict) -> set[str]:
|
||||
"""
|
||||
Extract tool_call IDs that this message is responding to.
|
||||
|
||||
Supports both formats:
|
||||
- OpenAI: {"role": "tool", "tool_call_id": "..."}
|
||||
- Anthropic: {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "..."}]}
|
||||
|
||||
Returns:
|
||||
Set of tool_call IDs this message responds to.
|
||||
"""
|
||||
ids: set[str] = set()
|
||||
|
||||
# OpenAI format: role=tool with tool_call_id
|
||||
if msg.get("role") == "tool":
|
||||
tc_id = msg.get("tool_call_id")
|
||||
if tc_id:
|
||||
ids.add(tc_id)
|
||||
|
||||
# Anthropic format: content list with tool_result blocks
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict) and block.get("type") == "tool_result":
|
||||
tc_id = block.get("tool_use_id")
|
||||
if tc_id:
|
||||
ids.add(tc_id)
|
||||
|
||||
return ids
|
||||
|
||||
|
||||
def _is_tool_response_message(msg: dict) -> bool:
|
||||
"""Check if message is a tool response (OpenAI or Anthropic format)."""
|
||||
# OpenAI format
|
||||
if msg.get("role") == "tool":
|
||||
return True
|
||||
# Anthropic format
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict) and block.get("type") == "tool_result":
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _remove_orphan_tool_responses(
|
||||
messages: list[dict], orphan_ids: set[str]
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Remove tool response messages/blocks that reference orphan tool_call IDs.
|
||||
|
||||
Supports both OpenAI and Anthropic formats.
|
||||
For Anthropic messages with mixed valid/orphan tool_result blocks,
|
||||
filters out only the orphan blocks instead of dropping the entire message.
|
||||
"""
|
||||
result = []
|
||||
for msg in messages:
|
||||
# OpenAI format: role=tool - drop entire message if orphan
|
||||
if msg.get("role") == "tool":
|
||||
tc_id = msg.get("tool_call_id")
|
||||
if tc_id and tc_id in orphan_ids:
|
||||
continue
|
||||
result.append(msg)
|
||||
continue
|
||||
|
||||
# Anthropic format: content list may have mixed tool_result blocks
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
has_tool_results = any(
|
||||
isinstance(b, dict) and b.get("type") == "tool_result" for b in content
|
||||
)
|
||||
if has_tool_results:
|
||||
# Filter out orphan tool_result blocks, keep valid ones
|
||||
filtered_content = [
|
||||
block
|
||||
for block in content
|
||||
if not (
|
||||
isinstance(block, dict)
|
||||
and block.get("type") == "tool_result"
|
||||
and block.get("tool_use_id") in orphan_ids
|
||||
)
|
||||
]
|
||||
# Only keep message if it has remaining content
|
||||
if filtered_content:
|
||||
msg = msg.copy()
|
||||
msg["content"] = filtered_content
|
||||
result.append(msg)
|
||||
continue
|
||||
|
||||
result.append(msg)
|
||||
return result
|
||||
|
||||
|
||||
def _ensure_tool_pairs_intact(
|
||||
recent_messages: list[dict],
|
||||
all_messages: list[dict],
|
||||
start_index: int,
|
||||
) -> list[dict]:
|
||||
"""
|
||||
Ensure tool_call/tool_response pairs stay together after slicing.
|
||||
|
||||
When slicing messages for context compaction, a naive slice can separate
|
||||
an assistant message containing tool_calls from its corresponding tool
|
||||
response messages. This causes API validation errors (e.g., Anthropic's
|
||||
"unexpected tool_use_id found in tool_result blocks").
|
||||
|
||||
This function checks for orphan tool responses in the slice and extends
|
||||
backwards to include their corresponding assistant messages.
|
||||
|
||||
Supports both formats:
|
||||
- OpenAI: tool_calls array + role="tool" responses
|
||||
- Anthropic: tool_use blocks + tool_result blocks
|
||||
|
||||
Args:
|
||||
recent_messages: The sliced messages to validate
|
||||
all_messages: The complete message list (for looking up missing assistants)
|
||||
start_index: The index in all_messages where recent_messages begins
|
||||
|
||||
Returns:
|
||||
A potentially extended list of messages with tool pairs intact
|
||||
"""
|
||||
if not recent_messages:
|
||||
return recent_messages
|
||||
|
||||
# Collect all tool_call_ids from assistant messages in the slice
|
||||
available_tool_call_ids: set[str] = set()
|
||||
for msg in recent_messages:
|
||||
available_tool_call_ids |= _extract_tool_call_ids_from_message(msg)
|
||||
|
||||
# Find orphan tool responses (responses whose tool_call_id is missing)
|
||||
orphan_tool_call_ids: set[str] = set()
|
||||
for msg in recent_messages:
|
||||
response_ids = _extract_tool_response_ids_from_message(msg)
|
||||
for tc_id in response_ids:
|
||||
if tc_id not in available_tool_call_ids:
|
||||
orphan_tool_call_ids.add(tc_id)
|
||||
|
||||
if not orphan_tool_call_ids:
|
||||
# No orphans, slice is valid
|
||||
return recent_messages
|
||||
|
||||
# Find the assistant messages that contain the orphan tool_call_ids
|
||||
# Search backwards from start_index in all_messages
|
||||
messages_to_prepend: list[dict] = []
|
||||
for i in range(start_index - 1, -1, -1):
|
||||
msg = all_messages[i]
|
||||
msg_tool_ids = _extract_tool_call_ids_from_message(msg)
|
||||
if msg_tool_ids & orphan_tool_call_ids:
|
||||
# This assistant message has tool_calls we need
|
||||
# Also collect its contiguous tool responses that follow it
|
||||
assistant_and_responses: list[dict] = [msg]
|
||||
|
||||
# Scan forward from this assistant to collect tool responses
|
||||
for j in range(i + 1, start_index):
|
||||
following_msg = all_messages[j]
|
||||
following_response_ids = _extract_tool_response_ids_from_message(
|
||||
following_msg
|
||||
)
|
||||
if following_response_ids and following_response_ids & msg_tool_ids:
|
||||
assistant_and_responses.append(following_msg)
|
||||
elif not _is_tool_response_message(following_msg):
|
||||
# Stop at first non-tool-response message
|
||||
break
|
||||
|
||||
# Prepend the assistant and its tool responses (maintain order)
|
||||
messages_to_prepend = assistant_and_responses + messages_to_prepend
|
||||
# Mark these as found
|
||||
orphan_tool_call_ids -= msg_tool_ids
|
||||
# Also add this assistant's tool_call_ids to available set
|
||||
available_tool_call_ids |= msg_tool_ids
|
||||
|
||||
if not orphan_tool_call_ids:
|
||||
# Found all missing assistants
|
||||
break
|
||||
|
||||
if orphan_tool_call_ids:
|
||||
# Some tool_call_ids couldn't be resolved - remove those tool responses
|
||||
# This shouldn't happen in normal operation but handles edge cases
|
||||
logger.warning(
|
||||
f"Could not find assistant messages for tool_call_ids: {orphan_tool_call_ids}. "
|
||||
"Removing orphan tool responses."
|
||||
)
|
||||
recent_messages = _remove_orphan_tool_responses(
|
||||
recent_messages, orphan_tool_call_ids
|
||||
)
|
||||
|
||||
if messages_to_prepend:
|
||||
logger.info(
|
||||
f"Extended recent messages by {len(messages_to_prepend)} to preserve "
|
||||
f"tool_call/tool_response pairs"
|
||||
)
|
||||
return messages_to_prepend + recent_messages
|
||||
|
||||
return recent_messages
|
||||
|
||||
|
||||
async def _summarize_messages_llm(
|
||||
messages: list[dict],
|
||||
client: AsyncOpenAI,
|
||||
model: str,
|
||||
timeout: float = 30.0,
|
||||
) -> str:
|
||||
"""Summarize messages using an LLM."""
|
||||
conversation = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "")
|
||||
content = msg.get("content", "")
|
||||
if content and role in ("user", "assistant", "tool"):
|
||||
conversation.append(f"{role.upper()}: {content}")
|
||||
|
||||
conversation_text = "\n\n".join(conversation)
|
||||
|
||||
if not conversation_text:
|
||||
return "No conversation history available."
|
||||
|
||||
# Limit to ~100k chars for safety
|
||||
MAX_CHARS = 100_000
|
||||
if len(conversation_text) > MAX_CHARS:
|
||||
conversation_text = conversation_text[:MAX_CHARS] + "\n\n[truncated]"
|
||||
|
||||
response = await client.with_options(timeout=timeout).chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"Create a detailed summary of the conversation so far. "
|
||||
"This summary will be used as context when continuing the conversation.\n\n"
|
||||
"Before writing the summary, analyze each message chronologically to identify:\n"
|
||||
"- User requests and their explicit goals\n"
|
||||
"- Your approach and key decisions made\n"
|
||||
"- Technical specifics (file names, tool outputs, function signatures)\n"
|
||||
"- Errors encountered and resolutions applied\n\n"
|
||||
"You MUST include ALL of the following sections:\n\n"
|
||||
"## 1. Primary Request and Intent\n"
|
||||
"The user's explicit goals and what they are trying to accomplish.\n\n"
|
||||
"## 2. Key Technical Concepts\n"
|
||||
"Technologies, frameworks, tools, and patterns being used or discussed.\n\n"
|
||||
"## 3. Files and Resources Involved\n"
|
||||
"Specific files examined or modified, with relevant snippets and identifiers.\n\n"
|
||||
"## 4. Errors and Fixes\n"
|
||||
"Problems encountered, error messages, and their resolutions. "
|
||||
"Include any user feedback on fixes.\n\n"
|
||||
"## 5. Problem Solving\n"
|
||||
"Issues that have been resolved and how they were addressed.\n\n"
|
||||
"## 6. All User Messages\n"
|
||||
"A complete list of all user inputs (excluding tool outputs) to preserve their exact requests.\n\n"
|
||||
"## 7. Pending Tasks\n"
|
||||
"Work items the user explicitly requested that have not yet been completed.\n\n"
|
||||
"## 8. Current Work\n"
|
||||
"Precise description of what was being worked on most recently, including relevant context.\n\n"
|
||||
"## 9. Next Steps\n"
|
||||
"What should happen next, aligned with the user's most recent requests. "
|
||||
"Include verbatim quotes of recent instructions if relevant."
|
||||
),
|
||||
},
|
||||
{"role": "user", "content": f"Summarize:\n\n{conversation_text}"},
|
||||
],
|
||||
max_tokens=1500,
|
||||
temperature=0.3,
|
||||
)
|
||||
|
||||
return response.choices[0].message.content or "No summary available."
|
||||
|
||||
|
||||
async def compress_context(
|
||||
messages: list[dict],
|
||||
target_tokens: int = DEFAULT_TOKEN_THRESHOLD,
|
||||
*,
|
||||
model: str = "gpt-4o",
|
||||
client: AsyncOpenAI | None = None,
|
||||
keep_recent: int = DEFAULT_KEEP_RECENT,
|
||||
reserve: int = 2_048,
|
||||
start_cap: int = 8_192,
|
||||
floor_cap: int = 128,
|
||||
) -> CompressResult:
|
||||
"""
|
||||
Unified context compression that combines summarization and truncation strategies.
|
||||
|
||||
Strategy (in order):
|
||||
1. **LLM summarization** – If client provided, summarize old messages into a
|
||||
single context message while keeping recent messages intact. This is the
|
||||
primary strategy for chat service.
|
||||
2. **Content truncation** – Progressively halve a per-message cap and truncate
|
||||
bloated message content (tool outputs, large pastes). Preserves all messages
|
||||
but shortens their content. Primary strategy when client=None (LLM blocks).
|
||||
3. **Middle-out deletion** – Delete whole messages one at a time from the center
|
||||
outward, skipping tool messages and objective messages.
|
||||
4. **First/last trim** – Truncate first and last message content as last resort.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
messages Complete chat history (will be deep-copied).
|
||||
target_tokens Hard ceiling for prompt size.
|
||||
model Model name for tokenization and summarization.
|
||||
client AsyncOpenAI client. If provided, enables LLM summarization
|
||||
as the first strategy. If None, skips to truncation strategies.
|
||||
keep_recent Number of recent messages to preserve during summarization.
|
||||
reserve Tokens to reserve for model response.
|
||||
start_cap Initial per-message truncation ceiling (tokens).
|
||||
floor_cap Lowest cap before moving to deletions.
|
||||
|
||||
Returns
|
||||
-------
|
||||
CompressResult with compressed messages and metadata.
|
||||
"""
|
||||
# Guard clause for empty messages
|
||||
if not messages:
|
||||
return CompressResult(
|
||||
messages=[],
|
||||
token_count=0,
|
||||
was_compacted=False,
|
||||
original_token_count=0,
|
||||
)
|
||||
|
||||
token_model = _normalize_model_for_tokenizer(model)
|
||||
enc = encoding_for_model(token_model)
|
||||
msgs = deepcopy(messages)
|
||||
|
||||
def total_tokens() -> int:
|
||||
return sum(_msg_tokens(m, enc) for m in msgs)
|
||||
|
||||
original_count = total_tokens()
|
||||
|
||||
# Already under limit
|
||||
if original_count + reserve <= target_tokens:
|
||||
return CompressResult(
|
||||
messages=msgs,
|
||||
token_count=original_count,
|
||||
was_compacted=False,
|
||||
original_token_count=original_count,
|
||||
)
|
||||
|
||||
messages_summarized = 0
|
||||
messages_dropped = 0
|
||||
|
||||
# ---- STEP 1: LLM summarization (if client provided) -------------------
|
||||
# This is the primary compression strategy for chat service.
|
||||
# Summarize old messages while keeping recent ones intact.
|
||||
if client is not None:
|
||||
has_system = len(msgs) > 0 and msgs[0].get("role") == "system"
|
||||
system_msg = msgs[0] if has_system else None
|
||||
|
||||
# Calculate old vs recent messages
|
||||
if has_system:
|
||||
if len(msgs) > keep_recent + 1:
|
||||
old_msgs = msgs[1:-keep_recent]
|
||||
recent_msgs = msgs[-keep_recent:]
|
||||
else:
|
||||
old_msgs = []
|
||||
recent_msgs = msgs[1:] if len(msgs) > 1 else []
|
||||
else:
|
||||
if len(msgs) > keep_recent:
|
||||
old_msgs = msgs[:-keep_recent]
|
||||
recent_msgs = msgs[-keep_recent:]
|
||||
else:
|
||||
old_msgs = []
|
||||
recent_msgs = msgs
|
||||
|
||||
# Ensure tool pairs stay intact
|
||||
slice_start = max(0, len(msgs) - keep_recent)
|
||||
recent_msgs = _ensure_tool_pairs_intact(recent_msgs, msgs, slice_start)
|
||||
|
||||
if old_msgs:
|
||||
try:
|
||||
summary_text = await _summarize_messages_llm(old_msgs, client, model)
|
||||
summary_msg = {
|
||||
"role": "assistant",
|
||||
"content": f"[Previous conversation summary — for context only]: {summary_text}",
|
||||
}
|
||||
messages_summarized = len(old_msgs)
|
||||
|
||||
if has_system:
|
||||
msgs = [system_msg, summary_msg] + recent_msgs
|
||||
else:
|
||||
msgs = [summary_msg] + recent_msgs
|
||||
|
||||
logger.info(
|
||||
f"Context summarized: {original_count} -> {total_tokens()} tokens, "
|
||||
f"summarized {messages_summarized} messages"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Summarization failed, continuing with truncation: {e}")
|
||||
# Fall through to content truncation
|
||||
|
||||
# ---- STEP 2: Normalize content ----------------------------------------
|
||||
# Convert non-string payloads to strings so token counting is coherent.
|
||||
# Always run this before truncation to ensure consistent token counting.
|
||||
for i, m in enumerate(msgs):
|
||||
if not isinstance(m.get("content"), str) and m.get("content") is not None:
|
||||
if _is_tool_message(m):
|
||||
continue
|
||||
if i == 0 or i == len(msgs) - 1:
|
||||
continue
|
||||
content_str = json.dumps(m["content"], separators=(",", ":"))
|
||||
if len(content_str) > 20_000:
|
||||
content_str = _truncate_middle_tokens(content_str, enc, 20_000)
|
||||
m["content"] = content_str
|
||||
|
||||
# ---- STEP 3: Token-aware content truncation ---------------------------
|
||||
# Progressively halve per-message cap and truncate bloated content.
|
||||
# This preserves all messages but shortens their content.
|
||||
cap = start_cap
|
||||
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
|
||||
for m in msgs[1:-1]:
|
||||
if _is_tool_message(m):
|
||||
_truncate_tool_message_content(m, enc, cap)
|
||||
continue
|
||||
if _is_objective_message(m):
|
||||
continue
|
||||
content = m.get("content") or ""
|
||||
if _tok_len(content, enc) > cap:
|
||||
m["content"] = _truncate_middle_tokens(content, enc, cap)
|
||||
cap //= 2
|
||||
|
||||
# ---- STEP 4: Middle-out deletion --------------------------------------
|
||||
# Delete messages one at a time from the center outward.
|
||||
# This is more granular than dropping all old messages at once.
|
||||
while total_tokens() + reserve > target_tokens and len(msgs) > 2:
|
||||
deletable: list[int] = []
|
||||
for i in range(1, len(msgs) - 1):
|
||||
msg = msgs[i]
|
||||
if (
|
||||
msg is not None
|
||||
and not _is_tool_message(msg)
|
||||
and not _is_objective_message(msg)
|
||||
):
|
||||
deletable.append(i)
|
||||
if not deletable:
|
||||
break
|
||||
centre = len(msgs) // 2
|
||||
to_delete = min(deletable, key=lambda i: abs(i - centre))
|
||||
del msgs[to_delete]
|
||||
messages_dropped += 1
|
||||
|
||||
# ---- STEP 5: Final trim on first/last ---------------------------------
|
||||
cap = start_cap
|
||||
while total_tokens() + reserve > target_tokens and cap >= floor_cap:
|
||||
for idx in (0, -1):
|
||||
msg = msgs[idx]
|
||||
if msg is None:
|
||||
continue
|
||||
if _is_tool_message(msg):
|
||||
_truncate_tool_message_content(msg, enc, cap)
|
||||
continue
|
||||
text = msg.get("content") or ""
|
||||
if _tok_len(text, enc) > cap:
|
||||
msg["content"] = _truncate_middle_tokens(text, enc, cap)
|
||||
cap //= 2
|
||||
|
||||
# Filter out any None values that may have been introduced
|
||||
final_msgs: list[dict] = [m for m in msgs if m is not None]
|
||||
final_count = sum(_msg_tokens(m, enc) for m in final_msgs)
|
||||
error = None
|
||||
if final_count + reserve > target_tokens:
|
||||
error = f"Could not compress below target ({final_count + reserve} > {target_tokens})"
|
||||
logger.warning(error)
|
||||
|
||||
return CompressResult(
|
||||
messages=final_msgs,
|
||||
token_count=final_count,
|
||||
was_compacted=True,
|
||||
error=error,
|
||||
original_token_count=original_count,
|
||||
messages_summarized=messages_summarized,
|
||||
messages_dropped=messages_dropped,
|
||||
)
|
||||
|
||||
@@ -1,10 +1,21 @@
|
||||
"""Tests for prompt utility functions, especially tool call token counting."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
from tiktoken import encoding_for_model
|
||||
|
||||
from backend.util import json
|
||||
from backend.util.prompt import _msg_tokens, estimate_token_count
|
||||
from backend.util.prompt import (
|
||||
CompressResult,
|
||||
_ensure_tool_pairs_intact,
|
||||
_msg_tokens,
|
||||
_normalize_model_for_tokenizer,
|
||||
_truncate_middle_tokens,
|
||||
_truncate_tool_message_content,
|
||||
compress_context,
|
||||
estimate_token_count,
|
||||
)
|
||||
|
||||
|
||||
class TestMsgTokens:
|
||||
@@ -276,3 +287,690 @@ class TestEstimateTokenCount:
|
||||
|
||||
assert total_tokens == expected_total
|
||||
assert total_tokens > 20 # Should be substantial
|
||||
|
||||
|
||||
class TestNormalizeModelForTokenizer:
|
||||
"""Test model name normalization for tiktoken."""
|
||||
|
||||
def test_openai_models_unchanged(self):
|
||||
"""Test that OpenAI models are returned as-is."""
|
||||
assert _normalize_model_for_tokenizer("gpt-4o") == "gpt-4o"
|
||||
assert _normalize_model_for_tokenizer("gpt-4") == "gpt-4"
|
||||
assert _normalize_model_for_tokenizer("gpt-3.5-turbo") == "gpt-3.5-turbo"
|
||||
|
||||
def test_claude_models_normalized(self):
|
||||
"""Test that Claude models are normalized to gpt-4o."""
|
||||
assert _normalize_model_for_tokenizer("claude-3-opus") == "gpt-4o"
|
||||
assert _normalize_model_for_tokenizer("claude-3-sonnet") == "gpt-4o"
|
||||
assert _normalize_model_for_tokenizer("anthropic/claude-3-haiku") == "gpt-4o"
|
||||
|
||||
def test_openrouter_paths_extracted(self):
|
||||
"""Test that OpenRouter model paths are handled."""
|
||||
assert _normalize_model_for_tokenizer("openai/gpt-4o") == "gpt-4o"
|
||||
assert _normalize_model_for_tokenizer("anthropic/claude-3-opus") == "gpt-4o"
|
||||
|
||||
def test_unknown_models_default_to_gpt4o(self):
|
||||
"""Test that unknown models default to gpt-4o."""
|
||||
assert _normalize_model_for_tokenizer("some-random-model") == "gpt-4o"
|
||||
assert _normalize_model_for_tokenizer("llama-3-70b") == "gpt-4o"
|
||||
|
||||
|
||||
class TestTruncateToolMessageContent:
|
||||
"""Test tool message content truncation."""
|
||||
|
||||
@pytest.fixture
|
||||
def enc(self):
|
||||
return encoding_for_model("gpt-4o")
|
||||
|
||||
def test_truncate_openai_tool_message(self, enc):
|
||||
"""Test truncation of OpenAI-style tool message with string content."""
|
||||
long_content = "x" * 10000
|
||||
msg = {"role": "tool", "tool_call_id": "call_123", "content": long_content}
|
||||
|
||||
_truncate_tool_message_content(msg, enc, max_tokens=100)
|
||||
|
||||
# Content should be truncated
|
||||
assert len(msg["content"]) < len(long_content)
|
||||
assert "…" in msg["content"] # Has ellipsis marker
|
||||
|
||||
def test_truncate_anthropic_tool_result(self, enc):
|
||||
"""Test truncation of Anthropic-style tool_result."""
|
||||
long_content = "y" * 10000
|
||||
msg = {
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_123",
|
||||
"content": long_content,
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
_truncate_tool_message_content(msg, enc, max_tokens=100)
|
||||
|
||||
# Content should be truncated
|
||||
result_content = msg["content"][0]["content"]
|
||||
assert len(result_content) < len(long_content)
|
||||
assert "…" in result_content
|
||||
|
||||
def test_preserve_tool_use_blocks(self, enc):
|
||||
"""Test that tool_use blocks are not truncated."""
|
||||
msg = {
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_123",
|
||||
"name": "some_function",
|
||||
"input": {"key": "value" * 1000}, # Large input
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
original = json.dumps(msg["content"][0]["input"])
|
||||
_truncate_tool_message_content(msg, enc, max_tokens=10)
|
||||
|
||||
# tool_use should be unchanged
|
||||
assert json.dumps(msg["content"][0]["input"]) == original
|
||||
|
||||
def test_no_truncation_when_under_limit(self, enc):
|
||||
"""Test that short content is not modified."""
|
||||
msg = {"role": "tool", "tool_call_id": "call_123", "content": "Short content"}
|
||||
|
||||
original = msg["content"]
|
||||
_truncate_tool_message_content(msg, enc, max_tokens=1000)
|
||||
|
||||
assert msg["content"] == original
|
||||
|
||||
|
||||
class TestTruncateMiddleTokens:
|
||||
"""Test middle truncation of text."""
|
||||
|
||||
@pytest.fixture
|
||||
def enc(self):
|
||||
return encoding_for_model("gpt-4o")
|
||||
|
||||
def test_truncates_long_text(self, enc):
|
||||
"""Test that long text is truncated with ellipsis in middle."""
|
||||
long_text = "word " * 1000
|
||||
result = _truncate_middle_tokens(long_text, enc, max_tok=50)
|
||||
|
||||
assert len(enc.encode(result)) <= 52 # Allow some slack for ellipsis
|
||||
assert "…" in result
|
||||
assert result.startswith("word") # Head preserved
|
||||
assert result.endswith("word ") # Tail preserved
|
||||
|
||||
def test_preserves_short_text(self, enc):
|
||||
"""Test that short text is not modified."""
|
||||
short_text = "Hello world"
|
||||
result = _truncate_middle_tokens(short_text, enc, max_tok=100)
|
||||
|
||||
assert result == short_text
|
||||
|
||||
|
||||
class TestEnsureToolPairsIntact:
|
||||
"""Test tool call/response pair preservation for both OpenAI and Anthropic formats."""
|
||||
|
||||
# ---- OpenAI Format Tests ----
|
||||
|
||||
def test_openai_adds_missing_tool_call(self):
|
||||
"""Test that orphaned OpenAI tool_response gets its tool_call prepended."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_1", "type": "function", "function": {"name": "f1"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "result"},
|
||||
{"role": "user", "content": "Thanks!"},
|
||||
]
|
||||
# Recent messages start at index 2 (the tool response)
|
||||
recent = [all_msgs[2], all_msgs[3]]
|
||||
start_index = 2
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the tool_call message
|
||||
assert len(result) == 3
|
||||
assert result[0]["role"] == "assistant"
|
||||
assert "tool_calls" in result[0]
|
||||
|
||||
def test_openai_keeps_complete_pairs(self):
|
||||
"""Test that complete OpenAI pairs are unchanged."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "System"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_1", "type": "function", "function": {"name": "f1"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "result"},
|
||||
]
|
||||
recent = all_msgs[1:] # Include both tool_call and response
|
||||
start_index = 1
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
assert len(result) == 2 # No messages added
|
||||
|
||||
def test_openai_multiple_tool_calls(self):
|
||||
"""Test multiple OpenAI tool calls in one assistant message."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "System"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_1", "type": "function", "function": {"name": "f1"}},
|
||||
{"id": "call_2", "type": "function", "function": {"name": "f2"}},
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "result1"},
|
||||
{"role": "tool", "tool_call_id": "call_2", "content": "result2"},
|
||||
{"role": "user", "content": "Thanks!"},
|
||||
]
|
||||
# Recent messages start at index 2 (first tool response)
|
||||
recent = [all_msgs[2], all_msgs[3], all_msgs[4]]
|
||||
start_index = 2
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the assistant message with both tool_calls
|
||||
assert len(result) == 4
|
||||
assert result[0]["role"] == "assistant"
|
||||
assert len(result[0]["tool_calls"]) == 2
|
||||
|
||||
# ---- Anthropic Format Tests ----
|
||||
|
||||
def test_anthropic_adds_missing_tool_use(self):
|
||||
"""Test that orphaned Anthropic tool_result gets its tool_use prepended."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_123",
|
||||
"name": "get_weather",
|
||||
"input": {"location": "SF"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_123",
|
||||
"content": "22°C and sunny",
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "Thanks!"},
|
||||
]
|
||||
# Recent messages start at index 2 (the tool_result)
|
||||
recent = [all_msgs[2], all_msgs[3]]
|
||||
start_index = 2
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the tool_use message
|
||||
assert len(result) == 3
|
||||
assert result[0]["role"] == "assistant"
|
||||
assert result[0]["content"][0]["type"] == "tool_use"
|
||||
|
||||
def test_anthropic_keeps_complete_pairs(self):
|
||||
"""Test that complete Anthropic pairs are unchanged."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "System"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_456",
|
||||
"name": "calculator",
|
||||
"input": {"expr": "2+2"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_456",
|
||||
"content": "4",
|
||||
}
|
||||
],
|
||||
},
|
||||
]
|
||||
recent = all_msgs[1:] # Include both tool_use and result
|
||||
start_index = 1
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
assert len(result) == 2 # No messages added
|
||||
|
||||
def test_anthropic_multiple_tool_uses(self):
|
||||
"""Test multiple Anthropic tool_use blocks in one message."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "System"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "text", "text": "Let me check both..."},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_1",
|
||||
"name": "get_weather",
|
||||
"input": {"city": "NYC"},
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_2",
|
||||
"name": "get_weather",
|
||||
"input": {"city": "LA"},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_1",
|
||||
"content": "Cold",
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_2",
|
||||
"content": "Warm",
|
||||
},
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "Thanks!"},
|
||||
]
|
||||
# Recent messages start at index 2 (tool_result)
|
||||
recent = [all_msgs[2], all_msgs[3]]
|
||||
start_index = 2
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the assistant message with both tool_uses
|
||||
assert len(result) == 3
|
||||
assert result[0]["role"] == "assistant"
|
||||
tool_use_count = sum(
|
||||
1 for b in result[0]["content"] if b.get("type") == "tool_use"
|
||||
)
|
||||
assert tool_use_count == 2
|
||||
|
||||
# ---- Mixed/Edge Case Tests ----
|
||||
|
||||
def test_anthropic_with_type_message_field(self):
|
||||
"""Test Anthropic format with 'type': 'message' field (smart_decision_maker style)."""
|
||||
all_msgs = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_abc",
|
||||
"name": "search",
|
||||
"input": {"q": "test"},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"type": "message", # Extra field from smart_decision_maker
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_abc",
|
||||
"content": "Found results",
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "user", "content": "Thanks!"},
|
||||
]
|
||||
# Recent messages start at index 2 (the tool_result with 'type': 'message')
|
||||
recent = [all_msgs[2], all_msgs[3]]
|
||||
start_index = 2
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the tool_use message
|
||||
assert len(result) == 3
|
||||
assert result[0]["role"] == "assistant"
|
||||
assert result[0]["content"][0]["type"] == "tool_use"
|
||||
|
||||
def test_handles_no_tool_messages(self):
|
||||
"""Test messages without tool calls."""
|
||||
all_msgs = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
]
|
||||
recent = all_msgs
|
||||
start_index = 0
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
assert result == all_msgs
|
||||
|
||||
def test_handles_empty_messages(self):
|
||||
"""Test empty message list."""
|
||||
result = _ensure_tool_pairs_intact([], [], 0)
|
||||
assert result == []
|
||||
|
||||
def test_mixed_text_and_tool_content(self):
|
||||
"""Test Anthropic message with mixed text and tool_use content."""
|
||||
all_msgs = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{"type": "text", "text": "I'll help you with that."},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "toolu_mixed",
|
||||
"name": "search",
|
||||
"input": {"q": "test"},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_mixed",
|
||||
"content": "Found results",
|
||||
}
|
||||
],
|
||||
},
|
||||
{"role": "assistant", "content": "Here are the results..."},
|
||||
]
|
||||
# Start from tool_result
|
||||
recent = [all_msgs[1], all_msgs[2]]
|
||||
start_index = 1
|
||||
|
||||
result = _ensure_tool_pairs_intact(recent, all_msgs, start_index)
|
||||
|
||||
# Should prepend the assistant message with tool_use
|
||||
assert len(result) == 3
|
||||
assert result[0]["content"][0]["type"] == "text"
|
||||
assert result[0]["content"][1]["type"] == "tool_use"
|
||||
|
||||
|
||||
class TestCompressContext:
|
||||
"""Test the async compress_context function."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_compression_needed(self):
|
||||
"""Test messages under limit return without compression."""
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": "Hello!"},
|
||||
]
|
||||
|
||||
result = await compress_context(messages, target_tokens=100000)
|
||||
|
||||
assert isinstance(result, CompressResult)
|
||||
assert result.was_compacted is False
|
||||
assert len(result.messages) == 2
|
||||
assert result.error is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_truncation_without_client(self):
|
||||
"""Test that truncation works without LLM client."""
|
||||
long_content = "x" * 50000
|
||||
messages = [
|
||||
{"role": "system", "content": "System"},
|
||||
{"role": "user", "content": long_content},
|
||||
{"role": "assistant", "content": "Response"},
|
||||
]
|
||||
|
||||
result = await compress_context(
|
||||
messages, target_tokens=1000, client=None, reserve=100
|
||||
)
|
||||
|
||||
assert result.was_compacted is True
|
||||
# Should have truncated without summarization
|
||||
assert result.messages_summarized == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_with_mocked_llm_client(self):
|
||||
"""Test summarization with mocked LLM client."""
|
||||
# Create many messages to trigger summarization
|
||||
messages = [{"role": "system", "content": "System prompt"}]
|
||||
for i in range(30):
|
||||
messages.append({"role": "user", "content": f"User message {i} " * 100})
|
||||
messages.append(
|
||||
{"role": "assistant", "content": f"Assistant response {i} " * 100}
|
||||
)
|
||||
|
||||
# Mock the AsyncOpenAI client
|
||||
mock_client = AsyncMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [MagicMock()]
|
||||
mock_response.choices[0].message.content = "Summary of conversation"
|
||||
mock_client.with_options.return_value.chat.completions.create = AsyncMock(
|
||||
return_value=mock_response
|
||||
)
|
||||
|
||||
result = await compress_context(
|
||||
messages,
|
||||
target_tokens=5000,
|
||||
client=mock_client,
|
||||
keep_recent=5,
|
||||
reserve=500,
|
||||
)
|
||||
|
||||
assert result.was_compacted is True
|
||||
# Should have attempted summarization
|
||||
assert mock_client.with_options.called or result.messages_summarized > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_preserves_tool_pairs(self):
|
||||
"""Test that tool call/response pairs stay together."""
|
||||
messages = [
|
||||
{"role": "system", "content": "System"},
|
||||
{"role": "user", "content": "Do something"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{"id": "call_1", "type": "function", "function": {"name": "func"}}
|
||||
],
|
||||
},
|
||||
{"role": "tool", "tool_call_id": "call_1", "content": "Result " * 1000},
|
||||
{"role": "assistant", "content": "Done!"},
|
||||
]
|
||||
|
||||
result = await compress_context(
|
||||
messages, target_tokens=500, client=None, reserve=50
|
||||
)
|
||||
|
||||
# Check that if tool response exists, its call exists too
|
||||
tool_call_ids = set()
|
||||
tool_response_ids = set()
|
||||
for msg in result.messages:
|
||||
if "tool_calls" in msg:
|
||||
for tc in msg["tool_calls"]:
|
||||
tool_call_ids.add(tc["id"])
|
||||
if msg.get("role") == "tool":
|
||||
tool_response_ids.add(msg.get("tool_call_id"))
|
||||
|
||||
# All tool responses should have their calls
|
||||
assert tool_response_ids <= tool_call_ids
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_returns_error_when_cannot_compress(self):
|
||||
"""Test that error is returned when compression fails."""
|
||||
# Single huge message that can't be compressed enough
|
||||
messages = [
|
||||
{"role": "user", "content": "x" * 100000},
|
||||
]
|
||||
|
||||
result = await compress_context(
|
||||
messages, target_tokens=100, client=None, reserve=50
|
||||
)
|
||||
|
||||
# Should have an error since we can't get below 100 tokens
|
||||
assert result.error is not None
|
||||
assert result.was_compacted is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_messages(self):
|
||||
"""Test that empty messages list returns early without error."""
|
||||
result = await compress_context([], target_tokens=1000)
|
||||
|
||||
assert result.messages == []
|
||||
assert result.token_count == 0
|
||||
assert result.was_compacted is False
|
||||
assert result.error is None
|
||||
|
||||
|
||||
class TestRemoveOrphanToolResponses:
|
||||
"""Test _remove_orphan_tool_responses helper function."""
|
||||
|
||||
def test_removes_openai_orphan(self):
|
||||
"""Test removal of orphan OpenAI tool response."""
|
||||
from backend.util.prompt import _remove_orphan_tool_responses
|
||||
|
||||
messages = [
|
||||
{"role": "tool", "tool_call_id": "call_orphan", "content": "result"},
|
||||
{"role": "user", "content": "Hello"},
|
||||
]
|
||||
orphan_ids = {"call_orphan"}
|
||||
|
||||
result = _remove_orphan_tool_responses(messages, orphan_ids)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["role"] == "user"
|
||||
|
||||
def test_keeps_valid_openai_tool(self):
|
||||
"""Test that valid OpenAI tool responses are kept."""
|
||||
from backend.util.prompt import _remove_orphan_tool_responses
|
||||
|
||||
messages = [
|
||||
{"role": "tool", "tool_call_id": "call_valid", "content": "result"},
|
||||
]
|
||||
orphan_ids = {"call_other"}
|
||||
|
||||
result = _remove_orphan_tool_responses(messages, orphan_ids)
|
||||
|
||||
assert len(result) == 1
|
||||
assert result[0]["tool_call_id"] == "call_valid"
|
||||
|
||||
def test_filters_anthropic_mixed_blocks(self):
|
||||
"""Test filtering individual orphan blocks from Anthropic message with mixed valid/orphan."""
|
||||
from backend.util.prompt import _remove_orphan_tool_responses
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_valid",
|
||||
"content": "valid result",
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_orphan",
|
||||
"content": "orphan result",
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
orphan_ids = {"toolu_orphan"}
|
||||
|
||||
result = _remove_orphan_tool_responses(messages, orphan_ids)
|
||||
|
||||
assert len(result) == 1
|
||||
# Should only have the valid tool_result, orphan filtered out
|
||||
assert len(result[0]["content"]) == 1
|
||||
assert result[0]["content"][0]["tool_use_id"] == "toolu_valid"
|
||||
|
||||
def test_removes_anthropic_all_orphan(self):
|
||||
"""Test removal of Anthropic message when all tool_results are orphans."""
|
||||
from backend.util.prompt import _remove_orphan_tool_responses
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_orphan1",
|
||||
"content": "result1",
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "toolu_orphan2",
|
||||
"content": "result2",
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
orphan_ids = {"toolu_orphan1", "toolu_orphan2"}
|
||||
|
||||
result = _remove_orphan_tool_responses(messages, orphan_ids)
|
||||
|
||||
# Message should be completely removed since no content left
|
||||
assert len(result) == 0
|
||||
|
||||
def test_preserves_non_tool_messages(self):
|
||||
"""Test that non-tool messages are preserved."""
|
||||
from backend.util.prompt import _remove_orphan_tool_responses
|
||||
|
||||
messages = [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
]
|
||||
orphan_ids = {"some_id"}
|
||||
|
||||
result = _remove_orphan_tool_responses(messages, orphan_ids)
|
||||
|
||||
assert result == messages
|
||||
|
||||
|
||||
class TestCompressResultDataclass:
|
||||
"""Test CompressResult dataclass."""
|
||||
|
||||
def test_default_values(self):
|
||||
"""Test default values are set correctly."""
|
||||
result = CompressResult(
|
||||
messages=[{"role": "user", "content": "test"}],
|
||||
token_count=10,
|
||||
was_compacted=False,
|
||||
)
|
||||
|
||||
assert result.error is None
|
||||
assert result.original_token_count == 0 # Defaults to 0, not None
|
||||
assert result.messages_summarized == 0
|
||||
assert result.messages_dropped == 0
|
||||
|
||||
def test_all_fields(self):
|
||||
"""Test all fields can be set."""
|
||||
result = CompressResult(
|
||||
messages=[{"role": "user", "content": "test"}],
|
||||
token_count=100,
|
||||
was_compacted=True,
|
||||
error="Some error",
|
||||
original_token_count=500,
|
||||
messages_summarized=10,
|
||||
messages_dropped=5,
|
||||
)
|
||||
|
||||
assert result.token_count == 100
|
||||
assert result.was_compacted is True
|
||||
assert result.error == "Some error"
|
||||
assert result.original_token_count == 500
|
||||
assert result.messages_summarized == 10
|
||||
assert result.messages_dropped == 5
|
||||
|
||||
@@ -102,7 +102,7 @@ class TestDecomposeGoalExternal:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_decompose_goal_with_context(self):
|
||||
"""Test decomposition with additional context."""
|
||||
"""Test decomposition with additional context enriched into description."""
|
||||
mock_response = MagicMock()
|
||||
mock_response.json.return_value = {
|
||||
"success": True,
|
||||
@@ -119,9 +119,12 @@ class TestDecomposeGoalExternal:
|
||||
"Build a chatbot", context="Use Python"
|
||||
)
|
||||
|
||||
expected_description = (
|
||||
"Build a chatbot\n\nAdditional context from user:\nUse Python"
|
||||
)
|
||||
mock_client.post.assert_called_once_with(
|
||||
"/api/decompose-description",
|
||||
json={"description": "Build a chatbot", "user_instruction": "Use Python"},
|
||||
json={"description": expected_description},
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"use client";
|
||||
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
|
||||
import { getOnboardingStatus, resolveResponse } from "@/app/api/helpers";
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect } from "react";
|
||||
import { resolveResponse, getOnboardingStatus } from "@/app/api/helpers";
|
||||
import { getV1OnboardingState } from "@/app/api/__generated__/endpoints/onboarding/onboarding";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
|
||||
export default function OnboardingPage() {
|
||||
const router = useRouter();
|
||||
@@ -13,12 +12,10 @@ export default function OnboardingPage() {
|
||||
async function redirectToStep() {
|
||||
try {
|
||||
// Check if onboarding is enabled (also gets chat flag for redirect)
|
||||
const { shouldShowOnboarding, isChatEnabled } =
|
||||
await getOnboardingStatus();
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
const { shouldShowOnboarding } = await getOnboardingStatus();
|
||||
|
||||
if (!shouldShowOnboarding) {
|
||||
router.replace(homepageRoute);
|
||||
router.replace("/");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -26,7 +23,7 @@ export default function OnboardingPage() {
|
||||
|
||||
// Handle completed onboarding
|
||||
if (onboarding.completedSteps.includes("GET_RESULTS")) {
|
||||
router.replace(homepageRoute);
|
||||
router.replace("/");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import BackendAPI from "@/lib/autogpt-server-api";
|
||||
import { NextResponse } from "next/server";
|
||||
import { revalidatePath } from "next/cache";
|
||||
import { getOnboardingStatus } from "@/app/api/helpers";
|
||||
import BackendAPI from "@/lib/autogpt-server-api";
|
||||
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
|
||||
import { revalidatePath } from "next/cache";
|
||||
import { NextResponse } from "next/server";
|
||||
|
||||
// Handle the callback to complete the user session login
|
||||
export async function GET(request: Request) {
|
||||
@@ -27,13 +26,12 @@ export async function GET(request: Request) {
|
||||
await api.createUser();
|
||||
|
||||
// Get onboarding status from backend (includes chat flag evaluated for this user)
|
||||
const { shouldShowOnboarding, isChatEnabled } =
|
||||
await getOnboardingStatus();
|
||||
const { shouldShowOnboarding } = await getOnboardingStatus();
|
||||
if (shouldShowOnboarding) {
|
||||
next = "/onboarding";
|
||||
revalidatePath("/onboarding", "layout");
|
||||
} else {
|
||||
next = getHomepageRoute(isChatEnabled);
|
||||
next = "/";
|
||||
revalidatePath(next, "layout");
|
||||
}
|
||||
} catch (createUserError) {
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
import type { ReactNode } from "react";
|
||||
"use client";
|
||||
import { FeatureFlagPage } from "@/services/feature-flags/FeatureFlagPage";
|
||||
import { Flag } from "@/services/feature-flags/use-get-flag";
|
||||
import { type ReactNode } from "react";
|
||||
import { CopilotShell } from "./components/CopilotShell/CopilotShell";
|
||||
|
||||
export default function CopilotLayout({ children }: { children: ReactNode }) {
|
||||
return <CopilotShell>{children}</CopilotShell>;
|
||||
return (
|
||||
<FeatureFlagPage flag={Flag.CHAT} whenDisabled="/library">
|
||||
<CopilotShell>{children}</CopilotShell>
|
||||
</FeatureFlagPage>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -14,14 +14,8 @@ export default function CopilotPage() {
|
||||
const isInterruptModalOpen = useCopilotStore((s) => s.isInterruptModalOpen);
|
||||
const confirmInterrupt = useCopilotStore((s) => s.confirmInterrupt);
|
||||
const cancelInterrupt = useCopilotStore((s) => s.cancelInterrupt);
|
||||
const {
|
||||
greetingName,
|
||||
quickActions,
|
||||
isLoading,
|
||||
hasSession,
|
||||
initialPrompt,
|
||||
isReady,
|
||||
} = state;
|
||||
const { greetingName, quickActions, isLoading, hasSession, initialPrompt } =
|
||||
state;
|
||||
const {
|
||||
handleQuickAction,
|
||||
startChatWithPrompt,
|
||||
@@ -29,8 +23,6 @@ export default function CopilotPage() {
|
||||
handleStreamingChange,
|
||||
} = handlers;
|
||||
|
||||
if (!isReady) return null;
|
||||
|
||||
if (hasSession) {
|
||||
return (
|
||||
<div className="flex h-full flex-col">
|
||||
|
||||
@@ -3,18 +3,11 @@ import {
|
||||
postV2CreateSession,
|
||||
} from "@/app/api/__generated__/endpoints/chat/chat";
|
||||
import { useToast } from "@/components/molecules/Toast/use-toast";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
|
||||
import { useOnboarding } from "@/providers/onboarding/onboarding-provider";
|
||||
import {
|
||||
Flag,
|
||||
type FlagValues,
|
||||
useGetFlag,
|
||||
} from "@/services/feature-flags/use-get-flag";
|
||||
import { SessionKey, sessionStorage } from "@/services/storage/session-storage";
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
import { useQueryClient } from "@tanstack/react-query";
|
||||
import { useFlags } from "launchdarkly-react-client-sdk";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect } from "react";
|
||||
import { useCopilotStore } from "./copilot-page-store";
|
||||
@@ -33,22 +26,6 @@ export function useCopilotPage() {
|
||||
const isCreating = useCopilotStore((s) => s.isCreatingSession);
|
||||
const setIsCreating = useCopilotStore((s) => s.setIsCreatingSession);
|
||||
|
||||
// Complete VISIT_COPILOT onboarding step to grant $5 welcome bonus
|
||||
useEffect(() => {
|
||||
if (isLoggedIn) {
|
||||
completeStep("VISIT_COPILOT");
|
||||
}
|
||||
}, [completeStep, isLoggedIn]);
|
||||
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const flags = useFlags<FlagValues>();
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
|
||||
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
|
||||
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
|
||||
const isFlagReady =
|
||||
!isLaunchDarklyConfigured || flags[Flag.CHAT] !== undefined;
|
||||
|
||||
const greetingName = getGreetingName(user);
|
||||
const quickActions = getQuickActions();
|
||||
|
||||
@@ -58,11 +35,8 @@ export function useCopilotPage() {
|
||||
: undefined;
|
||||
|
||||
useEffect(() => {
|
||||
if (!isFlagReady) return;
|
||||
if (isChatEnabled === false) {
|
||||
router.replace(homepageRoute);
|
||||
}
|
||||
}, [homepageRoute, isChatEnabled, isFlagReady, router]);
|
||||
if (isLoggedIn) completeStep("VISIT_COPILOT");
|
||||
}, [completeStep, isLoggedIn]);
|
||||
|
||||
async function startChatWithPrompt(prompt: string) {
|
||||
if (!prompt?.trim()) return;
|
||||
@@ -116,7 +90,6 @@ export function useCopilotPage() {
|
||||
isLoading: isUserLoading,
|
||||
hasSession,
|
||||
initialPrompt,
|
||||
isReady: isFlagReady && isChatEnabled !== false && isLoggedIn,
|
||||
},
|
||||
handlers: {
|
||||
handleQuickAction,
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { ErrorCard } from "@/components/molecules/ErrorCard/ErrorCard";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import { useSearchParams } from "next/navigation";
|
||||
import { Suspense } from "react";
|
||||
import { getErrorDetails } from "./helpers";
|
||||
@@ -11,8 +9,6 @@ function ErrorPageContent() {
|
||||
const searchParams = useSearchParams();
|
||||
const errorMessage = searchParams.get("message");
|
||||
const errorDetails = getErrorDetails(errorMessage);
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
|
||||
function handleRetry() {
|
||||
// Auth-related errors should redirect to login
|
||||
@@ -30,7 +26,7 @@ function ErrorPageContent() {
|
||||
}, 2000);
|
||||
} else {
|
||||
// For server/network errors, go to home
|
||||
window.location.href = homepageRoute;
|
||||
window.location.href = "/";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,13 +23,10 @@ export function LibrarySortMenu({ setLibrarySort }: Props) {
|
||||
<Select onValueChange={handleSortChange}>
|
||||
<SelectTrigger className="ml-1 w-fit space-x-1 border-none px-0 text-base underline underline-offset-4 shadow-none">
|
||||
<ArrowDownNarrowWideIcon className="h-4 w-4 sm:hidden" />
|
||||
<SelectValue placeholder="Last Executed" />
|
||||
<SelectValue placeholder="Last Modified" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectGroup>
|
||||
<SelectItem value={LibraryAgentSort.lastExecuted}>
|
||||
Last Executed
|
||||
</SelectItem>
|
||||
<SelectItem value={LibraryAgentSort.createdAt}>
|
||||
Creation Date
|
||||
</SelectItem>
|
||||
|
||||
@@ -11,14 +11,12 @@ export function useLibrarySortMenu({ setLibrarySort }: Props) {
|
||||
|
||||
const getSortLabel = (sort: LibraryAgentSort) => {
|
||||
switch (sort) {
|
||||
case LibraryAgentSort.lastExecuted:
|
||||
return "Last Executed";
|
||||
case LibraryAgentSort.createdAt:
|
||||
return "Creation Date";
|
||||
case LibraryAgentSort.updatedAt:
|
||||
return "Last Modified";
|
||||
default:
|
||||
return "Last Executed";
|
||||
return "Last Modified";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import { LibraryAgentSort } from "@/app/api/__generated__/models/libraryAgentSort";
|
||||
import { parseAsStringEnum, useQueryState } from "nuqs";
|
||||
import { useCallback, useMemo, useState } from "react";
|
||||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
|
||||
const sortParser = parseAsStringEnum(Object.values(LibraryAgentSort));
|
||||
|
||||
@@ -11,7 +11,14 @@ export function useLibraryListPage() {
|
||||
const [uploadedFile, setUploadedFile] = useState<File | null>(null);
|
||||
const [librarySortRaw, setLibrarySortRaw] = useQueryState("sort", sortParser);
|
||||
|
||||
const librarySort = librarySortRaw || LibraryAgentSort.lastExecuted;
|
||||
// Ensure sort param is always present in URL (even if default)
|
||||
useEffect(() => {
|
||||
if (!librarySortRaw) {
|
||||
setLibrarySortRaw(LibraryAgentSort.updatedAt, { shallow: false });
|
||||
}
|
||||
}, [librarySortRaw, setLibrarySortRaw]);
|
||||
|
||||
const librarySort = librarySortRaw || LibraryAgentSort.updatedAt;
|
||||
|
||||
const setLibrarySort = useCallback(
|
||||
(value: LibraryAgentSort) => {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"use server";
|
||||
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import BackendAPI from "@/lib/autogpt-server-api";
|
||||
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
|
||||
import { loginFormSchema } from "@/types/auth";
|
||||
@@ -38,10 +37,8 @@ export async function login(email: string, password: string) {
|
||||
await api.createUser();
|
||||
|
||||
// Get onboarding status from backend (includes chat flag evaluated for this user)
|
||||
const { shouldShowOnboarding, isChatEnabled } = await getOnboardingStatus();
|
||||
const next = shouldShowOnboarding
|
||||
? "/onboarding"
|
||||
: getHomepageRoute(isChatEnabled);
|
||||
const { shouldShowOnboarding } = await getOnboardingStatus();
|
||||
const next = shouldShowOnboarding ? "/onboarding" : "/";
|
||||
|
||||
return {
|
||||
success: true,
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import { useToast } from "@/components/molecules/Toast/use-toast";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
|
||||
import { environment } from "@/services/environment";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import { loginFormSchema, LoginProvider } from "@/types/auth";
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import { useRouter, useSearchParams } from "next/navigation";
|
||||
@@ -22,17 +20,15 @@ export function useLoginPage() {
|
||||
const [isGoogleLoading, setIsGoogleLoading] = useState(false);
|
||||
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
|
||||
const isCloudEnv = environment.isCloud();
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
|
||||
// Get redirect destination from 'next' query parameter
|
||||
const nextUrl = searchParams.get("next");
|
||||
|
||||
useEffect(() => {
|
||||
if (isLoggedIn && !isLoggingIn) {
|
||||
router.push(nextUrl || homepageRoute);
|
||||
router.push(nextUrl || "/");
|
||||
}
|
||||
}, [homepageRoute, isLoggedIn, isLoggingIn, nextUrl, router]);
|
||||
}, [isLoggedIn, isLoggingIn, nextUrl, router]);
|
||||
|
||||
const form = useForm<z.infer<typeof loginFormSchema>>({
|
||||
resolver: zodResolver(loginFormSchema),
|
||||
@@ -98,7 +94,7 @@ export function useLoginPage() {
|
||||
}
|
||||
|
||||
// Prefer URL's next parameter, then use backend-determined route
|
||||
router.replace(nextUrl || result.next || homepageRoute);
|
||||
router.replace(nextUrl || result.next || "/");
|
||||
} catch (error) {
|
||||
toast({
|
||||
title:
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"use server";
|
||||
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
|
||||
import { signupFormSchema } from "@/types/auth";
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
@@ -59,10 +58,8 @@ export async function signup(
|
||||
}
|
||||
|
||||
// Get onboarding status from backend (includes chat flag evaluated for this user)
|
||||
const { shouldShowOnboarding, isChatEnabled } = await getOnboardingStatus();
|
||||
const next = shouldShowOnboarding
|
||||
? "/onboarding"
|
||||
: getHomepageRoute(isChatEnabled);
|
||||
const { shouldShowOnboarding } = await getOnboardingStatus();
|
||||
const next = shouldShowOnboarding ? "/onboarding" : "/";
|
||||
|
||||
return { success: true, next };
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import { useToast } from "@/components/molecules/Toast/use-toast";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
|
||||
import { environment } from "@/services/environment";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import { LoginProvider, signupFormSchema } from "@/types/auth";
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import { useRouter, useSearchParams } from "next/navigation";
|
||||
@@ -22,17 +20,15 @@ export function useSignupPage() {
|
||||
const [isGoogleLoading, setIsGoogleLoading] = useState(false);
|
||||
const [showNotAllowedModal, setShowNotAllowedModal] = useState(false);
|
||||
const isCloudEnv = environment.isCloud();
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
|
||||
// Get redirect destination from 'next' query parameter
|
||||
const nextUrl = searchParams.get("next");
|
||||
|
||||
useEffect(() => {
|
||||
if (isLoggedIn && !isSigningUp) {
|
||||
router.push(nextUrl || homepageRoute);
|
||||
router.push(nextUrl || "/");
|
||||
}
|
||||
}, [homepageRoute, isLoggedIn, isSigningUp, nextUrl, router]);
|
||||
}, [isLoggedIn, isSigningUp, nextUrl, router]);
|
||||
|
||||
const form = useForm<z.infer<typeof signupFormSchema>>({
|
||||
resolver: zodResolver(signupFormSchema),
|
||||
@@ -133,7 +129,7 @@ export function useSignupPage() {
|
||||
}
|
||||
|
||||
// Prefer the URL's next parameter, then result.next (for onboarding), then default
|
||||
const redirectTo = nextUrl || result.next || homepageRoute;
|
||||
const redirectTo = nextUrl || result.next || "/";
|
||||
router.replace(redirectTo);
|
||||
} catch (error) {
|
||||
setIsLoading(false);
|
||||
|
||||
@@ -181,6 +181,5 @@ export async function getOnboardingStatus() {
|
||||
const isCompleted = onboarding.completedSteps.includes("CONGRATS");
|
||||
return {
|
||||
shouldShowOnboarding: status.is_onboarding_enabled && !isCompleted,
|
||||
isChatEnabled: status.is_chat_enabled,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3361,7 +3361,7 @@
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/LibraryAgentSort",
|
||||
"description": "Criteria to sort results by",
|
||||
"default": "lastExecuted"
|
||||
"default": "updatedAt"
|
||||
},
|
||||
"description": "Criteria to sort results by"
|
||||
},
|
||||
@@ -8239,7 +8239,7 @@
|
||||
},
|
||||
"LibraryAgentSort": {
|
||||
"type": "string",
|
||||
"enum": ["createdAt", "updatedAt", "lastExecuted"],
|
||||
"enum": ["createdAt", "updatedAt"],
|
||||
"title": "LibraryAgentSort",
|
||||
"description": "Possible sort options for sorting library agents."
|
||||
},
|
||||
|
||||
@@ -1,27 +1,15 @@
|
||||
"use client";
|
||||
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect } from "react";
|
||||
|
||||
export default function Page() {
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const router = useRouter();
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
|
||||
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
|
||||
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
|
||||
const isFlagReady =
|
||||
!isLaunchDarklyConfigured || typeof isChatEnabled === "boolean";
|
||||
|
||||
useEffect(
|
||||
function redirectToHomepage() {
|
||||
if (!isFlagReady) return;
|
||||
router.replace(homepageRoute);
|
||||
},
|
||||
[homepageRoute, isFlagReady, router],
|
||||
);
|
||||
useEffect(() => {
|
||||
router.replace("/copilot");
|
||||
}, [router]);
|
||||
|
||||
return null;
|
||||
return <LoadingSpinner size="large" cover />;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { IconLaptop } from "@/components/__legacy__/ui/icons";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import { ListChecksIcon } from "@phosphor-icons/react/dist/ssr";
|
||||
@@ -24,11 +23,11 @@ interface Props {
|
||||
export function NavbarLink({ name, href }: Props) {
|
||||
const pathname = usePathname();
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
const expectedHomeRoute = isChatEnabled ? "/copilot" : "/library";
|
||||
|
||||
const isActive =
|
||||
href === homepageRoute
|
||||
? pathname === "/" || pathname.startsWith(homepageRoute)
|
||||
href === expectedHomeRoute
|
||||
? pathname === "/" || pathname.startsWith(expectedHomeRoute)
|
||||
: pathname.includes(href);
|
||||
|
||||
return (
|
||||
|
||||
@@ -80,7 +80,7 @@ export default function WrapIfAdditionalTemplate(
|
||||
uiSchema={uiSchema}
|
||||
/>
|
||||
{!isHandleConnected && (
|
||||
<div className="flex flex-1 items-center gap-2">
|
||||
<div className="nodrag flex flex-1 items-center gap-2">
|
||||
<Input
|
||||
label={""}
|
||||
hideLabel={true}
|
||||
|
||||
@@ -66,7 +66,7 @@ export default function useAgentGraph(
|
||||
>(null);
|
||||
const [xyNodes, setXYNodes] = useState<CustomNode[]>([]);
|
||||
const [xyEdges, setXYEdges] = useState<CustomEdge[]>([]);
|
||||
const betaBlocks = useGetFlag(Flag.BETA_BLOCKS);
|
||||
const betaBlocks = useGetFlag(Flag.BETA_BLOCKS) as string[];
|
||||
|
||||
// Filter blocks based on beta flags
|
||||
const availableBlocks = useMemo(() => {
|
||||
|
||||
@@ -612,7 +612,6 @@ export type LibraryAgentPresetUpdatable = Partial<
|
||||
export enum LibraryAgentSortEnum {
|
||||
CREATED_AT = "createdAt",
|
||||
UPDATED_AT = "updatedAt",
|
||||
LAST_EXECUTED = "lastExecuted",
|
||||
}
|
||||
|
||||
/* *** CREDENTIALS *** */
|
||||
|
||||
@@ -11,10 +11,3 @@ export const API_KEY_HEADER_NAME = "X-API-Key";
|
||||
|
||||
// Layout
|
||||
export const NAVBAR_HEIGHT_PX = 60;
|
||||
|
||||
// Routes
|
||||
export function getHomepageRoute(isChatEnabled?: boolean | null): string {
|
||||
if (isChatEnabled === true) return "/copilot";
|
||||
if (isChatEnabled === false) return "/library";
|
||||
return "/";
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { environment } from "@/services/environment";
|
||||
import { Key, storage } from "@/services/storage/local-storage";
|
||||
import { type CookieOptions } from "@supabase/ssr";
|
||||
@@ -71,7 +70,7 @@ export function getRedirectPath(
|
||||
}
|
||||
|
||||
if (isAdminPage(path) && userRole !== "admin") {
|
||||
return getHomepageRoute();
|
||||
return "/";
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { environment } from "@/services/environment";
|
||||
import { createServerClient } from "@supabase/ssr";
|
||||
import { NextResponse, type NextRequest } from "next/server";
|
||||
@@ -67,7 +66,7 @@ export async function updateSession(request: NextRequest) {
|
||||
|
||||
// 2. Check if user is authenticated but lacks admin role when accessing admin pages
|
||||
if (user && userRole !== "admin" && isAdminPage(pathname)) {
|
||||
url.pathname = getHomepageRoute();
|
||||
url.pathname = "/";
|
||||
return NextResponse.redirect(url);
|
||||
}
|
||||
|
||||
|
||||
@@ -23,9 +23,7 @@ import {
|
||||
WebSocketNotification,
|
||||
} from "@/lib/autogpt-server-api";
|
||||
import { useBackendAPI } from "@/lib/autogpt-server-api/context";
|
||||
import { getHomepageRoute } from "@/lib/constants";
|
||||
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
|
||||
import { Flag, useGetFlag } from "@/services/feature-flags/use-get-flag";
|
||||
import Link from "next/link";
|
||||
import { usePathname, useRouter } from "next/navigation";
|
||||
import {
|
||||
@@ -104,8 +102,6 @@ export default function OnboardingProvider({
|
||||
const pathname = usePathname();
|
||||
const router = useRouter();
|
||||
const { isLoggedIn } = useSupabase();
|
||||
const isChatEnabled = useGetFlag(Flag.CHAT);
|
||||
const homepageRoute = getHomepageRoute(isChatEnabled);
|
||||
|
||||
useOnboardingTimezoneDetection();
|
||||
|
||||
@@ -150,7 +146,7 @@ export default function OnboardingProvider({
|
||||
if (isOnOnboardingRoute) {
|
||||
const enabled = await resolveResponse(getV1IsOnboardingEnabled());
|
||||
if (!enabled) {
|
||||
router.push(homepageRoute);
|
||||
router.push("/");
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -162,7 +158,7 @@ export default function OnboardingProvider({
|
||||
isOnOnboardingRoute &&
|
||||
shouldRedirectFromOnboarding(onboarding.completedSteps, pathname)
|
||||
) {
|
||||
router.push(homepageRoute);
|
||||
router.push("/");
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Failed to initialize onboarding:", error);
|
||||
@@ -177,7 +173,7 @@ export default function OnboardingProvider({
|
||||
}
|
||||
|
||||
initializeOnboarding();
|
||||
}, [api, homepageRoute, isOnOnboardingRoute, router, isLoggedIn, pathname]);
|
||||
}, [api, isOnOnboardingRoute, router, isLoggedIn, pathname]);
|
||||
|
||||
const handleOnboardingNotification = useCallback(
|
||||
(notification: WebSocketNotification) => {
|
||||
|
||||
@@ -83,6 +83,10 @@ function getPostHogCredentials() {
|
||||
};
|
||||
}
|
||||
|
||||
function getLaunchDarklyClientId() {
|
||||
return process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
|
||||
}
|
||||
|
||||
function isProductionBuild() {
|
||||
return process.env.NODE_ENV === "production";
|
||||
}
|
||||
@@ -120,7 +124,10 @@ function isVercelPreview() {
|
||||
}
|
||||
|
||||
function areFeatureFlagsEnabled() {
|
||||
return process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "enabled";
|
||||
return (
|
||||
process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true" &&
|
||||
Boolean(process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID)
|
||||
);
|
||||
}
|
||||
|
||||
function isPostHogEnabled() {
|
||||
@@ -143,6 +150,7 @@ export const environment = {
|
||||
getSupabaseAnonKey,
|
||||
getPreviewStealingDev,
|
||||
getPostHogCredentials,
|
||||
getLaunchDarklyClientId,
|
||||
// Assertions
|
||||
isServerSide,
|
||||
isClientSide,
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
"use client";
|
||||
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { useLDClient } from "launchdarkly-react-client-sdk";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { ReactNode, useEffect, useState } from "react";
|
||||
import { environment } from "../environment";
|
||||
import { Flag, useGetFlag } from "./use-get-flag";
|
||||
|
||||
interface FeatureFlagRedirectProps {
|
||||
flag: Flag;
|
||||
whenDisabled: string;
|
||||
children: ReactNode;
|
||||
}
|
||||
|
||||
export function FeatureFlagPage({
|
||||
flag,
|
||||
whenDisabled,
|
||||
children,
|
||||
}: FeatureFlagRedirectProps) {
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
const router = useRouter();
|
||||
const flagValue = useGetFlag(flag);
|
||||
const ldClient = useLDClient();
|
||||
const ldEnabled = environment.areFeatureFlagsEnabled();
|
||||
const ldReady = Boolean(ldClient);
|
||||
const flagEnabled = Boolean(flagValue);
|
||||
|
||||
useEffect(() => {
|
||||
const initialize = async () => {
|
||||
if (!ldEnabled) {
|
||||
router.replace(whenDisabled);
|
||||
setIsLoading(false);
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for LaunchDarkly to initialize when enabled to prevent race conditions
|
||||
if (ldEnabled && !ldReady) return;
|
||||
|
||||
try {
|
||||
await ldClient?.waitForInitialization();
|
||||
if (!flagEnabled) router.replace(whenDisabled);
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
router.replace(whenDisabled);
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
initialize();
|
||||
}, [ldReady, flagEnabled]);
|
||||
|
||||
return isLoading || !flagEnabled ? (
|
||||
<LoadingSpinner size="large" cover />
|
||||
) : (
|
||||
<>{children}</>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
"use client";
|
||||
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { useLDClient } from "launchdarkly-react-client-sdk";
|
||||
import { useRouter } from "next/navigation";
|
||||
import { useEffect } from "react";
|
||||
import { environment } from "../environment";
|
||||
import { Flag, useGetFlag } from "./use-get-flag";
|
||||
|
||||
interface FeatureFlagRedirectProps {
|
||||
flag: Flag;
|
||||
whenEnabled: string;
|
||||
whenDisabled: string;
|
||||
}
|
||||
|
||||
export function FeatureFlagRedirect({
|
||||
flag,
|
||||
whenEnabled,
|
||||
whenDisabled,
|
||||
}: FeatureFlagRedirectProps) {
|
||||
const router = useRouter();
|
||||
const flagValue = useGetFlag(flag);
|
||||
const ldEnabled = environment.areFeatureFlagsEnabled();
|
||||
const ldClient = useLDClient();
|
||||
const ldReady = Boolean(ldClient);
|
||||
const flagEnabled = Boolean(flagValue);
|
||||
|
||||
useEffect(() => {
|
||||
const initialize = async () => {
|
||||
if (!ldEnabled) {
|
||||
router.replace(whenDisabled);
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for LaunchDarkly to initialize when enabled to prevent race conditions
|
||||
if (ldEnabled && !ldReady) return;
|
||||
|
||||
try {
|
||||
await ldClient?.waitForInitialization();
|
||||
router.replace(flagEnabled ? whenEnabled : whenDisabled);
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
router.replace(whenDisabled);
|
||||
}
|
||||
};
|
||||
|
||||
initialize();
|
||||
}, [ldReady, flagEnabled]);
|
||||
|
||||
return <LoadingSpinner size="large" cover />;
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { LoadingSpinner } from "@/components/atoms/LoadingSpinner/LoadingSpinner";
|
||||
import { useSupabase } from "@/lib/supabase/hooks/useSupabase";
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
import { LDProvider } from "launchdarkly-react-client-sdk";
|
||||
@@ -7,17 +8,17 @@ import type { ReactNode } from "react";
|
||||
import { useMemo } from "react";
|
||||
import { environment } from "../environment";
|
||||
|
||||
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
|
||||
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
|
||||
const LAUNCHDARKLY_INIT_TIMEOUT_MS = 5000;
|
||||
|
||||
export function LaunchDarklyProvider({ children }: { children: ReactNode }) {
|
||||
const { user, isUserLoading } = useSupabase();
|
||||
const isCloud = environment.isCloud();
|
||||
const isLaunchDarklyConfigured = isCloud && envEnabled && clientId;
|
||||
const envEnabled = environment.areFeatureFlagsEnabled();
|
||||
const clientId = environment.getLaunchDarklyClientId();
|
||||
|
||||
const context = useMemo(() => {
|
||||
if (isUserLoading || !user) {
|
||||
if (isUserLoading) return;
|
||||
|
||||
if (!user) {
|
||||
return {
|
||||
kind: "user" as const,
|
||||
key: "anonymous",
|
||||
@@ -36,15 +37,17 @@ export function LaunchDarklyProvider({ children }: { children: ReactNode }) {
|
||||
};
|
||||
}, [user, isUserLoading]);
|
||||
|
||||
if (!isLaunchDarklyConfigured) {
|
||||
if (!envEnabled) {
|
||||
return <>{children}</>;
|
||||
}
|
||||
|
||||
if (isUserLoading) {
|
||||
return <LoadingSpinner size="large" cover />;
|
||||
}
|
||||
|
||||
return (
|
||||
<LDProvider
|
||||
// Add this key prop. It will be 'anonymous' when logged out,
|
||||
key={context.key}
|
||||
clientSideID={clientId}
|
||||
clientSideID={clientId ?? ""}
|
||||
context={context}
|
||||
timeout={LAUNCHDARKLY_INIT_TIMEOUT_MS}
|
||||
reactOptions={{ useCamelCaseFlagKeys: false }}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { DEFAULT_SEARCH_TERMS } from "@/app/(platform)/marketplace/components/HeroSection/helpers";
|
||||
import { environment } from "@/services/environment";
|
||||
import { useFlags } from "launchdarkly-react-client-sdk";
|
||||
|
||||
export enum Flag {
|
||||
@@ -18,24 +19,9 @@ export enum Flag {
|
||||
CHAT = "chat",
|
||||
}
|
||||
|
||||
export type FlagValues = {
|
||||
[Flag.BETA_BLOCKS]: string[];
|
||||
[Flag.NEW_BLOCK_MENU]: boolean;
|
||||
[Flag.NEW_AGENT_RUNS]: boolean;
|
||||
[Flag.GRAPH_SEARCH]: boolean;
|
||||
[Flag.ENABLE_ENHANCED_OUTPUT_HANDLING]: boolean;
|
||||
[Flag.NEW_FLOW_EDITOR]: boolean;
|
||||
[Flag.BUILDER_VIEW_SWITCH]: boolean;
|
||||
[Flag.SHARE_EXECUTION_RESULTS]: boolean;
|
||||
[Flag.AGENT_FAVORITING]: boolean;
|
||||
[Flag.MARKETPLACE_SEARCH_TERMS]: string[];
|
||||
[Flag.ENABLE_PLATFORM_PAYMENT]: boolean;
|
||||
[Flag.CHAT]: boolean;
|
||||
};
|
||||
|
||||
const isPwMockEnabled = process.env.NEXT_PUBLIC_PW_TEST === "true";
|
||||
|
||||
const mockFlags = {
|
||||
const defaultFlags = {
|
||||
[Flag.BETA_BLOCKS]: [],
|
||||
[Flag.NEW_BLOCK_MENU]: false,
|
||||
[Flag.NEW_AGENT_RUNS]: false,
|
||||
@@ -50,17 +36,16 @@ const mockFlags = {
|
||||
[Flag.CHAT]: false,
|
||||
};
|
||||
|
||||
export function useGetFlag<T extends Flag>(flag: T): FlagValues[T] | null {
|
||||
type FlagValues = typeof defaultFlags;
|
||||
|
||||
export function useGetFlag<T extends Flag>(flag: T): FlagValues[T] {
|
||||
const currentFlags = useFlags<FlagValues>();
|
||||
const flagValue = currentFlags[flag];
|
||||
const areFlagsEnabled = environment.areFeatureFlagsEnabled();
|
||||
|
||||
const envEnabled = process.env.NEXT_PUBLIC_LAUNCHDARKLY_ENABLED === "true";
|
||||
const clientId = process.env.NEXT_PUBLIC_LAUNCHDARKLY_CLIENT_ID;
|
||||
const isLaunchDarklyConfigured = envEnabled && Boolean(clientId);
|
||||
|
||||
if (!isLaunchDarklyConfigured || isPwMockEnabled) {
|
||||
return mockFlags[flag];
|
||||
if (!areFlagsEnabled || isPwMockEnabled) {
|
||||
return defaultFlags[flag];
|
||||
}
|
||||
|
||||
return flagValue ?? mockFlags[flag];
|
||||
return flagValue ?? defaultFlags[flag];
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ export class LibraryPage extends BasePage {
|
||||
|
||||
async selectSortOption(
|
||||
page: Page,
|
||||
sortOption: "Last Executed" | "Creation Date" | "Last Modified",
|
||||
sortOption: "Creation Date" | "Last Modified",
|
||||
): Promise<void> {
|
||||
const { getRole } = getSelectors(page);
|
||||
await getRole("combobox").click();
|
||||
|
||||
@@ -182,7 +182,7 @@ test("logged in user is redirected from /login to /library", async ({
|
||||
await hasUrl(page, "/marketplace");
|
||||
|
||||
await page.goto("/login");
|
||||
await hasUrl(page, "/library");
|
||||
await hasUrl(page, "/library?sort=updatedAt");
|
||||
});
|
||||
|
||||
test("logged in user is redirected from /signup to /library", async ({
|
||||
@@ -195,5 +195,5 @@ test("logged in user is redirected from /signup to /library", async ({
|
||||
await hasUrl(page, "/marketplace");
|
||||
|
||||
await page.goto("/signup");
|
||||
await hasUrl(page, "/library");
|
||||
await hasUrl(page, "/library?sort=updatedAt");
|
||||
});
|
||||
|
||||
1
classic/frontend/.gitignore
vendored
1
classic/frontend/.gitignore
vendored
@@ -8,6 +8,7 @@
|
||||
.buildlog/
|
||||
.history
|
||||
.svn/
|
||||
.next/
|
||||
migrate_working_dir/
|
||||
|
||||
# IntelliJ related
|
||||
|
||||
Reference in New Issue
Block a user