mirror of
https://github.com/Significant-Gravitas/AutoGPT.git
synced 2026-03-17 03:00:27 -04:00
Compare commits
12 Commits
fix/transc
...
fix/claude
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
383898a2da | ||
|
|
be18436e8f | ||
|
|
ea0333c1fc | ||
|
|
aa7a2f0a48 | ||
|
|
3665015647 | ||
|
|
7bc08672fa | ||
|
|
e8b8cad97a | ||
|
|
be35c626ad | ||
|
|
719c4ee1d1 | ||
|
|
411c399e03 | ||
|
|
6ac011e36c | ||
|
|
5e554526e2 |
@@ -187,9 +187,11 @@ class ClaudeCodeBlock(Block):
|
||||
)
|
||||
files: list[SandboxFileOutput] = SchemaField(
|
||||
description=(
|
||||
"List of text files created/modified by Claude Code during this execution. "
|
||||
"List of files created/modified by Claude Code during this execution. "
|
||||
"Includes text files and binary files (images, PDFs, etc.). "
|
||||
"Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. "
|
||||
"workspace_ref contains a workspace:// URI if the file was stored to workspace."
|
||||
"workspace_ref contains a workspace:// URI for workspace storage. "
|
||||
"For binary files, content contains a placeholder; use workspace_ref to access the file."
|
||||
)
|
||||
)
|
||||
conversation_history: str = SchemaField(
|
||||
@@ -452,13 +454,15 @@ class ClaudeCodeBlock(Block):
|
||||
else:
|
||||
new_conversation_history = turn_entry
|
||||
|
||||
# Extract files created/modified during this run and store to workspace
|
||||
# Extract files created/modified during this run and store to workspace.
|
||||
# Binary files (images, PDFs, etc.) are stored via store_media_file
|
||||
# which handles virus scanning and workspace storage.
|
||||
sandbox_files = await extract_and_store_sandbox_files(
|
||||
sandbox=sandbox,
|
||||
working_directory=working_directory,
|
||||
execution_context=execution_context,
|
||||
since_timestamp=start_timestamp,
|
||||
text_only=True,
|
||||
text_only=False,
|
||||
)
|
||||
|
||||
return (
|
||||
|
||||
@@ -331,10 +331,10 @@ async def upload_transcript(
|
||||
) -> None:
|
||||
"""Strip progress entries and upload transcript to bucket storage.
|
||||
|
||||
Safety: only overwrites when the new (stripped) transcript is larger than
|
||||
what is already stored. Since JSONL is append-only, the latest transcript
|
||||
is always the longest. This prevents a slow/stale background task from
|
||||
clobbering a newer upload from a concurrent turn.
|
||||
The executor holds a cluster lock per session, so concurrent uploads for
|
||||
the same session cannot happen. We always overwrite — with ``--resume``
|
||||
the CLI may compact old tool results, so neither byte size nor line count
|
||||
is a reliable proxy for "newer".
|
||||
|
||||
Args:
|
||||
message_count: ``len(session.messages)`` at upload time — used by
|
||||
@@ -353,33 +353,16 @@ async def upload_transcript(
|
||||
storage = await get_workspace_storage()
|
||||
wid, fid, fname = _storage_path_parts(user_id, session_id)
|
||||
encoded = stripped.encode("utf-8")
|
||||
new_size = len(encoded)
|
||||
|
||||
# Check existing transcript size to avoid overwriting newer with older
|
||||
path = _build_storage_path(user_id, session_id, storage)
|
||||
content_skipped = False
|
||||
try:
|
||||
existing = await storage.retrieve(path)
|
||||
if len(existing) >= new_size:
|
||||
logger.info(
|
||||
f"[Transcript] Skipping content upload — existing ({len(existing)}B) "
|
||||
f">= new ({new_size}B) for session {session_id}"
|
||||
)
|
||||
content_skipped = True
|
||||
except (FileNotFoundError, Exception):
|
||||
pass # No existing transcript or retrieval error — proceed with upload
|
||||
await storage.store(
|
||||
workspace_id=wid,
|
||||
file_id=fid,
|
||||
filename=fname,
|
||||
content=encoded,
|
||||
)
|
||||
|
||||
if not content_skipped:
|
||||
await storage.store(
|
||||
workspace_id=wid,
|
||||
file_id=fid,
|
||||
filename=fname,
|
||||
content=encoded,
|
||||
)
|
||||
|
||||
# Always update metadata (even when content is skipped) so message_count
|
||||
# stays current. The gap-fill logic in _build_query_message relies on
|
||||
# message_count to avoid re-compressing the same messages every turn.
|
||||
# Update metadata so message_count stays current. The gap-fill logic
|
||||
# in _build_query_message relies on it to avoid re-compressing messages.
|
||||
try:
|
||||
meta = {"message_count": message_count, "uploaded_at": time.time()}
|
||||
mwid, mfid, mfname = _meta_storage_path_parts(user_id, session_id)
|
||||
@@ -393,9 +376,8 @@ async def upload_transcript(
|
||||
logger.warning(f"[Transcript] Failed to write metadata for {session_id}: {e}")
|
||||
|
||||
logger.info(
|
||||
f"[Transcript] Uploaded {new_size}B "
|
||||
f"(stripped from {len(content)}B, msg_count={message_count}, "
|
||||
f"content_skipped={content_skipped}) "
|
||||
f"[Transcript] Uploaded {len(encoded)}B "
|
||||
f"(stripped from {len(content)}B, msg_count={message_count}) "
|
||||
f"for session {session_id}"
|
||||
)
|
||||
|
||||
|
||||
@@ -74,8 +74,50 @@ TEXT_EXTENSIONS = {
|
||||
".tex",
|
||||
".csv",
|
||||
".log",
|
||||
".svg", # SVG is XML-based text
|
||||
}
|
||||
|
||||
# Binary file extensions we explicitly support extracting
|
||||
BINARY_EXTENSIONS = {
|
||||
# Images
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".gif",
|
||||
".webp",
|
||||
".ico",
|
||||
".bmp",
|
||||
".tiff",
|
||||
".tif",
|
||||
# Documents
|
||||
".pdf",
|
||||
# Archives
|
||||
".zip",
|
||||
".tar",
|
||||
".gz",
|
||||
".7z",
|
||||
# Audio
|
||||
".mp3",
|
||||
".wav",
|
||||
".ogg",
|
||||
".flac",
|
||||
# Video
|
||||
".mp4",
|
||||
".webm",
|
||||
".mov",
|
||||
".avi",
|
||||
# Fonts
|
||||
".woff",
|
||||
".woff2",
|
||||
".ttf",
|
||||
".otf",
|
||||
".eot",
|
||||
}
|
||||
|
||||
# Maximum file size for binary extraction (50MB)
|
||||
# Prevents OOM from accidentally extracting huge files
|
||||
MAX_BINARY_FILE_SIZE = 50 * 1024 * 1024
|
||||
|
||||
|
||||
class SandboxFileOutput(BaseModel):
|
||||
"""A file extracted from a sandbox and optionally stored in workspace."""
|
||||
@@ -120,7 +162,8 @@ async def extract_sandbox_files(
|
||||
sandbox: The E2B sandbox instance
|
||||
working_directory: Directory to search for files
|
||||
since_timestamp: ISO timestamp - only return files modified after this time
|
||||
text_only: If True, only extract text files (default). If False, extract all files.
|
||||
text_only: If True, only extract text files. If False, also extract
|
||||
supported binary files (images, PDFs, etc.).
|
||||
|
||||
Returns:
|
||||
List of ExtractedFile objects with path, content, and metadata
|
||||
@@ -149,15 +192,48 @@ async def extract_sandbox_files(
|
||||
if not file_path:
|
||||
continue
|
||||
|
||||
# Check if it's a text file
|
||||
is_text = any(file_path.endswith(ext) for ext in TEXT_EXTENSIONS)
|
||||
# Check file type (case-insensitive for extensions)
|
||||
file_path_lower = file_path.lower()
|
||||
is_text = any(
|
||||
file_path_lower.endswith(ext.lower()) for ext in TEXT_EXTENSIONS
|
||||
)
|
||||
is_binary = any(
|
||||
file_path_lower.endswith(ext.lower()) for ext in BINARY_EXTENSIONS
|
||||
)
|
||||
|
||||
# Skip non-text files if text_only mode
|
||||
# Skip files with unrecognized extensions
|
||||
if not is_text and not is_binary:
|
||||
continue
|
||||
|
||||
# In text_only mode, skip binary files
|
||||
if text_only and not is_text:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Read file content as bytes
|
||||
# Check file size before reading to prevent OOM
|
||||
stat_result = await sandbox.commands.run(
|
||||
f"stat -c %s {shlex.quote(file_path)} 2>/dev/null"
|
||||
)
|
||||
if stat_result.exit_code != 0 or not stat_result.stdout:
|
||||
logger.debug(f"Skipping {file_path}: could not determine file size")
|
||||
continue
|
||||
|
||||
try:
|
||||
file_size = int(stat_result.stdout.strip())
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
f"Skipping {file_path}: unexpected stat output "
|
||||
f"{stat_result.stdout.strip()!r}"
|
||||
)
|
||||
continue
|
||||
|
||||
if file_size > MAX_BINARY_FILE_SIZE:
|
||||
logger.info(
|
||||
f"Skipping {file_path}: size {file_size} bytes "
|
||||
f"exceeds limit {MAX_BINARY_FILE_SIZE}"
|
||||
)
|
||||
continue
|
||||
|
||||
content = await sandbox.files.read(file_path, format="bytes")
|
||||
if isinstance(content, str):
|
||||
content = content.encode("utf-8")
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
"use server";
|
||||
|
||||
import { postV1GetOrCreateUser } from "@/app/api/__generated__/endpoints/auth/auth";
|
||||
import { getOnboardingStatus, resolveResponse } from "@/app/api/helpers";
|
||||
import { getServerSupabase } from "@/lib/supabase/server/getServerSupabase";
|
||||
import { signupFormSchema } from "@/types/auth";
|
||||
import * as Sentry from "@sentry/nextjs";
|
||||
import { isWaitlistError, logWaitlistError } from "../../api/auth/utils";
|
||||
import { getOnboardingStatus } from "../../api/helpers";
|
||||
|
||||
export async function signup(
|
||||
email: string,
|
||||
@@ -57,6 +58,17 @@ export async function signup(
|
||||
await supabase.auth.setSession(data.session);
|
||||
}
|
||||
|
||||
try {
|
||||
await resolveResponse(postV1GetOrCreateUser());
|
||||
} catch (createUserError) {
|
||||
console.error("Error creating user during signup:", createUserError);
|
||||
Sentry.captureException(createUserError);
|
||||
return {
|
||||
success: false,
|
||||
error: "Failed to complete account setup. Please try again.",
|
||||
};
|
||||
}
|
||||
|
||||
// Get onboarding status from backend (includes chat flag evaluated for this user)
|
||||
const { shouldShowOnboarding } = await getOnboardingStatus();
|
||||
const next = shouldShowOnboarding ? "/onboarding" : "/";
|
||||
|
||||
@@ -16,7 +16,7 @@ When activated, the block:
|
||||
- Install dependencies (npm, pip, etc.)
|
||||
- Run terminal commands
|
||||
- Build and test applications
|
||||
5. Extracts all text files created/modified during execution
|
||||
5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
|
||||
6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks
|
||||
|
||||
The block supports conversation continuation through three mechanisms:
|
||||
@@ -42,7 +42,7 @@ The block supports conversation continuation through three mechanisms:
|
||||
| Output | Description |
|
||||
|--------|-------------|
|
||||
| Response | The output/response from Claude Code execution |
|
||||
| Files | List of text files created/modified during execution. Each file includes path, relative_path, name, and content fields |
|
||||
| Files | List of files (text and binary) created/modified during execution. Includes images, PDFs, and other supported formats. Each file has path, relative_path, name, content, and workspace_ref fields. Binary files are stored in workspace and accessible via workspace_ref |
|
||||
| Conversation History | Full conversation history including this turn. Use to restore context on a fresh sandbox |
|
||||
| Session ID | Session ID for this conversation. Pass back with sandbox_id to continue the conversation |
|
||||
| Sandbox ID | ID of the sandbox instance (null if disposed). Pass back with session_id to continue the conversation |
|
||||
|
||||
@@ -535,7 +535,7 @@ When activated, the block:
|
||||
2. Installs the latest version of Claude Code in the sandbox
|
||||
3. Optionally runs setup commands to prepare the environment
|
||||
4. Executes your prompt using Claude Code, which can create/edit files, install dependencies, run terminal commands, and build applications
|
||||
5. Extracts all text files created/modified during execution
|
||||
5. Extracts all files created/modified during execution (text files and binary files like images, PDFs, etc.)
|
||||
6. Returns the response and files, optionally keeping the sandbox alive for follow-up tasks
|
||||
|
||||
The block supports conversation continuation through three mechanisms:
|
||||
@@ -563,7 +563,7 @@ The block supports conversation continuation through three mechanisms:
|
||||
|--------|-------------|------|
|
||||
| error | Error message if execution failed | str |
|
||||
| response | The output/response from Claude Code execution | str |
|
||||
| files | List of text files created/modified by Claude Code during this execution. Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI if the file was stored to workspace. | List[SandboxFileOutput] |
|
||||
| files | List of files created/modified by Claude Code during this execution. Includes text files and binary files (images, PDFs, etc.). Each file has 'path', 'relative_path', 'name', 'content', and 'workspace_ref' fields. workspace_ref contains a workspace:// URI for workspace storage. For binary files, content contains a placeholder; use workspace_ref to access the file. | List[SandboxFileOutput] |
|
||||
| conversation_history | Full conversation history including this turn. Pass this to conversation_history input to continue on a fresh sandbox if the previous sandbox timed out. | str |
|
||||
| session_id | Session ID for this conversation. Pass this back along with sandbox_id to continue the conversation. | str |
|
||||
| sandbox_id | ID of the sandbox instance. Pass this back along with session_id to continue the conversation. This is None if dispose_sandbox was True (sandbox was disposed). | str |
|
||||
|
||||
Reference in New Issue
Block a user