From 704e2959baadefd3e192aaeddd3908a10123b6b3 Mon Sep 17 00:00:00 2001 From: Nicholas Tindle Date: Wed, 28 Jan 2026 03:53:25 -0600 Subject: [PATCH] fix(backend): add file extension from Content-Type header for URLs When downloading files from URLs without file extensions (like Unsplash images), detect the extension from the Content-Type response header. For example, `image/jpeg` becomes `.jpg`. Also updates FileStoreBlock description to clarify behavior in both CoPilot (saves to workspace) and graphs (outputs data URI). Co-Authored-By: Claude Opus 4.5 --- .../backend/backend/blocks/basic.py | 13 +++++++--- autogpt_platform/backend/backend/util/file.py | 26 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/autogpt_platform/backend/backend/blocks/basic.py b/autogpt_platform/backend/backend/blocks/basic.py index 8aae005007..fceb345ef1 100644 --- a/autogpt_platform/backend/backend/blocks/basic.py +++ b/autogpt_platform/backend/backend/blocks/basic.py @@ -18,10 +18,10 @@ from backend.util.type import MediaFileType, convert class FileStoreBlock(Block): class Input(BlockSchemaInput): file_in: MediaFileType = SchemaField( - description="The file to store, it can be a URL, data URI, or local path." + description="The file to download and store. Can be a URL (https://...), data URI, or local path." ) base_64: bool = SchemaField( - description="Whether produce an output in base64 format (not recommended, you can pass the string path just fine accross blocks).", + description="Whether to produce output in base64 format (not recommended, you can pass the file reference across blocks).", default=False, advanced=True, title="Produce Base64 Output", @@ -29,13 +29,18 @@ class FileStoreBlock(Block): class Output(BlockSchemaOutput): file_out: MediaFileType = SchemaField( - description="Reference to the stored file (workspace:// in CoPilot, data URI in graphs)." + description="Reference to the stored file. In CoPilot: workspace:// URI (visible in list_workspace_files). In graphs: data URI for passing to other blocks." ) def __init__(self): super().__init__( id="cbb50872-625b-42f0-8203-a2ae78242d8a", - description="Stores the input file. In CoPilot, saves to your workspace. In graphs, returns a data URI.", + description=( + "Downloads and stores a file from a URL, data URI, or local path. " + "Use this to fetch images, documents, or other files for processing. " + "In CoPilot: saves to workspace (use list_workspace_files to see it). " + "In graphs: outputs a data URI to pass to other blocks." + ), categories={BlockCategory.BASIC, BlockCategory.MULTIMEDIA}, input_schema=FileStoreBlock.Input, output_schema=FileStoreBlock.Output, diff --git a/autogpt_platform/backend/backend/util/file.py b/autogpt_platform/backend/backend/util/file.py index 27a99794f3..7dbf30bee7 100644 --- a/autogpt_platform/backend/backend/util/file.py +++ b/autogpt_platform/backend/backend/util/file.py @@ -276,15 +276,7 @@ async def store_media_file( target_path.write_bytes(content) elif file.startswith(("http://", "https://")): - # URL - parsed_url = urlparse(file) - filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}") - try: - target_path = _ensure_inside_base(base_path / filename, base_path) - except OSError as e: - raise ValueError(f"Invalid file path '{filename}': {e}") from e - - # Download and save + # URL - download first to get Content-Type header resp = await Requests().get(file) # Check file size limit @@ -293,6 +285,22 @@ async def store_media_file( f"File too large: {len(resp.content)} bytes > {MAX_FILE_SIZE_BYTES} bytes" ) + # Extract filename from URL path + parsed_url = urlparse(file) + filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}") + + # If filename lacks extension, add one from Content-Type header + if "." not in filename: + content_type = resp.headers.get("Content-Type", "").split(";")[0].strip() + if content_type: + ext = _extension_from_mime(content_type) + filename = f"{filename}{ext}" + + try: + target_path = _ensure_inside_base(base_path / filename, base_path) + except OSError as e: + raise ValueError(f"Invalid file path '{filename}': {e}") from e + # Virus scan the downloaded content before writing await scan_content_safe(resp.content, filename=filename) target_path.write_bytes(resp.content)