From 704e2959baadefd3e192aaeddd3908a10123b6b3 Mon Sep 17 00:00:00 2001
From: Nicholas Tindle <nicholas.tindle@agpt.co>
Date: Wed, 28 Jan 2026 03:53:25 -0600
Subject: [PATCH] fix(backend): add file extension from Content-Type header for
 URLs

When downloading files from URLs without file extensions (like Unsplash
images), detect the extension from the Content-Type response header.
For example, `image/jpeg` becomes `.jpg`.

Also updates the FileStoreBlock field and block descriptions to clarify
behavior in both CoPilot (saves to workspace) and graphs (outputs a data
URI).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../backend/backend/blocks/basic.py           | 13 +++++++---
 autogpt_platform/backend/backend/util/file.py | 26 ++++++++++++-------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/autogpt_platform/backend/backend/blocks/basic.py b/autogpt_platform/backend/backend/blocks/basic.py
index 8aae005007..fceb345ef1 100644
--- a/autogpt_platform/backend/backend/blocks/basic.py
+++ b/autogpt_platform/backend/backend/blocks/basic.py
@@ -18,10 +18,10 @@ from backend.util.type import MediaFileType, convert
 class FileStoreBlock(Block):
     class Input(BlockSchemaInput):
         file_in: MediaFileType = SchemaField(
-            description="The file to store, it can be a URL, data URI, or local path."
+            description="The file to download and store. Can be a URL (https://...), data URI, or local path."
         )
         base_64: bool = SchemaField(
-            description="Whether produce an output in base64 format (not recommended, you can pass the string path just fine accross blocks).",
+            description="Whether to produce output in base64 format (not recommended, you can pass the file reference across blocks).",
             default=False,
             advanced=True,
             title="Produce Base64 Output",
@@ -29,13 +29,18 @@ class FileStoreBlock(Block):
 
     class Output(BlockSchemaOutput):
         file_out: MediaFileType = SchemaField(
-            description="Reference to the stored file (workspace:// in CoPilot, data URI in graphs)."
+            description="Reference to the stored file. In CoPilot: workspace:// URI (visible in list_workspace_files). In graphs: data URI for passing to other blocks."
         )
 
     def __init__(self):
         super().__init__(
             id="cbb50872-625b-42f0-8203-a2ae78242d8a",
-            description="Stores the input file. In CoPilot, saves to your workspace. In graphs, returns a data URI.",
+            description=(
+                "Downloads and stores a file from a URL, data URI, or local path. "
+                "Use this to fetch images, documents, or other files for processing. "
+                "In CoPilot: saves to workspace (use list_workspace_files to see it). "
+                "In graphs: outputs a data URI to pass to other blocks."
+            ),
             categories={BlockCategory.BASIC, BlockCategory.MULTIMEDIA},
             input_schema=FileStoreBlock.Input,
             output_schema=FileStoreBlock.Output,
diff --git a/autogpt_platform/backend/backend/util/file.py b/autogpt_platform/backend/backend/util/file.py
index 27a99794f3..7dbf30bee7 100644
--- a/autogpt_platform/backend/backend/util/file.py
+++ b/autogpt_platform/backend/backend/util/file.py
@@ -276,15 +276,7 @@ async def store_media_file(
         target_path.write_bytes(content)
 
     elif file.startswith(("http://", "https://")):
-        # URL
-        parsed_url = urlparse(file)
-        filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}")
-        try:
-            target_path = _ensure_inside_base(base_path / filename, base_path)
-        except OSError as e:
-            raise ValueError(f"Invalid file path '{filename}': {e}") from e
-
-        # Download and save
+        # URL - download first to get Content-Type header
         resp = await Requests().get(file)
 
         # Check file size limit
@@ -293,6 +285,22 @@ async def store_media_file(
                 f"File too large: {len(resp.content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
             )
 
+        # Extract filename from URL path
+        parsed_url = urlparse(file)
+        filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}")
+
+        # If filename lacks extension, add one from Content-Type header
+        if "." not in filename:
+            content_type = resp.headers.get("Content-Type", "").split(";")[0].strip()
+            if content_type:
+                ext = _extension_from_mime(content_type)
+                filename = f"{filename}{ext}"
+
+        try:
+            target_path = _ensure_inside_base(base_path / filename, base_path)
+        except OSError as e:
+            raise ValueError(f"Invalid file path '{filename}': {e}") from e
+
         # Virus scan the downloaded content before writing
         await scan_content_safe(resp.content, filename=filename)
         target_path.write_bytes(resp.content)