fix(backend): add file extension from Content-Type header for URLs

When downloading files from URLs without file extensions (like Unsplash
images), detect the extension from the Content-Type response header.
For example, `image/jpeg` becomes `.jpg`.

Also updates FileStoreBlock description to clarify behavior in both
CoPilot (saves to workspace) and graphs (outputs data URI).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Nicholas Tindle
2026-01-28 03:53:25 -06:00
parent cc95f1dbd6
commit 704e2959ba
2 changed files with 26 additions and 13 deletions

View File

@@ -18,10 +18,10 @@ from backend.util.type import MediaFileType, convert
class FileStoreBlock(Block):
class Input(BlockSchemaInput):
file_in: MediaFileType = SchemaField(
description="The file to store, it can be a URL, data URI, or local path."
description="The file to download and store. Can be a URL (https://...), data URI, or local path."
)
base_64: bool = SchemaField(
description="Whether produce an output in base64 format (not recommended, you can pass the string path just fine accross blocks).",
description="Whether to produce output in base64 format (not recommended, you can pass the file reference across blocks).",
default=False,
advanced=True,
title="Produce Base64 Output",
@@ -29,13 +29,18 @@ class FileStoreBlock(Block):
class Output(BlockSchemaOutput):
file_out: MediaFileType = SchemaField(
description="Reference to the stored file (workspace:// in CoPilot, data URI in graphs)."
description="Reference to the stored file. In CoPilot: workspace:// URI (visible in list_workspace_files). In graphs: data URI for passing to other blocks."
)
def __init__(self):
super().__init__(
id="cbb50872-625b-42f0-8203-a2ae78242d8a",
description="Stores the input file. In CoPilot, saves to your workspace. In graphs, returns a data URI.",
description=(
"Downloads and stores a file from a URL, data URI, or local path. "
"Use this to fetch images, documents, or other files for processing. "
"In CoPilot: saves to workspace (use list_workspace_files to see it). "
"In graphs: outputs a data URI to pass to other blocks."
),
categories={BlockCategory.BASIC, BlockCategory.MULTIMEDIA},
input_schema=FileStoreBlock.Input,
output_schema=FileStoreBlock.Output,

View File

@@ -276,15 +276,7 @@ async def store_media_file(
target_path.write_bytes(content)
elif file.startswith(("http://", "https://")):
# URL
parsed_url = urlparse(file)
filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}")
try:
target_path = _ensure_inside_base(base_path / filename, base_path)
except OSError as e:
raise ValueError(f"Invalid file path '{filename}': {e}") from e
# Download and save
# URL - download first to get Content-Type header
resp = await Requests().get(file)
# Check file size limit
@@ -293,6 +285,22 @@ async def store_media_file(
f"File too large: {len(resp.content)} bytes > {MAX_FILE_SIZE_BYTES} bytes"
)
# Extract filename from URL path
parsed_url = urlparse(file)
filename = sanitize_filename(Path(parsed_url.path).name or f"{uuid.uuid4()}")
# If filename lacks extension, add one from Content-Type header
if "." not in filename:
content_type = resp.headers.get("Content-Type", "").split(";")[0].strip()
if content_type:
ext = _extension_from_mime(content_type)
filename = f"{filename}{ext}"
try:
target_path = _ensure_inside_base(base_path / filename, base_path)
except OSError as e:
raise ValueError(f"Invalid file path '{filename}': {e}") from e
# Virus scan the downloaded content before writing
await scan_content_safe(resp.content, filename=filename)
target_path.write_bytes(resp.content)