From 2944579d21a75eeb1d480bbfe8c8d37e28a1d6cc Mon Sep 17 00:00:00 2001 From: Waleed Date: Thu, 12 Feb 2026 10:59:22 -0800 Subject: [PATCH] fix(s3): support get-object region override and robust S3 URL parsing (#3206) * fix(s3): support get-object region override and robust S3 URL parsing * ack pr comments --- apps/sim/blocks/blocks/s3.ts | 44 +++++++++--------------- apps/sim/tools/s3/get_object.ts | 15 ++++++--- apps/sim/tools/s3/utils.ts | 60 +++++++++++++++++++++++++++++---- 3 files changed, 81 insertions(+), 38 deletions(-) diff --git a/apps/sim/blocks/blocks/s3.ts b/apps/sim/blocks/blocks/s3.ts index 10491a078..30fabd9d3 100644 --- a/apps/sim/blocks/blocks/s3.ts +++ b/apps/sim/blocks/blocks/s3.ts @@ -58,6 +58,16 @@ export const S3Block: BlockConfig = { }, required: true, }, + { + id: 'getObjectRegion', + title: 'AWS Region', + type: 'short-input', + placeholder: 'Used when S3 URL does not include region', + condition: { + field: 'operation', + value: ['get_object'], + }, + }, { id: 'bucketName', title: 'Bucket Name', @@ -291,34 +301,11 @@ export const S3Block: BlockConfig = { if (!params.s3Uri) { throw new Error('S3 Object URL is required') } - - // Parse S3 URI for get_object - try { - const url = new URL(params.s3Uri) - const hostname = url.hostname - const bucketName = hostname.split('.')[0] - const regionMatch = hostname.match(/s3[.-]([^.]+)\.amazonaws\.com/) - const region = regionMatch ? regionMatch[1] : params.region - const objectKey = url.pathname.startsWith('/') - ? url.pathname.substring(1) - : url.pathname - - if (!bucketName || !objectKey) { - throw new Error('Could not parse S3 URL') - } - - return { - accessKeyId: params.accessKeyId, - secretAccessKey: params.secretAccessKey, - region, - bucketName, - objectKey, - s3Uri: params.s3Uri, - } - } catch (_error) { - throw new Error( - 'Invalid S3 Object URL format. Expected: https://bucket-name.s3.region.amazonaws.com/path/to/file' - ) + return { + accessKeyId: params.accessKeyId, + secretAccessKey: params.secretAccessKey, + region: params.getObjectRegion || params.region, + s3Uri: params.s3Uri, } } @@ -401,6 +388,7 @@ export const S3Block: BlockConfig = { acl: { type: 'string', description: 'Access control list' }, // Download inputs s3Uri: { type: 'string', description: 'S3 object URL' }, + getObjectRegion: { type: 'string', description: 'Optional AWS region override for downloads' }, // List inputs prefix: { type: 'string', description: 'Prefix filter' }, maxKeys: { type: 'number', description: 'Maximum results' }, diff --git a/apps/sim/tools/s3/get_object.ts b/apps/sim/tools/s3/get_object.ts index 585604265..1e83ecc8b 100644 --- a/apps/sim/tools/s3/get_object.ts +++ b/apps/sim/tools/s3/get_object.ts @@ -26,6 +26,13 @@ export const s3GetObjectTool: ToolConfig = { visibility: 'user-only', description: 'Your AWS Secret Access Key', }, + region: { + type: 'string', + required: false, + visibility: 'user-only', + description: + 'Optional region override when URL does not include region (e.g., us-east-1, eu-west-1)', + }, s3Uri: { type: 'string', required: true, @@ -37,7 +44,7 @@ export const s3GetObjectTool: ToolConfig = { request: { url: (params) => { try { - const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri) + const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri, params.region) params.bucketName = bucketName params.region = region @@ -46,7 +53,7 @@ export const s3GetObjectTool: ToolConfig = { return `https://${bucketName}.s3.${region}.amazonaws.com/${encodeS3PathComponent(objectKey)}` } catch (_error) { throw new Error( - 'Invalid S3 Object URL format. Expected format: https://bucket-name.s3.region.amazonaws.com/path/to/file' + 'Invalid S3 Object URL. Use a valid S3 URL and optionally provide region if the URL omits it.' ) } }, @@ -55,7 +62,7 @@ export const s3GetObjectTool: ToolConfig = { try { // Parse S3 URI if not already parsed if (!params.bucketName || !params.region || !params.objectKey) { - const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri) + const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri, params.region) params.bucketName = bucketName params.region = region params.objectKey = objectKey @@ -102,7 +109,7 @@ export const s3GetObjectTool: ToolConfig = { transformResponse: async (response: Response, params) => { // Parse S3 URI if not already parsed if (!params.bucketName || !params.region || !params.objectKey) { - const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri) + const { bucketName, region, objectKey } = parseS3Uri(params.s3Uri, params.region) params.bucketName = bucketName params.region = region params.objectKey = objectKey diff --git a/apps/sim/tools/s3/utils.ts b/apps/sim/tools/s3/utils.ts index a0815a878..8d5f5ad65 100644 --- a/apps/sim/tools/s3/utils.ts +++ b/apps/sim/tools/s3/utils.ts @@ -20,7 +20,10 @@ export function getSignatureKey( return kSigning } -export function parseS3Uri(s3Uri: string): { +export function parseS3Uri( + s3Uri: string, + fallbackRegion?: string +): { bucketName: string region: string objectKey: string @@ -28,10 +31,55 @@ export function parseS3Uri(s3Uri: string): { try { const url = new URL(s3Uri) const hostname = url.hostname - const bucketName = hostname.split('.')[0] - const regionMatch = hostname.match(/s3[.-]([^.]+)\.amazonaws\.com/) - const region = regionMatch ? regionMatch[1] : 'us-east-1' - const objectKey = url.pathname.startsWith('/') ? url.pathname.substring(1) : url.pathname + const normalizedPath = url.pathname.startsWith('/') ? url.pathname.slice(1) : url.pathname + + const virtualHostedDualstackMatch = hostname.match( + /^(.+)\.s3\.dualstack\.([^.]+)\.amazonaws\.com(?:\.cn)?$/ + ) + const virtualHostedRegionalMatch = hostname.match( + /^(.+)\.s3[.-]([^.]+)\.amazonaws\.com(?:\.cn)?$/ + ) + const virtualHostedGlobalMatch = hostname.match(/^(.+)\.s3\.amazonaws\.com(?:\.cn)?$/) + + const pathStyleDualstackMatch = hostname.match( + /^s3\.dualstack\.([^.]+)\.amazonaws\.com(?:\.cn)?$/ + ) + const pathStyleRegionalMatch = hostname.match(/^s3[.-]([^.]+)\.amazonaws\.com(?:\.cn)?$/) + const pathStyleGlobalMatch = hostname.match(/^s3\.amazonaws\.com(?:\.cn)?$/) + + const isPathStyleHost = Boolean( + pathStyleDualstackMatch || pathStyleRegionalMatch || pathStyleGlobalMatch + ) + + const firstSlashIndex = normalizedPath.indexOf('/') + const pathStyleBucketName = + firstSlashIndex === -1 ? normalizedPath : normalizedPath.slice(0, firstSlashIndex) + const pathStyleObjectKey = + firstSlashIndex === -1 ? '' : normalizedPath.slice(firstSlashIndex + 1) + + const bucketName = isPathStyleHost + ? pathStyleBucketName + : (virtualHostedDualstackMatch?.[1] ?? + virtualHostedRegionalMatch?.[1] ?? + virtualHostedGlobalMatch?.[1] ?? + '') + + const rawObjectKey = isPathStyleHost ? pathStyleObjectKey : normalizedPath + const objectKey = (() => { + try { + return decodeURIComponent(rawObjectKey) + } catch { + return rawObjectKey + } + })() + + const normalizedFallbackRegion = fallbackRegion?.trim() + const regionFromHost = + virtualHostedDualstackMatch?.[2] ?? + virtualHostedRegionalMatch?.[2] ?? + pathStyleDualstackMatch?.[1] ?? + pathStyleRegionalMatch?.[1] + const region = regionFromHost || normalizedFallbackRegion || 'us-east-1' if (!bucketName || !objectKey) { throw new Error('Invalid S3 URI format') @@ -40,7 +88,7 @@ export function parseS3Uri(s3Uri: string): { return { bucketName, region, objectKey } } catch (_error) { throw new Error( - 'Invalid S3 Object URL format. Expected format: https://bucket-name.s3.region.amazonaws.com/path/to/file' + 'Invalid S3 Object URL format. Expected S3 virtual-hosted or path-style URL with object key.' ) } }