From c5d3405c7aa08417a6a39ece7779ab258a170f30 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 20 Jan 2026 11:44:24 -0800 Subject: [PATCH] removed upload for textract async version --- apps/docs/content/docs/en/tools/s3.mdx | 2 + .../sim/app/api/tools/s3/copy-object/route.ts | 2 + apps/sim/app/api/tools/s3/put-object/route.ts | 2 + .../sim/app/api/tools/textract/parse/route.ts | 33 +-------- apps/sim/blocks/blocks/s3.ts | 4 ++ apps/sim/blocks/blocks/textract.ts | 52 ++------------ .../core/security/input-validation.test.ts | 68 ++++++++++++++++++- .../sim/lib/core/security/input-validation.ts | 20 ++++-- apps/sim/tools/s3/copy_object.ts | 5 ++ apps/sim/tools/s3/put_object.ts | 5 ++ 10 files changed, 109 insertions(+), 84 deletions(-) diff --git a/apps/docs/content/docs/en/tools/s3.mdx b/apps/docs/content/docs/en/tools/s3.mdx index 615ba08c1..55ca4be64 100644 --- a/apps/docs/content/docs/en/tools/s3.mdx +++ b/apps/docs/content/docs/en/tools/s3.mdx @@ -58,6 +58,7 @@ Upload a file to an AWS S3 bucket | Parameter | Type | Description | | --------- | ---- | ----------- | | `url` | string | URL of the uploaded S3 object | +| `uri` | string | S3 URI of the uploaded object \(s3://bucket/key\) | | `metadata` | object | Upload metadata including ETag and location | ### `s3_get_object` @@ -149,6 +150,7 @@ Copy an object within or between AWS S3 buckets | Parameter | Type | Description | | --------- | ---- | ----------- | | `url` | string | URL of the copied S3 object | +| `uri` | string | S3 URI of the copied object \(s3://bucket/key\) | | `metadata` | object | Copy operation metadata | diff --git a/apps/sim/app/api/tools/s3/copy-object/route.ts b/apps/sim/app/api/tools/s3/copy-object/route.ts index 888aaf630..74b0d9ee5 100644 --- a/apps/sim/app/api/tools/s3/copy-object/route.ts +++ b/apps/sim/app/api/tools/s3/copy-object/route.ts @@ -79,11 +79,13 @@ export async function POST(request: NextRequest) { // Generate public URL for destination (properly encode the destination key) const encodedDestKey = validatedData.destinationKey.split('/').map(encodeURIComponent).join('/') const url = `https://${validatedData.destinationBucket}.s3.${validatedData.region}.amazonaws.com/${encodedDestKey}` + const uri = `s3://${validatedData.destinationBucket}/${validatedData.destinationKey}` return NextResponse.json({ success: true, output: { url, + uri, copySourceVersionId: result.CopySourceVersionId, versionId: result.VersionId, etag: result.CopyObjectResult?.ETag, diff --git a/apps/sim/app/api/tools/s3/put-object/route.ts b/apps/sim/app/api/tools/s3/put-object/route.ts index 2f7aced28..bd2bab3a6 100644 --- a/apps/sim/app/api/tools/s3/put-object/route.ts +++ b/apps/sim/app/api/tools/s3/put-object/route.ts @@ -117,11 +117,13 @@ export async function POST(request: NextRequest) { const encodedKey = validatedData.objectKey.split('/').map(encodeURIComponent).join('/') const url = `https://${validatedData.bucketName}.s3.${validatedData.region}.amazonaws.com/${encodedKey}` + const uri = `s3://${validatedData.bucketName}/${validatedData.objectKey}` return NextResponse.json({ success: true, output: { url, + uri, etag: result.ETag, location: url, key: validatedData.objectKey, diff --git a/apps/sim/app/api/tools/textract/parse/route.ts b/apps/sim/app/api/tools/textract/parse/route.ts index 2aae06fd1..8e2696e48 100644 --- a/apps/sim/app/api/tools/textract/parse/route.ts +++ b/apps/sim/app/api/tools/textract/parse/route.ts @@ -322,44 +322,17 @@ export async function POST(request: NextRequest) { }) if (processingMode === 'async') { - if (!validatedData.s3Uri && !validatedData.filePath) { + if (!validatedData.s3Uri) { return NextResponse.json( { success: false, - error: 'S3 URI or file path is required for multi-page processing', + error: 'S3 URI is required for multi-page processing (s3://bucket/key)', }, { status: 400 } ) } - let s3Bucket: string - let s3Key: string - - if (validatedData.s3Uri) { - const parsed = parseS3Uri(validatedData.s3Uri) - s3Bucket = parsed.bucket - s3Key = parsed.key - } else if (validatedData.filePath?.includes('/api/files/serve/')) { - const storageKey = extractStorageKey(validatedData.filePath) - const context = inferContextFromKey(storageKey) - - const hasAccess = await verifyFileAccess(storageKey, userId, undefined, context, false) - if (!hasAccess) { - return NextResponse.json({ success: false, error: 'File not found' }, { status: 404 }) - } - - const s3Info = StorageService.getS3InfoForKey(storageKey, context) - s3Bucket = s3Info.bucket - s3Key = s3Info.key - } else { - return NextResponse.json( - { - success: false, - error: 'Multi-page mode requires an S3 URI (s3://bucket/key) or an uploaded file', - }, - { status: 400 } - ) - } + const { bucket: s3Bucket, key: s3Key } = parseS3Uri(validatedData.s3Uri) logger.info(`[${requestId}] Starting async Textract job`, { s3Bucket, s3Key }) diff --git a/apps/sim/blocks/blocks/s3.ts b/apps/sim/blocks/blocks/s3.ts index ff0a0d53e..6dba63175 100644 --- a/apps/sim/blocks/blocks/s3.ts +++ b/apps/sim/blocks/blocks/s3.ts @@ -414,6 +414,10 @@ export const S3Block: BlockConfig = { }, outputs: { url: { type: 'string', description: 'URL of S3 object' }, + uri: { + type: 'string', + description: 'S3 URI (s3://bucket/key) for use with other AWS services', + }, objects: { type: 'json', description: 'List of objects (for list operation)' }, deleted: { type: 'boolean', description: 'Deletion status' }, metadata: { type: 'json', description: 'Operation metadata' }, diff --git a/apps/sim/blocks/blocks/textract.ts b/apps/sim/blocks/blocks/textract.ts index 12104ae83..ee76504e4 100644 --- a/apps/sim/blocks/blocks/textract.ts +++ b/apps/sim/blocks/blocks/textract.ts @@ -36,19 +36,6 @@ export const TextractBlock: BlockConfig = { not: true, }, }, - { - id: 'asyncInputMethod', - title: 'Select Input Method', - type: 'dropdown' as SubBlockType, - options: [ - { id: 's3', label: 'S3 URI' }, - { id: 'upload', label: 'Upload Document' }, - ], - condition: { - field: 'processingMode', - value: 'async', - }, - }, { id: 'filePath', title: 'Document URL', @@ -70,12 +57,8 @@ export const TextractBlock: BlockConfig = { type: 'short-input' as SubBlockType, placeholder: 's3://bucket-name/path/to/document.pdf', condition: { - field: 'asyncInputMethod', - value: 's3', - and: { - field: 'processingMode', - value: 'async', - }, + field: 'processingMode', + value: 'async', }, }, { @@ -94,21 +77,6 @@ export const TextractBlock: BlockConfig = { }, maxSize: 10, }, - { - id: 'asyncFileUpload', - title: 'Upload Document', - type: 'file-upload' as SubBlockType, - acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff', - condition: { - field: 'asyncInputMethod', - value: 'upload', - and: { - field: 'processingMode', - value: 'async', - }, - }, - maxSize: 50, - }, { id: 'region', title: 'AWS Region', @@ -177,18 +145,10 @@ export const TextractBlock: BlockConfig = { } if (processingMode === 'async') { - const asyncInputMethod = params.asyncInputMethod || 's3' - if (asyncInputMethod === 's3') { - if (!params.s3Uri || params.s3Uri.trim() === '') { - throw new Error('S3 URI is required for multi-page processing') - } - parameters.s3Uri = params.s3Uri.trim() - } else if (asyncInputMethod === 'upload') { - if (!params.asyncFileUpload) { - throw new Error('Please upload a document') - } - parameters.fileUpload = params.asyncFileUpload + if (!params.s3Uri || params.s3Uri.trim() === '') { + throw new Error('S3 URI is required for multi-page processing') } + parameters.s3Uri = params.s3Uri.trim() } else { const inputMethod = params.inputMethod || 'url' if (inputMethod === 'url') { @@ -221,11 +181,9 @@ export const TextractBlock: BlockConfig = { inputs: { processingMode: { type: 'string', description: 'Document type: single-page or multi-page' }, inputMethod: { type: 'string', description: 'Input method selection for single-page mode' }, - asyncInputMethod: { type: 'string', description: 'Input method selection for multi-page mode' }, filePath: { type: 'string', description: 'Document URL' }, s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' }, fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' }, - asyncFileUpload: { type: 'json', description: 'Uploaded document file for multi-page mode' }, extractTables: { type: 'boolean', description: 'Extract tables from document' }, extractForms: { type: 'boolean', description: 'Extract form key-value pairs' }, detectSignatures: { type: 'boolean', description: 'Detect signatures' }, diff --git a/apps/sim/lib/core/security/input-validation.test.ts b/apps/sim/lib/core/security/input-validation.test.ts index f15259a93..1a5d50d47 100644 --- a/apps/sim/lib/core/security/input-validation.test.ts +++ b/apps/sim/lib/core/security/input-validation.test.ts @@ -1196,7 +1196,7 @@ describe('validateAirtableId', () => { }) describe('validateAwsRegion', () => { - describe('valid regions', () => { + describe('valid standard regions', () => { it.concurrent('should accept us-east-1', () => { const result = validateAwsRegion('us-east-1') expect(result.isValid).toBe(true) @@ -1213,11 +1213,21 @@ describe('validateAwsRegion', () => { expect(result.isValid).toBe(true) }) + it.concurrent('should accept eu-central-1', () => { + const result = validateAwsRegion('eu-central-1') + expect(result.isValid).toBe(true) + }) + it.concurrent('should accept ap-southeast-1', () => { const result = validateAwsRegion('ap-southeast-1') expect(result.isValid).toBe(true) }) + it.concurrent('should accept ap-northeast-1', () => { + const result = validateAwsRegion('ap-northeast-1') + expect(result.isValid).toBe(true) + }) + it.concurrent('should accept sa-east-1', () => { const result = validateAwsRegion('sa-east-1') expect(result.isValid).toBe(true) @@ -1233,12 +1243,58 @@ describe('validateAwsRegion', () => { expect(result.isValid).toBe(true) }) + it.concurrent('should accept ca-central-1', () => { + const result = validateAwsRegion('ca-central-1') + expect(result.isValid).toBe(true) + }) + + it.concurrent('should accept il-central-1', () => { + const result = validateAwsRegion('il-central-1') + expect(result.isValid).toBe(true) + }) + it.concurrent('should accept regions with double-digit numbers', () => { const result = validateAwsRegion('ap-northeast-12') expect(result.isValid).toBe(true) }) }) + describe('valid GovCloud regions', () => { + it.concurrent('should accept us-gov-west-1', () => { + const result = validateAwsRegion('us-gov-west-1') + expect(result.isValid).toBe(true) + }) + + it.concurrent('should accept us-gov-east-1', () => { + const result = validateAwsRegion('us-gov-east-1') + expect(result.isValid).toBe(true) + }) + }) + + describe('valid China regions', () => { + it.concurrent('should accept cn-north-1', () => { + const result = validateAwsRegion('cn-north-1') + expect(result.isValid).toBe(true) + }) + + it.concurrent('should accept cn-northwest-1', () => { + const result = validateAwsRegion('cn-northwest-1') + expect(result.isValid).toBe(true) + }) + }) + + describe('valid ISO regions', () => { + it.concurrent('should accept us-iso-east-1', () => { + const result = validateAwsRegion('us-iso-east-1') + expect(result.isValid).toBe(true) + }) + + it.concurrent('should accept us-isob-east-1', () => { + const result = validateAwsRegion('us-isob-east-1') + expect(result.isValid).toBe(true) + }) + }) + describe('invalid regions', () => { it.concurrent('should reject null', () => { const result = validateAwsRegion(null) @@ -1282,6 +1338,16 @@ describe('validateAwsRegion', () => { expect(result.isValid).toBe(false) }) + it.concurrent('should reject invalid prefix', () => { + const result = validateAwsRegion('xx-east-1') + expect(result.isValid).toBe(false) + }) + + it.concurrent('should reject invalid direction', () => { + const result = validateAwsRegion('us-middle-1') + expect(result.isValid).toBe(false) + }) + it.concurrent('should use custom param name in errors', () => { const result = validateAwsRegion('', 'awsRegion') expect(result.error).toContain('awsRegion') diff --git a/apps/sim/lib/core/security/input-validation.ts b/apps/sim/lib/core/security/input-validation.ts index 36493b42f..331f2b7fc 100644 --- a/apps/sim/lib/core/security/input-validation.ts +++ b/apps/sim/lib/core/security/input-validation.ts @@ -937,8 +937,12 @@ export function validateAirtableId( /** * Validates an AWS region identifier * - * AWS regions follow the pattern: {area}-{sub-area}-{number} - * Examples: us-east-1, eu-west-2, ap-southeast-1, sa-east-1 + * Supported region formats: + * - Standard: us-east-1, eu-west-2, ap-southeast-1, sa-east-1, af-south-1 + * - GovCloud: us-gov-east-1, us-gov-west-1 + * - China: cn-north-1, cn-northwest-1 + * - Israel: il-central-1 + * - ISO partitions: us-iso-east-1, us-isob-east-1 * * @param value - The AWS region to validate * @param paramName - Name of the parameter for error messages @@ -963,9 +967,13 @@ export function validateAwsRegion( } } - // AWS region format: {area}-{sub-area}-{number} - // Examples: us-east-1, eu-west-2, ap-southeast-1, me-south-1, af-south-1 - const awsRegionPattern = /^[a-z]{2}-[a-z]+-\d{1,2}$/ + // AWS region patterns: + // - Standard: af|ap|ca|eu|me|sa|us|il followed by direction and number + // - GovCloud: us-gov-east-1, us-gov-west-1 + // - China: cn-north-1, cn-northwest-1 + // - ISO: us-iso-east-1, us-iso-west-1, us-isob-east-1 + const awsRegionPattern = + /^(af|ap|ca|cn|eu|il|me|sa|us|us-gov|us-iso|us-isob)-(central|north|northeast|northwest|south|southeast|southwest|east|west)-\d{1,2}$/ if (!awsRegionPattern.test(value)) { logger.warn('Invalid AWS region format', { @@ -974,7 +982,7 @@ export function validateAwsRegion( }) return { isValid: false, - error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2)`, + error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2, us-gov-west-1)`, } } diff --git a/apps/sim/tools/s3/copy_object.ts b/apps/sim/tools/s3/copy_object.ts index da583ca30..339106612 100644 --- a/apps/sim/tools/s3/copy_object.ts +++ b/apps/sim/tools/s3/copy_object.ts @@ -95,6 +95,7 @@ export const s3CopyObjectTool: ToolConfig = { success: true, output: { url: data.output.url, + uri: data.output.uri, metadata: { copySourceVersionId: data.output.copySourceVersionId, versionId: data.output.versionId, @@ -109,6 +110,10 @@ export const s3CopyObjectTool: ToolConfig = { type: 'string', description: 'URL of the copied S3 object', }, + uri: { + type: 'string', + description: 'S3 URI of the copied object (s3://bucket/key)', + }, metadata: { type: 'object', description: 'Copy operation metadata', diff --git a/apps/sim/tools/s3/put_object.ts b/apps/sim/tools/s3/put_object.ts index 6a1f596b6..92ffdf07d 100644 --- a/apps/sim/tools/s3/put_object.ts +++ b/apps/sim/tools/s3/put_object.ts @@ -102,6 +102,7 @@ export const s3PutObjectTool: ToolConfig = { success: true, output: { url: data.output.url, + uri: data.output.uri, metadata: { etag: data.output.etag, location: data.output.location, @@ -117,6 +118,10 @@ export const s3PutObjectTool: ToolConfig = { type: 'string', description: 'URL of the uploaded S3 object', }, + uri: { + type: 'string', + description: 'S3 URI of the uploaded object (s3://bucket/key)', + }, metadata: { type: 'object', description: 'Upload metadata including ETag and location',