Remove the file-upload input from Textract async (multi-page) mode; an S3 URI is now required

This commit is contained in:
waleed
2026-01-20 11:44:24 -08:00
parent 0ac6fec0a5
commit c5d3405c7a
10 changed files with 109 additions and 84 deletions

View File

@@ -58,6 +58,7 @@ Upload a file to an AWS S3 bucket
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `url` | string | URL of the uploaded S3 object |
| `uri` | string | S3 URI of the uploaded object \(s3://bucket/key\) |
| `metadata` | object | Upload metadata including ETag and location |
### `s3_get_object`
@@ -149,6 +150,7 @@ Copy an object within or between AWS S3 buckets
| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `url` | string | URL of the copied S3 object |
| `uri` | string | S3 URI of the copied object \(s3://bucket/key\) |
| `metadata` | object | Copy operation metadata |

View File

@@ -79,11 +79,13 @@ export async function POST(request: NextRequest) {
// Build the virtual-hosted-style URL for the destination object (each key segment is URL-encoded; the object is only reachable if its ACL/policy allows it)
const encodedDestKey = validatedData.destinationKey.split('/').map(encodeURIComponent).join('/')
const url = `https://${validatedData.destinationBucket}.s3.${validatedData.region}.amazonaws.com/${encodedDestKey}`
const uri = `s3://${validatedData.destinationBucket}/${validatedData.destinationKey}`
return NextResponse.json({
success: true,
output: {
url,
uri,
copySourceVersionId: result.CopySourceVersionId,
versionId: result.VersionId,
etag: result.CopyObjectResult?.ETag,

View File

@@ -117,11 +117,13 @@ export async function POST(request: NextRequest) {
const encodedKey = validatedData.objectKey.split('/').map(encodeURIComponent).join('/')
const url = `https://${validatedData.bucketName}.s3.${validatedData.region}.amazonaws.com/${encodedKey}`
const uri = `s3://${validatedData.bucketName}/${validatedData.objectKey}`
return NextResponse.json({
success: true,
output: {
url,
uri,
etag: result.ETag,
location: url,
key: validatedData.objectKey,

View File

@@ -322,44 +322,17 @@ export async function POST(request: NextRequest) {
})
if (processingMode === 'async') {
if (!validatedData.s3Uri && !validatedData.filePath) {
if (!validatedData.s3Uri) {
return NextResponse.json(
{
success: false,
error: 'S3 URI or file path is required for multi-page processing',
error: 'S3 URI is required for multi-page processing (s3://bucket/key)',
},
{ status: 400 }
)
}
let s3Bucket: string
let s3Key: string
if (validatedData.s3Uri) {
const parsed = parseS3Uri(validatedData.s3Uri)
s3Bucket = parsed.bucket
s3Key = parsed.key
} else if (validatedData.filePath?.includes('/api/files/serve/')) {
const storageKey = extractStorageKey(validatedData.filePath)
const context = inferContextFromKey(storageKey)
const hasAccess = await verifyFileAccess(storageKey, userId, undefined, context, false)
if (!hasAccess) {
return NextResponse.json({ success: false, error: 'File not found' }, { status: 404 })
}
const s3Info = StorageService.getS3InfoForKey(storageKey, context)
s3Bucket = s3Info.bucket
s3Key = s3Info.key
} else {
return NextResponse.json(
{
success: false,
error: 'Multi-page mode requires an S3 URI (s3://bucket/key) or an uploaded file',
},
{ status: 400 }
)
}
const { bucket: s3Bucket, key: s3Key } = parseS3Uri(validatedData.s3Uri)
logger.info(`[${requestId}] Starting async Textract job`, { s3Bucket, s3Key })

View File

@@ -414,6 +414,10 @@ export const S3Block: BlockConfig<S3Response> = {
},
outputs: {
url: { type: 'string', description: 'URL of S3 object' },
uri: {
type: 'string',
description: 'S3 URI (s3://bucket/key) for use with other AWS services',
},
objects: { type: 'json', description: 'List of objects (for list operation)' },
deleted: { type: 'boolean', description: 'Deletion status' },
metadata: { type: 'json', description: 'Operation metadata' },

View File

@@ -36,19 +36,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
not: true,
},
},
{
id: 'asyncInputMethod',
title: 'Select Input Method',
type: 'dropdown' as SubBlockType,
options: [
{ id: 's3', label: 'S3 URI' },
{ id: 'upload', label: 'Upload Document' },
],
condition: {
field: 'processingMode',
value: 'async',
},
},
{
id: 'filePath',
title: 'Document URL',
@@ -70,12 +57,8 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
type: 'short-input' as SubBlockType,
placeholder: 's3://bucket-name/path/to/document.pdf',
condition: {
field: 'asyncInputMethod',
value: 's3',
and: {
field: 'processingMode',
value: 'async',
},
field: 'processingMode',
value: 'async',
},
},
{
@@ -94,21 +77,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
},
maxSize: 10,
},
{
id: 'asyncFileUpload',
title: 'Upload Document',
type: 'file-upload' as SubBlockType,
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
condition: {
field: 'asyncInputMethod',
value: 'upload',
and: {
field: 'processingMode',
value: 'async',
},
},
maxSize: 50,
},
{
id: 'region',
title: 'AWS Region',
@@ -177,18 +145,10 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
}
if (processingMode === 'async') {
const asyncInputMethod = params.asyncInputMethod || 's3'
if (asyncInputMethod === 's3') {
if (!params.s3Uri || params.s3Uri.trim() === '') {
throw new Error('S3 URI is required for multi-page processing')
}
parameters.s3Uri = params.s3Uri.trim()
} else if (asyncInputMethod === 'upload') {
if (!params.asyncFileUpload) {
throw new Error('Please upload a document')
}
parameters.fileUpload = params.asyncFileUpload
if (!params.s3Uri || params.s3Uri.trim() === '') {
throw new Error('S3 URI is required for multi-page processing')
}
parameters.s3Uri = params.s3Uri.trim()
} else {
const inputMethod = params.inputMethod || 'url'
if (inputMethod === 'url') {
@@ -221,11 +181,9 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
inputs: {
processingMode: { type: 'string', description: 'Document type: single-page or multi-page' },
inputMethod: { type: 'string', description: 'Input method selection for single-page mode' },
asyncInputMethod: { type: 'string', description: 'Input method selection for multi-page mode' },
filePath: { type: 'string', description: 'Document URL' },
s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' },
fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' },
asyncFileUpload: { type: 'json', description: 'Uploaded document file for multi-page mode' },
extractTables: { type: 'boolean', description: 'Extract tables from document' },
extractForms: { type: 'boolean', description: 'Extract form key-value pairs' },
detectSignatures: { type: 'boolean', description: 'Detect signatures' },

View File

@@ -1196,7 +1196,7 @@ describe('validateAirtableId', () => {
})
describe('validateAwsRegion', () => {
describe('valid regions', () => {
describe('valid standard regions', () => {
it.concurrent('should accept us-east-1', () => {
const result = validateAwsRegion('us-east-1')
expect(result.isValid).toBe(true)
@@ -1213,11 +1213,21 @@ describe('validateAwsRegion', () => {
expect(result.isValid).toBe(true)
})
it.concurrent('should accept eu-central-1', () => {
const result = validateAwsRegion('eu-central-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept ap-southeast-1', () => {
const result = validateAwsRegion('ap-southeast-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept ap-northeast-1', () => {
const result = validateAwsRegion('ap-northeast-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept sa-east-1', () => {
const result = validateAwsRegion('sa-east-1')
expect(result.isValid).toBe(true)
@@ -1233,12 +1243,58 @@ describe('validateAwsRegion', () => {
expect(result.isValid).toBe(true)
})
it.concurrent('should accept ca-central-1', () => {
const result = validateAwsRegion('ca-central-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept il-central-1', () => {
const result = validateAwsRegion('il-central-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept regions with double-digit numbers', () => {
const result = validateAwsRegion('ap-northeast-12')
expect(result.isValid).toBe(true)
})
})
describe('valid GovCloud regions', () => {
it.concurrent('should accept us-gov-west-1', () => {
const result = validateAwsRegion('us-gov-west-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept us-gov-east-1', () => {
const result = validateAwsRegion('us-gov-east-1')
expect(result.isValid).toBe(true)
})
})
describe('valid China regions', () => {
it.concurrent('should accept cn-north-1', () => {
const result = validateAwsRegion('cn-north-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept cn-northwest-1', () => {
const result = validateAwsRegion('cn-northwest-1')
expect(result.isValid).toBe(true)
})
})
describe('valid ISO regions', () => {
it.concurrent('should accept us-iso-east-1', () => {
const result = validateAwsRegion('us-iso-east-1')
expect(result.isValid).toBe(true)
})
it.concurrent('should accept us-isob-east-1', () => {
const result = validateAwsRegion('us-isob-east-1')
expect(result.isValid).toBe(true)
})
})
describe('invalid regions', () => {
it.concurrent('should reject null', () => {
const result = validateAwsRegion(null)
@@ -1282,6 +1338,16 @@ describe('validateAwsRegion', () => {
expect(result.isValid).toBe(false)
})
it.concurrent('should reject invalid prefix', () => {
const result = validateAwsRegion('xx-east-1')
expect(result.isValid).toBe(false)
})
it.concurrent('should reject invalid direction', () => {
const result = validateAwsRegion('us-middle-1')
expect(result.isValid).toBe(false)
})
it.concurrent('should use custom param name in errors', () => {
const result = validateAwsRegion('', 'awsRegion')
expect(result.error).toContain('awsRegion')

View File

@@ -937,8 +937,12 @@ export function validateAirtableId(
/**
* Validates an AWS region identifier
*
* AWS regions follow the pattern: {area}-{sub-area}-{number}
* Examples: us-east-1, eu-west-2, ap-southeast-1, sa-east-1
* Supported region formats:
* - Standard: us-east-1, eu-west-2, ap-southeast-1, sa-east-1, af-south-1
* - GovCloud: us-gov-east-1, us-gov-west-1
* - China: cn-north-1, cn-northwest-1
* - Israel: il-central-1
* - ISO partitions: us-iso-east-1, us-isob-east-1
*
* @param value - The AWS region to validate
* @param paramName - Name of the parameter for error messages
@@ -963,9 +967,13 @@ export function validateAwsRegion(
}
}
// AWS region format: {area}-{sub-area}-{number}
// Examples: us-east-1, eu-west-2, ap-southeast-1, me-south-1, af-south-1
const awsRegionPattern = /^[a-z]{2}-[a-z]+-\d{1,2}$/
// AWS region patterns:
// - Standard: af|ap|ca|eu|me|sa|us|il followed by direction and number
// - GovCloud: us-gov-east-1, us-gov-west-1
// - China: cn-north-1, cn-northwest-1
// - ISO: us-iso-east-1, us-iso-west-1, us-isob-east-1
const awsRegionPattern =
/^(af|ap|ca|cn|eu|il|me|sa|us|us-gov|us-iso|us-isob)-(central|north|northeast|northwest|south|southeast|southwest|east|west)-\d{1,2}$/
if (!awsRegionPattern.test(value)) {
logger.warn('Invalid AWS region format', {
@@ -974,7 +982,7 @@ export function validateAwsRegion(
})
return {
isValid: false,
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2)`,
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2, us-gov-west-1)`,
}
}

View File

@@ -95,6 +95,7 @@ export const s3CopyObjectTool: ToolConfig = {
success: true,
output: {
url: data.output.url,
uri: data.output.uri,
metadata: {
copySourceVersionId: data.output.copySourceVersionId,
versionId: data.output.versionId,
@@ -109,6 +110,10 @@ export const s3CopyObjectTool: ToolConfig = {
type: 'string',
description: 'URL of the copied S3 object',
},
uri: {
type: 'string',
description: 'S3 URI of the copied object (s3://bucket/key)',
},
metadata: {
type: 'object',
description: 'Copy operation metadata',

View File

@@ -102,6 +102,7 @@ export const s3PutObjectTool: ToolConfig = {
success: true,
output: {
url: data.output.url,
uri: data.output.uri,
metadata: {
etag: data.output.etag,
location: data.output.location,
@@ -117,6 +118,10 @@ export const s3PutObjectTool: ToolConfig = {
type: 'string',
description: 'URL of the uploaded S3 object',
},
uri: {
type: 'string',
description: 'S3 URI of the uploaded object (s3://bucket/key)',
},
metadata: {
type: 'object',
description: 'Upload metadata including ETag and location',