mirror of
https://github.com/simstudioai/sim.git
synced 2026-01-20 20:38:16 -05:00
removed upload for textract async version
This commit is contained in:
@@ -58,6 +58,7 @@ Upload a file to an AWS S3 bucket
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `url` | string | URL of the uploaded S3 object |
|
||||
| `uri` | string | S3 URI of the uploaded object \(s3://bucket/key\) |
|
||||
| `metadata` | object | Upload metadata including ETag and location |
|
||||
|
||||
### `s3_get_object`
|
||||
@@ -149,6 +150,7 @@ Copy an object within or between AWS S3 buckets
|
||||
| Parameter | Type | Description |
|
||||
| --------- | ---- | ----------- |
|
||||
| `url` | string | URL of the copied S3 object |
|
||||
| `uri` | string | S3 URI of the copied object \(s3://bucket/key\) |
|
||||
| `metadata` | object | Copy operation metadata |
|
||||
|
||||
|
||||
|
||||
@@ -79,11 +79,13 @@ export async function POST(request: NextRequest) {
|
||||
// Generate public URL for destination (properly encode the destination key)
|
||||
const encodedDestKey = validatedData.destinationKey.split('/').map(encodeURIComponent).join('/')
|
||||
const url = `https://${validatedData.destinationBucket}.s3.${validatedData.region}.amazonaws.com/${encodedDestKey}`
|
||||
const uri = `s3://${validatedData.destinationBucket}/${validatedData.destinationKey}`
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
output: {
|
||||
url,
|
||||
uri,
|
||||
copySourceVersionId: result.CopySourceVersionId,
|
||||
versionId: result.VersionId,
|
||||
etag: result.CopyObjectResult?.ETag,
|
||||
|
||||
@@ -117,11 +117,13 @@ export async function POST(request: NextRequest) {
|
||||
|
||||
const encodedKey = validatedData.objectKey.split('/').map(encodeURIComponent).join('/')
|
||||
const url = `https://${validatedData.bucketName}.s3.${validatedData.region}.amazonaws.com/${encodedKey}`
|
||||
const uri = `s3://${validatedData.bucketName}/${validatedData.objectKey}`
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
output: {
|
||||
url,
|
||||
uri,
|
||||
etag: result.ETag,
|
||||
location: url,
|
||||
key: validatedData.objectKey,
|
||||
|
||||
@@ -322,44 +322,17 @@ export async function POST(request: NextRequest) {
|
||||
})
|
||||
|
||||
if (processingMode === 'async') {
|
||||
if (!validatedData.s3Uri && !validatedData.filePath) {
|
||||
if (!validatedData.s3Uri) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
error: 'S3 URI or file path is required for multi-page processing',
|
||||
error: 'S3 URI is required for multi-page processing (s3://bucket/key)',
|
||||
},
|
||||
{ status: 400 }
|
||||
)
|
||||
}
|
||||
|
||||
let s3Bucket: string
|
||||
let s3Key: string
|
||||
|
||||
if (validatedData.s3Uri) {
|
||||
const parsed = parseS3Uri(validatedData.s3Uri)
|
||||
s3Bucket = parsed.bucket
|
||||
s3Key = parsed.key
|
||||
} else if (validatedData.filePath?.includes('/api/files/serve/')) {
|
||||
const storageKey = extractStorageKey(validatedData.filePath)
|
||||
const context = inferContextFromKey(storageKey)
|
||||
|
||||
const hasAccess = await verifyFileAccess(storageKey, userId, undefined, context, false)
|
||||
if (!hasAccess) {
|
||||
return NextResponse.json({ success: false, error: 'File not found' }, { status: 404 })
|
||||
}
|
||||
|
||||
const s3Info = StorageService.getS3InfoForKey(storageKey, context)
|
||||
s3Bucket = s3Info.bucket
|
||||
s3Key = s3Info.key
|
||||
} else {
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
error: 'Multi-page mode requires an S3 URI (s3://bucket/key) or an uploaded file',
|
||||
},
|
||||
{ status: 400 }
|
||||
)
|
||||
}
|
||||
const { bucket: s3Bucket, key: s3Key } = parseS3Uri(validatedData.s3Uri)
|
||||
|
||||
logger.info(`[${requestId}] Starting async Textract job`, { s3Bucket, s3Key })
|
||||
|
||||
|
||||
@@ -414,6 +414,10 @@ export const S3Block: BlockConfig<S3Response> = {
|
||||
},
|
||||
outputs: {
|
||||
url: { type: 'string', description: 'URL of S3 object' },
|
||||
uri: {
|
||||
type: 'string',
|
||||
description: 'S3 URI (s3://bucket/key) for use with other AWS services',
|
||||
},
|
||||
objects: { type: 'json', description: 'List of objects (for list operation)' },
|
||||
deleted: { type: 'boolean', description: 'Deletion status' },
|
||||
metadata: { type: 'json', description: 'Operation metadata' },
|
||||
|
||||
@@ -36,19 +36,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
not: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'asyncInputMethod',
|
||||
title: 'Select Input Method',
|
||||
type: 'dropdown' as SubBlockType,
|
||||
options: [
|
||||
{ id: 's3', label: 'S3 URI' },
|
||||
{ id: 'upload', label: 'Upload Document' },
|
||||
],
|
||||
condition: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
},
|
||||
},
|
||||
{
|
||||
id: 'filePath',
|
||||
title: 'Document URL',
|
||||
@@ -70,12 +57,8 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
type: 'short-input' as SubBlockType,
|
||||
placeholder: 's3://bucket-name/path/to/document.pdf',
|
||||
condition: {
|
||||
field: 'asyncInputMethod',
|
||||
value: 's3',
|
||||
and: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
},
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -94,21 +77,6 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
},
|
||||
maxSize: 10,
|
||||
},
|
||||
{
|
||||
id: 'asyncFileUpload',
|
||||
title: 'Upload Document',
|
||||
type: 'file-upload' as SubBlockType,
|
||||
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
|
||||
condition: {
|
||||
field: 'asyncInputMethod',
|
||||
value: 'upload',
|
||||
and: {
|
||||
field: 'processingMode',
|
||||
value: 'async',
|
||||
},
|
||||
},
|
||||
maxSize: 50,
|
||||
},
|
||||
{
|
||||
id: 'region',
|
||||
title: 'AWS Region',
|
||||
@@ -177,18 +145,10 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
}
|
||||
|
||||
if (processingMode === 'async') {
|
||||
const asyncInputMethod = params.asyncInputMethod || 's3'
|
||||
if (asyncInputMethod === 's3') {
|
||||
if (!params.s3Uri || params.s3Uri.trim() === '') {
|
||||
throw new Error('S3 URI is required for multi-page processing')
|
||||
}
|
||||
parameters.s3Uri = params.s3Uri.trim()
|
||||
} else if (asyncInputMethod === 'upload') {
|
||||
if (!params.asyncFileUpload) {
|
||||
throw new Error('Please upload a document')
|
||||
}
|
||||
parameters.fileUpload = params.asyncFileUpload
|
||||
if (!params.s3Uri || params.s3Uri.trim() === '') {
|
||||
throw new Error('S3 URI is required for multi-page processing')
|
||||
}
|
||||
parameters.s3Uri = params.s3Uri.trim()
|
||||
} else {
|
||||
const inputMethod = params.inputMethod || 'url'
|
||||
if (inputMethod === 'url') {
|
||||
@@ -221,11 +181,9 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
|
||||
inputs: {
|
||||
processingMode: { type: 'string', description: 'Document type: single-page or multi-page' },
|
||||
inputMethod: { type: 'string', description: 'Input method selection for single-page mode' },
|
||||
asyncInputMethod: { type: 'string', description: 'Input method selection for multi-page mode' },
|
||||
filePath: { type: 'string', description: 'Document URL' },
|
||||
s3Uri: { type: 'string', description: 'S3 URI for multi-page processing (s3://bucket/key)' },
|
||||
fileUpload: { type: 'json', description: 'Uploaded document file for single-page mode' },
|
||||
asyncFileUpload: { type: 'json', description: 'Uploaded document file for multi-page mode' },
|
||||
extractTables: { type: 'boolean', description: 'Extract tables from document' },
|
||||
extractForms: { type: 'boolean', description: 'Extract form key-value pairs' },
|
||||
detectSignatures: { type: 'boolean', description: 'Detect signatures' },
|
||||
|
||||
@@ -1196,7 +1196,7 @@ describe('validateAirtableId', () => {
|
||||
})
|
||||
|
||||
describe('validateAwsRegion', () => {
|
||||
describe('valid regions', () => {
|
||||
describe('valid standard regions', () => {
|
||||
it.concurrent('should accept us-east-1', () => {
|
||||
const result = validateAwsRegion('us-east-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
@@ -1213,11 +1213,21 @@ describe('validateAwsRegion', () => {
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept eu-central-1', () => {
|
||||
const result = validateAwsRegion('eu-central-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept ap-southeast-1', () => {
|
||||
const result = validateAwsRegion('ap-southeast-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept ap-northeast-1', () => {
|
||||
const result = validateAwsRegion('ap-northeast-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept sa-east-1', () => {
|
||||
const result = validateAwsRegion('sa-east-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
@@ -1233,12 +1243,58 @@ describe('validateAwsRegion', () => {
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept ca-central-1', () => {
|
||||
const result = validateAwsRegion('ca-central-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept il-central-1', () => {
|
||||
const result = validateAwsRegion('il-central-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept regions with double-digit numbers', () => {
|
||||
const result = validateAwsRegion('ap-northeast-12')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('valid GovCloud regions', () => {
|
||||
it.concurrent('should accept us-gov-west-1', () => {
|
||||
const result = validateAwsRegion('us-gov-west-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept us-gov-east-1', () => {
|
||||
const result = validateAwsRegion('us-gov-east-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('valid China regions', () => {
|
||||
it.concurrent('should accept cn-north-1', () => {
|
||||
const result = validateAwsRegion('cn-north-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept cn-northwest-1', () => {
|
||||
const result = validateAwsRegion('cn-northwest-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('valid ISO regions', () => {
|
||||
it.concurrent('should accept us-iso-east-1', () => {
|
||||
const result = validateAwsRegion('us-iso-east-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
|
||||
it.concurrent('should accept us-isob-east-1', () => {
|
||||
const result = validateAwsRegion('us-isob-east-1')
|
||||
expect(result.isValid).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe('invalid regions', () => {
|
||||
it.concurrent('should reject null', () => {
|
||||
const result = validateAwsRegion(null)
|
||||
@@ -1282,6 +1338,16 @@ describe('validateAwsRegion', () => {
|
||||
expect(result.isValid).toBe(false)
|
||||
})
|
||||
|
||||
it.concurrent('should reject invalid prefix', () => {
|
||||
const result = validateAwsRegion('xx-east-1')
|
||||
expect(result.isValid).toBe(false)
|
||||
})
|
||||
|
||||
it.concurrent('should reject invalid direction', () => {
|
||||
const result = validateAwsRegion('us-middle-1')
|
||||
expect(result.isValid).toBe(false)
|
||||
})
|
||||
|
||||
it.concurrent('should use custom param name in errors', () => {
|
||||
const result = validateAwsRegion('', 'awsRegion')
|
||||
expect(result.error).toContain('awsRegion')
|
||||
|
||||
@@ -937,8 +937,12 @@ export function validateAirtableId(
|
||||
/**
|
||||
* Validates an AWS region identifier
|
||||
*
|
||||
* AWS regions follow the pattern: {area}-{sub-area}-{number}
|
||||
* Examples: us-east-1, eu-west-2, ap-southeast-1, sa-east-1
|
||||
* Supported region formats:
|
||||
* - Standard: us-east-1, eu-west-2, ap-southeast-1, sa-east-1, af-south-1
|
||||
* - GovCloud: us-gov-east-1, us-gov-west-1
|
||||
* - China: cn-north-1, cn-northwest-1
|
||||
* - Israel: il-central-1
|
||||
* - ISO partitions: us-iso-east-1, us-isob-east-1
|
||||
*
|
||||
* @param value - The AWS region to validate
|
||||
* @param paramName - Name of the parameter for error messages
|
||||
@@ -963,9 +967,13 @@ export function validateAwsRegion(
|
||||
}
|
||||
}
|
||||
|
||||
// AWS region format: {area}-{sub-area}-{number}
|
||||
// Examples: us-east-1, eu-west-2, ap-southeast-1, me-south-1, af-south-1
|
||||
const awsRegionPattern = /^[a-z]{2}-[a-z]+-\d{1,2}$/
|
||||
// AWS region patterns:
|
||||
// - Standard: af|ap|ca|eu|me|sa|us|il followed by direction and number
|
||||
// - GovCloud: us-gov-east-1, us-gov-west-1
|
||||
// - China: cn-north-1, cn-northwest-1
|
||||
// - ISO: us-iso-east-1, us-iso-west-1, us-isob-east-1
|
||||
const awsRegionPattern =
|
||||
/^(af|ap|ca|cn|eu|il|me|sa|us|us-gov|us-iso|us-isob)-(central|north|northeast|northwest|south|southeast|southwest|east|west)-\d{1,2}$/
|
||||
|
||||
if (!awsRegionPattern.test(value)) {
|
||||
logger.warn('Invalid AWS region format', {
|
||||
@@ -974,7 +982,7 @@ export function validateAwsRegion(
|
||||
})
|
||||
return {
|
||||
isValid: false,
|
||||
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2)`,
|
||||
error: `${paramName} must be a valid AWS region (e.g., us-east-1, eu-west-2, us-gov-west-1)`,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ export const s3CopyObjectTool: ToolConfig = {
|
||||
success: true,
|
||||
output: {
|
||||
url: data.output.url,
|
||||
uri: data.output.uri,
|
||||
metadata: {
|
||||
copySourceVersionId: data.output.copySourceVersionId,
|
||||
versionId: data.output.versionId,
|
||||
@@ -109,6 +110,10 @@ export const s3CopyObjectTool: ToolConfig = {
|
||||
type: 'string',
|
||||
description: 'URL of the copied S3 object',
|
||||
},
|
||||
uri: {
|
||||
type: 'string',
|
||||
description: 'S3 URI of the copied object (s3://bucket/key)',
|
||||
},
|
||||
metadata: {
|
||||
type: 'object',
|
||||
description: 'Copy operation metadata',
|
||||
|
||||
@@ -102,6 +102,7 @@ export const s3PutObjectTool: ToolConfig = {
|
||||
success: true,
|
||||
output: {
|
||||
url: data.output.url,
|
||||
uri: data.output.uri,
|
||||
metadata: {
|
||||
etag: data.output.etag,
|
||||
location: data.output.location,
|
||||
@@ -117,6 +118,10 @@ export const s3PutObjectTool: ToolConfig = {
|
||||
type: 'string',
|
||||
description: 'URL of the uploaded S3 object',
|
||||
},
|
||||
uri: {
|
||||
type: 'string',
|
||||
description: 'S3 URI of the uploaded object (s3://bucket/key)',
|
||||
},
|
||||
metadata: {
|
||||
type: 'object',
|
||||
description: 'Upload metadata including ETag and location',
|
||||
|
||||
Reference in New Issue
Block a user