diff --git a/apps/sim/app/api/table/[tableId]/rows/route.ts b/apps/sim/app/api/table/[tableId]/rows/route.ts
index d218c753a..eecc0c342 100644
--- a/apps/sim/app/api/table/[tableId]/rows/route.ts
+++ b/apps/sim/app/api/table/[tableId]/rows/route.ts
@@ -8,16 +8,17 @@ import { checkHybridAuth } from '@/lib/auth/hybrid'
 import { generateRequestId } from '@/lib/core/utils/request'
 import type { Filter, RowData, Sort, TableSchema } from '@/lib/table'
 import {
+  checkUniqueConstraintsDb,
   getUniqueColumns,
   TABLE_LIMITS,
+  USER_TABLE_ROWS_SQL_NAME,
   validateBatchRows,
   validateRowAgainstSchema,
   validateRowData,
   validateRowSize,
-  validateUniqueConstraints,
 } from '@/lib/table'
 import { buildFilterClause, buildSortClause } from '@/lib/table/sql'
-import { accessError, checkAccess, verifyTableWorkspace } from '../../utils'
+import { accessError, checkAccess } from '../../utils'

 const logger = createLogger('TableRowsAPI')

@@ -295,8 +296,7 @@ export async function GET(request: NextRequest, { params }: TableRowsRouteParams

   const { table } = accessResult

-  const isValidWorkspace = await verifyTableWorkspace(tableId, validated.workspaceId)
-  if (!isValidWorkspace) {
+  if (validated.workspaceId !== table.workspaceId) {
     logger.warn(
       `[${requestId}] Workspace ID mismatch for table ${tableId}. Provided: ${validated.workspaceId}, Actual: ${table.workspaceId}`
     )
@@ -309,7 +309,7 @@ export async function GET(request: NextRequest, { params }: TableRowsRouteParams
   ]

   if (validated.filter) {
-    const filterClause = buildFilterClause(validated.filter as Filter, 'user_table_rows')
+    const filterClause = buildFilterClause(validated.filter as Filter, USER_TABLE_ROWS_SQL_NAME)
     if (filterClause) {
       baseConditions.push(filterClause)
     }
@@ -327,7 +327,7 @@ export async function GET(request: NextRequest, { params }: TableRowsRouteParams

   if (validated.sort) {
     const schema = table.schema as TableSchema
-    const sortClause = buildSortClause(validated.sort, 'user_table_rows', schema.columns)
+    const sortClause = buildSortClause(validated.sort, USER_TABLE_ROWS_SQL_NAME, schema.columns)
     if (sortClause) {
       query = query.orderBy(sortClause) as typeof query
     }
@@ -417,7 +417,7 @@ export async function PUT(request: NextRequest, { params }: TableRowsRouteParams
     eq(userTableRows.workspaceId, validated.workspaceId),
   ]

-  const filterClause = buildFilterClause(validated.filter as Filter, 'user_table_rows')
+  const filterClause = buildFilterClause(validated.filter as Filter, USER_TABLE_ROWS_SQL_NAME)
   if (filterClause) {
     baseConditions.push(filterClause)
   }
@@ -469,23 +469,16 @@ export async function PUT(request: NextRequest, { params }: TableRowsRouteParams
     }
   }

+  // Check unique constraints using optimized database query
   const uniqueColumns = getUniqueColumns(table.schema as TableSchema)
   if (uniqueColumns.length > 0) {
-    const allRows = await db
-      .select({
-        id: userTableRows.id,
-        data: userTableRows.data,
-      })
-      .from(userTableRows)
-      .where(eq(userTableRows.tableId, tableId))
-
     for (const row of matchingRows) {
       const existingData = row.data as RowData
       const mergedData = { ...existingData, ...updateData }

-      const uniqueValidation = validateUniqueConstraints(
+      const uniqueValidation = await checkUniqueConstraintsDb(
+        tableId,
         mergedData,
         table.schema as TableSchema,
-        allRows.map((r) => ({ id: r.id, data: r.data as RowData })),
         row.id
       )
@@ -573,8 +566,7 @@ export async function DELETE(request: NextRequest, { params }: TableRowsRoutePar

   const { table } = accessResult

-  const isValidWorkspace = await verifyTableWorkspace(tableId, validated.workspaceId)
-  if (!isValidWorkspace) {
+  if (validated.workspaceId !== table.workspaceId) {
     logger.warn(
       `[${requestId}] Workspace ID mismatch for table ${tableId}. Provided: ${validated.workspaceId}, Actual: ${table.workspaceId}`
     )
@@ -586,7 +578,7 @@ export async function DELETE(request: NextRequest, { params }: TableRowsRoutePar
     eq(userTableRows.workspaceId, validated.workspaceId),
   ]

-  const filterClause = buildFilterClause(validated.filter as Filter, 'user_table_rows')
+  const filterClause = buildFilterClause(validated.filter as Filter, USER_TABLE_ROWS_SQL_NAME)
   if (filterClause) {
     baseConditions.push(filterClause)
   }
diff --git a/apps/sim/lib/table/constants.ts b/apps/sim/lib/table/constants.ts
index 835370bd8..8b0a081f4 100644
--- a/apps/sim/lib/table/constants.ts
+++ b/apps/sim/lib/table/constants.ts
@@ -26,3 +26,6 @@ export const TABLE_LIMITS = {
 export const COLUMN_TYPES = ['string', 'number', 'boolean', 'date', 'json'] as const

 export const NAME_PATTERN = /^[a-z_][a-z0-9_]*$/i
+
+/** Database table name for user table rows (used in SQL query building) */
+export const USER_TABLE_ROWS_SQL_NAME = 'user_table_rows'
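[Editor's note, not part of the patch] Two independent cleanups ride along in route.ts: the `verifyTableWorkspace` helper is dropped because `checkAccess` already loads the table row, so the handlers can compare `validated.workspaceId` against `table.workspaceId` in memory instead of issuing a second query; and the raw table-name literal `'user_table_rows'` that `buildFilterClause`/`buildSortClause` splice into SQL now comes from the shared `USER_TABLE_ROWS_SQL_NAME` constant. A condensed sketch of the new guard; the 4xx early-return body is elided by the hunks, so it is only indicated here:

    // Sketch only; names are from the diff, the early-return body is not shown in it.
    const { table } = accessResult // loaded once by checkAccess
    if (validated.workspaceId !== table.workspaceId) {
      logger.warn(`[${requestId}] Workspace ID mismatch for table ${tableId}`)
      // ...return the same error response the handler used before
    }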
diff --git a/apps/sim/lib/table/service.ts b/apps/sim/lib/table/service.ts
index 032cec365..cb9a063c8 100644
--- a/apps/sim/lib/table/service.ts
+++ b/apps/sim/lib/table/service.ts
@@ -11,7 +11,7 @@ import { db } from '@sim/db'
 import { userTableDefinitions, userTableRows } from '@sim/db/schema'
 import { createLogger } from '@sim/logger'
 import { and, count, eq, sql } from 'drizzle-orm'
-import { TABLE_LIMITS } from './constants'
+import { TABLE_LIMITS, USER_TABLE_ROWS_SQL_NAME } from './constants'
 import { buildFilterClause, buildSortClause } from './sql'
 import type {
   BatchInsertData,
@@ -29,12 +29,13 @@ import type {
   UpdateRowData,
 } from './types'
 import {
+  checkBatchUniqueConstraintsDb,
+  checkUniqueConstraintsDb,
   getUniqueColumns,
   validateRowAgainstSchema,
   validateRowSize,
   validateTableName,
   validateTableSchema,
-  validateUniqueConstraints,
 } from './validation'

 const logger = createLogger('TableService')

@@ -227,19 +228,10 @@ export async function insertRow(
     throw new Error(`Schema validation failed: ${schemaValidation.errors.join(', ')}`)
   }

-  // Check unique constraints
+  // Check unique constraints using optimized database query
   const uniqueColumns = getUniqueColumns(table.schema)
   if (uniqueColumns.length > 0) {
-    const existingRows = await db
-      .select({ id: userTableRows.id, data: userTableRows.data })
-      .from(userTableRows)
-      .where(eq(userTableRows.tableId, data.tableId))
-
-    const uniqueValidation = validateUniqueConstraints(
-      data.data,
-      table.schema,
-      existingRows.map((r) => ({ id: r.id, data: r.data as RowData }))
-    )
+    const uniqueValidation = await checkUniqueConstraintsDb(data.tableId, data.data, table.schema)
     if (!uniqueValidation.valid) {
       throw new Error(uniqueValidation.errors.join(', '))
     }
@@ -306,23 +298,16 @@ export async function batchInsertRows(
     }
   }

-  // Check unique constraints across all rows
+  // Check unique constraints across all rows using optimized database query
   const uniqueColumns = getUniqueColumns(table.schema)
   if (uniqueColumns.length > 0) {
-    const existingRows = await db
-      .select({ id: userTableRows.id, data: userTableRows.data })
-      .from(userTableRows)
-      .where(eq(userTableRows.tableId, data.tableId))
-
-    const allRows = existingRows.map((r) => ({ id: r.id, data: r.data as RowData }))
-
-    for (let i = 0; i < data.rows.length; i++) {
-      const uniqueValidation = validateUniqueConstraints(data.rows[i], table.schema, allRows)
-      if (!uniqueValidation.valid) {
-        throw new Error(`Row ${i + 1}: ${uniqueValidation.errors.join(', ')}`)
-      }
-      // Add to allRows for checking subsequent rows in batch
-      allRows.push({ id: `pending_${i}`, data: data.rows[i] })
+    const uniqueResult = await checkBatchUniqueConstraintsDb(data.tableId, data.rows, table.schema)
+    if (!uniqueResult.valid) {
+      // Format errors for batch insert
+      const errorMessages = uniqueResult.errors
+        .map((e) => `Row ${e.row + 1}: ${e.errors.join(', ')}`)
+        .join('; ')
+      throw new Error(errorMessages)
     }
   }

@@ -365,7 +350,7 @@ export async function queryRows(
 ): Promise {
   const { filter, sort, limit = TABLE_LIMITS.DEFAULT_QUERY_LIMIT, offset = 0 } = options

-  const tableName = 'user_table_rows'
+  const tableName = USER_TABLE_ROWS_SQL_NAME

   // Build WHERE clause
   const baseConditions = and(
@@ -493,18 +478,13 @@ export async function updateRow(
     throw new Error(`Schema validation failed: ${schemaValidation.errors.join(', ')}`)
   }

-  // Check unique constraints
+  // Check unique constraints using optimized database query
   const uniqueColumns = getUniqueColumns(table.schema)
   if (uniqueColumns.length > 0) {
-    const existingRows = await db
-      .select({ id: userTableRows.id, data: userTableRows.data })
-      .from(userTableRows)
-      .where(eq(userTableRows.tableId, data.tableId))
-
-    const uniqueValidation = validateUniqueConstraints(
+    const uniqueValidation = await checkUniqueConstraintsDb(
+      data.tableId,
       data.data,
       table.schema,
-      existingRows.map((r) => ({ id: r.id, data: r.data as RowData })),
       data.rowId // Exclude current row
     )
     if (!uniqueValidation.valid) {
@@ -567,7 +547,7 @@ export async function updateRowsByFilter(
   table: TableDefinition,
   requestId: string
 ): Promise {
-  const tableName = 'user_table_rows'
+  const tableName = USER_TABLE_ROWS_SQL_NAME

   // Build filter clause
   const filterClause = buildFilterClause(data.filter, tableName)
@@ -651,7 +631,7 @@ export async function deleteRowsByFilter(
   data: BulkDeleteData,
   requestId: string
 ): Promise {
-  const tableName = 'user_table_rows'
+  const tableName = USER_TABLE_ROWS_SQL_NAME

   // Build filter clause
   const filterClause = buildFilterClause(data.filter, tableName)
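[Editor's note, not part of the patch] `batchInsertRows` now surfaces the checker's per-row errors in a single exception. A worked example of the formatting, using a hypothetical result (the shapes come from `checkBatchUniqueConstraintsDb` below; the row id and values are made up):

    // Hypothetical result: the first batch row collides with an existing row.
    const uniqueResult = {
      valid: false,
      errors: [
        { row: 0, errors: ['Column "email" must be unique. Value "a@example.com" already exists in row abc123'] },
      ],
    }
    const message = uniqueResult.errors
      .map((e) => `Row ${e.row + 1}: ${e.errors.join(', ')}`)
      .join('; ')
    // message === 'Row 1: Column "email" must be unique. Value "a@example.com" already exists in row abc123'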
diff --git a/apps/sim/lib/table/validation.ts b/apps/sim/lib/table/validation.ts
index 6d5ea4996..3cb77c752 100644
--- a/apps/sim/lib/table/validation.ts
+++ b/apps/sim/lib/table/validation.ts
@@ -4,7 +4,7 @@
 import { db } from '@sim/db'
 import { userTableRows } from '@sim/db/schema'
-import { eq } from 'drizzle-orm'
+import { and, eq, or, sql } from 'drizzle-orm'
 import { NextResponse } from 'next/server'
 import { COLUMN_TYPES, NAME_PATTERN, TABLE_LIMITS } from './constants'
 import type { ColumnDefinition, RowData, TableSchema, ValidationResult } from './types'

@@ -39,6 +39,7 @@ export interface ValidateBatchRowsOptions {
 /**
  * Validates a single row (size, schema, unique constraints) and returns a formatted response on failure.
+ * Uses optimized database queries for unique constraint checks to avoid loading all rows into memory.
  */
 export async function validateRowData(
   options: ValidateRowOptions
 ): Promise<{ valid: boolean; response?: NextResponse }> {
@@ -68,28 +69,16 @@ export async function validateRowData(
   }

   if (checkUnique) {
-    const uniqueColumns = getUniqueColumns(schema)
-    if (uniqueColumns.length > 0) {
-      const existingRows = await db
-        .select({ id: userTableRows.id, data: userTableRows.data })
-        .from(userTableRows)
-        .where(eq(userTableRows.tableId, tableId))
+    // Use optimized database query instead of loading all rows
+    const uniqueValidation = await checkUniqueConstraintsDb(tableId, rowData, schema, excludeRowId)

-      const uniqueValidation = validateUniqueConstraints(
-        rowData,
-        schema,
-        existingRows.map((r) => ({ id: r.id, data: r.data as RowData })),
-        excludeRowId
-      )
-
-      if (!uniqueValidation.valid) {
-        return {
-          valid: false,
-          response: NextResponse.json(
-            { error: 'Unique constraint violation', details: uniqueValidation.errors },
-            { status: 400 }
-          ),
-        }
-      }
+    if (!uniqueValidation.valid) {
+      return {
+        valid: false,
+        response: NextResponse.json(
+          { error: 'Unique constraint violation', details: uniqueValidation.errors },
+          { status: 400 }
+        ),
+      }
     }
   }
@@ -99,6 +88,7 @@ export async function validateRowData(
 /**
  * Validates multiple rows for batch insert (size, schema, unique constraints including within batch).
+ * Uses optimized database queries for unique constraint checks to avoid loading all rows into memory.
  */
 export async function validateBatchRows(
   options: ValidateBatchRowsOptions
 ): Promise<{ valid: boolean; response?: NextResponse }> {
@@ -134,30 +124,14 @@ export async function validateBatchRows(
   if (checkUnique) {
     const uniqueColumns = getUniqueColumns(schema)
     if (uniqueColumns.length > 0) {
-      const existingRows = await db
-        .select({ id: userTableRows.id, data: userTableRows.data })
-        .from(userTableRows)
-        .where(eq(userTableRows.tableId, tableId))
+      // Use optimized batch unique constraint check
+      const uniqueResult = await checkBatchUniqueConstraintsDb(tableId, rows, schema)

-      for (let i = 0; i < rows.length; i++) {
-        const rowData = rows[i]
-        const batchRows = rows.slice(0, i).map((data, idx) => ({ id: `batch_${idx}`, data }))
-
-        const uniqueValidation = validateUniqueConstraints(rowData, schema, [
-          ...existingRows.map((r) => ({ id: r.id, data: r.data as RowData })),
-          ...batchRows,
-        ])
-
-        if (!uniqueValidation.valid) {
-          errors.push({ row: i, errors: uniqueValidation.errors })
-        }
-      }
-
-      if (errors.length > 0) {
+      if (!uniqueResult.valid) {
         return {
           valid: false,
           response: NextResponse.json(
-            { error: 'Unique constraint violations in batch', details: errors },
+            { error: 'Unique constraint violations in batch', details: uniqueResult.errors },
             { status: 400 }
           ),
         }
@@ -298,7 +272,7 @@ export function getUniqueColumns(schema: TableSchema): ColumnDefinition[] {
   return schema.columns.filter((col) => col.unique === true)
 }

-/** Validates unique constraints against existing rows. */
+/** Validates unique constraints against existing rows (in-memory variant for rows already loaded, e.g. within a batch). */
 export function validateUniqueConstraints(
   data: RowData,
   schema: TableSchema,
@@ -332,6 +306,202 @@ export function validateUniqueConstraints(
   return { valid: errors.length === 0, errors }
 }

+/**
+ * Checks unique constraints using targeted database queries.
+ * Only queries for specific conflicting values instead of loading all rows.
+ * This reduces memory usage from O(n) to O(1), where n is the number of existing rows.
+ */
+export async function checkUniqueConstraintsDb(
+  tableId: string,
+  data: RowData,
+  schema: TableSchema,
+  excludeRowId?: string
+): Promise<ValidationResult> {
+  const errors: string[] = []
+  const uniqueColumns = getUniqueColumns(schema)
+
+  if (uniqueColumns.length === 0) {
+    return { valid: true, errors: [] }
+  }
+
+  // Build conditions for each unique column value
+  const conditions = []
+
+  for (const column of uniqueColumns) {
+    const value = data[column.name]
+    if (value === null || value === undefined) continue
+
+    // Use JSONB operators to check for existing values
+    // For strings, use case-insensitive comparison
+    if (typeof value === 'string') {
+      conditions.push({
+        column,
+        value,
+        sql: sql`lower(${userTableRows.data}->>${sql.raw(`'${column.name}'`)}) = ${value.toLowerCase()}`,
+      })
+    } else {
+      // For other types, use direct JSONB comparison
+      conditions.push({
+        column,
+        value,
+        sql: sql`(${userTableRows.data}->${sql.raw(`'${column.name}'`)})::jsonb = ${JSON.stringify(value)}::jsonb`,
+      })
+    }
+  }
+
+  if (conditions.length === 0) {
+    return { valid: true, errors: [] }
+  }
+
+  // Query for each unique column separately to provide specific error messages
+  for (const condition of conditions) {
+    const baseCondition = and(eq(userTableRows.tableId, tableId), condition.sql)
+
+    const whereClause = excludeRowId
+      ? and(baseCondition, sql`${userTableRows.id} != ${excludeRowId}`)
+      : baseCondition
+
+    const conflictingRow = await db
+      .select({ id: userTableRows.id })
+      .from(userTableRows)
+      .where(whereClause)
+      .limit(1)
+
+    if (conflictingRow.length > 0) {
+      errors.push(
+        `Column "${condition.column.name}" must be unique. Value "${condition.value}" already exists in row ${conflictingRow[0].id}`
+      )
+    }
+  }
+
+  return { valid: errors.length === 0, errors }
+}
+
+/**
+ * Checks unique constraints for a batch of rows using targeted database queries.
+ * Validates both against existing database rows and within the batch itself.
+ */
+export async function checkBatchUniqueConstraintsDb(
+  tableId: string,
+  rows: RowData[],
+  schema: TableSchema
+): Promise<{ valid: boolean; errors: Array<{ row: number; errors: string[] }> }> {
+  const uniqueColumns = getUniqueColumns(schema)
+  const rowErrors: Array<{ row: number; errors: string[] }> = []
+
+  if (uniqueColumns.length === 0) {
+    return { valid: true, errors: [] }
+  }
+
+  // Build a set of all unique values for each column to check against DB
+  const valuesByColumn = new Map<string, { values: Set<string>; column: ColumnDefinition }>()
+
+  for (const column of uniqueColumns) {
+    valuesByColumn.set(column.name, { values: new Set(), column })
+  }
+
+  // Collect all unique values from the batch and check for duplicates within the batch
+  const batchValueMap = new Map<string, Map<string, number>>() // columnName -> (normalizedValue -> firstRowIndex)
+
+  for (const column of uniqueColumns) {
+    batchValueMap.set(column.name, new Map())
+  }
+
+  for (let i = 0; i < rows.length; i++) {
+    const rowData = rows[i]
+    const currentRowErrors: string[] = []
+
+    for (const column of uniqueColumns) {
+      const value = rowData[column.name]
+      if (value === null || value === undefined) continue
+
+      const normalizedValue =
+        typeof value === 'string' ? value.toLowerCase() : JSON.stringify(value)
+
+      // Check for duplicate within batch
+      const columnValueMap = batchValueMap.get(column.name)!
+      if (columnValueMap.has(normalizedValue)) {
+        const firstRowIndex = columnValueMap.get(normalizedValue)!
+        currentRowErrors.push(
+          `Column "${column.name}" must be unique. Value "${value}" duplicates row ${firstRowIndex + 1} in batch`
+        )
+      } else {
+        columnValueMap.set(normalizedValue, i)
+        valuesByColumn.get(column.name)!.values.add(normalizedValue)
+      }
+    }
+
+    if (currentRowErrors.length > 0) {
+      rowErrors.push({ row: i, errors: currentRowErrors })
+    }
+  }
+
+  // Now check against database for all unique values at once
+  for (const [columnName, { values, column }] of valuesByColumn) {
+    if (values.size === 0) continue
+
+    // Build OR conditions for all values of this column
+    const valueArray = Array.from(values)
+    const valueConditions = valueArray.map((normalizedValue) => {
+      // String values were normalized to lowercase above, so string columns compare
+      // case-insensitively via lower(); all other types compare by JSONB equality
+      const isStringColumn = column.type === 'string'
+
+      if (isStringColumn) {
+        return sql`lower(${userTableRows.data}->>${sql.raw(`'${columnName}'`)}) = ${normalizedValue}`
+      }
+      return sql`(${userTableRows.data}->${sql.raw(`'${columnName}'`)})::jsonb = ${normalizedValue}::jsonb`
+    })
+
+    const conflictingRows = await db
+      .select({
+        id: userTableRows.id,
+        data: userTableRows.data,
+      })
+      .from(userTableRows)
+      .where(and(eq(userTableRows.tableId, tableId), or(...valueConditions)))
+      .limit(valueArray.length) // We only need up to one conflict per value
+
+    // Map conflicts back to batch rows
+    for (const conflict of conflictingRows) {
+      const conflictData = conflict.data as RowData
+      const conflictValue = conflictData[columnName]
+      const normalizedConflictValue =
+        typeof conflictValue === 'string'
+          ? conflictValue.toLowerCase()
+          : JSON.stringify(conflictValue)
+
+      // Find which batch rows have this conflicting value
+      for (let i = 0; i < rows.length; i++) {
+        const rowValue = rows[i][columnName]
+        if (rowValue === null || rowValue === undefined) continue
+
+        const normalizedRowValue =
+          typeof rowValue === 'string' ? rowValue.toLowerCase() : JSON.stringify(rowValue)
+
+        if (normalizedRowValue === normalizedConflictValue) {
+          // Check if this row already has errors for this column
+          let rowError = rowErrors.find((e) => e.row === i)
+          if (!rowError) {
+            rowError = { row: i, errors: [] }
+            rowErrors.push(rowError)
+          }
+
+          const errorMsg = `Column "${columnName}" must be unique. Value "${rowValue}" already exists in row ${conflict.id}`
+          if (!rowError.errors.includes(errorMsg)) {
+            rowError.errors.push(errorMsg)
+          }
+        }
+      }
+    }
+  }
+
+  // Sort errors by row index
+  rowErrors.sort((a, b) => a.row - b.row)
+
+  return { valid: rowErrors.length === 0, errors: rowErrors }
+}
+
 /** Validates column definition format and type. */
 export function validateColumnDefinition(column: ColumnDefinition): ValidationResult {
   const errors: string[] = []
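[Editor's note, not part of the patch] For reviewers, a sketch of what the new single-row checker does at the SQL level. Table and row ids here are hypothetical; the snake_case column names assume drizzle's usual mapping (`tableId` -> `table_id`), which this diff does not itself show:

    const result = await checkUniqueConstraintsDb(
      'tbl_123', // hypothetical tableId
      { email: 'A@Example.com' }, // candidate row data
      table.schema as TableSchema,
      'row_456' // hypothetical excludeRowId, lets an update keep its own value
    )
    // Per unique string column the function issues roughly:
    //   SELECT id FROM user_table_rows
    //   WHERE table_id = $1 AND lower(data->>'email') = $2 AND id != $3
    //   LIMIT 1
    // Non-string columns compare (data->'col')::jsonb = $2::jsonb, with $2 = JSON.stringify(value).
    if (!result.valid) {
      // e.g. result.errors[0]: 'Column "email" must be unique. Value "A@Example.com" already exists in row ...'
    }

Memory use per check is now constant, but each probe is a separate query per unique column, and the diff adds no index over these JSONB expressions, so large tables may still pay a sequential scan per probe.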