Compare commits

..

2 Commits

Author SHA1 Message Date
Vikhyath Mondreti
fd57927746 bugbot comment 2026-02-02 10:55:10 -08:00
Vikhyath Mondreti
a213ad98cf improvement(rooms): redis client closed should fail fast 2026-02-02 09:59:22 -08:00
10 changed files with 116 additions and 20 deletions

View File

@@ -8,7 +8,6 @@ import { verifyCronAuth } from '@/lib/auth/internal'
const logger = createLogger('CleanupStaleExecutions')
const STALE_THRESHOLD_MINUTES = 30
const MAX_INT32 = 2_147_483_647
export async function GET(request: NextRequest) {
try {
@@ -46,14 +45,13 @@ export async function GET(request: NextRequest) {
try {
const staleDurationMs = Date.now() - new Date(execution.startedAt).getTime()
const staleDurationMinutes = Math.round(staleDurationMs / 60000)
const totalDurationMs = Math.min(staleDurationMs, MAX_INT32)
await db
.update(workflowExecutionLogs)
.set({
status: 'failed',
endedAt: new Date(),
totalDurationMs,
totalDurationMs: staleDurationMs,
executionData: sql`jsonb_set(
COALESCE(execution_data, '{}'::jsonb),
ARRAY['error'],

View File

@@ -14,6 +14,7 @@ import { createLogger } from '@sim/logger'
import { useParams } from 'next/navigation'
import { io, type Socket } from 'socket.io-client'
import { getEnv } from '@/lib/core/config/env'
import { useOperationQueueStore } from '@/stores/operation-queue/store'
const logger = createLogger('SocketContext')
@@ -138,6 +139,7 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
const [authFailed, setAuthFailed] = useState(false)
const initializedRef = useRef(false)
const socketRef = useRef<Socket | null>(null)
const triggerOfflineMode = useOperationQueueStore((state) => state.triggerOfflineMode)
const params = useParams()
const urlWorkflowId = params?.workflowId as string | undefined
@@ -341,9 +343,12 @@ export function SocketProvider({ children, user }: SocketProviderProps) {
})
})
socketInstance.on('join-workflow-error', ({ error }) => {
socketInstance.on('join-workflow-error', ({ error, code }) => {
isRejoiningRef.current = false
logger.error('Failed to join workflow:', error)
logger.error('Failed to join workflow:', { error, code })
if (code === 'ROOM_MANAGER_UNAVAILABLE') {
triggerOfflineMode()
}
})
socketInstance.on('workflow-operation', (data) => {

View File

@@ -12,15 +12,49 @@ import {
import { persistWorkflowOperation } from '@/socket/database/operations'
import type { AuthenticatedSocket } from '@/socket/middleware/auth'
import { checkRolePermission } from '@/socket/middleware/permissions'
import type { IRoomManager } from '@/socket/rooms'
import type { IRoomManager, UserSession } from '@/socket/rooms'
import { WorkflowOperationSchema } from '@/socket/validation/schemas'
const logger = createLogger('OperationsHandlers')
export function setupOperationsHandlers(socket: AuthenticatedSocket, roomManager: IRoomManager) {
socket.on('workflow-operation', async (data) => {
const workflowId = await roomManager.getWorkflowIdForSocket(socket.id)
const session = await roomManager.getUserSession(socket.id)
if (!roomManager.isReady()) {
socket.emit('operation-forbidden', {
type: 'ROOM_MANAGER_UNAVAILABLE',
message: 'Realtime unavailable',
})
if (data?.operationId) {
socket.emit('operation-failed', {
operationId: data.operationId,
error: 'Realtime unavailable',
retryable: true,
})
}
return
}
let workflowId: string | null = null
let session: UserSession | null = null
try {
workflowId = await roomManager.getWorkflowIdForSocket(socket.id)
session = await roomManager.getUserSession(socket.id)
} catch (error) {
logger.error('Error loading session for workflow operation:', error)
socket.emit('operation-forbidden', {
type: 'ROOM_MANAGER_UNAVAILABLE',
message: 'Realtime unavailable',
})
if (data?.operationId) {
socket.emit('operation-failed', {
operationId: data.operationId,
error: 'Realtime unavailable',
retryable: true,
})
}
return
}
if (!workflowId || !session) {
socket.emit('operation-forbidden', {

View File

@@ -48,6 +48,21 @@ export function setupSubblocksHandlers(socket: AuthenticatedSocket, roomManager:
operationId,
} = data
if (!roomManager.isReady()) {
socket.emit('operation-forbidden', {
type: 'ROOM_MANAGER_UNAVAILABLE',
message: 'Realtime unavailable',
})
if (operationId) {
socket.emit('operation-failed', {
operationId,
error: 'Realtime unavailable',
retryable: true,
})
}
return
}
try {
const sessionWorkflowId = await roomManager.getWorkflowIdForSocket(socket.id)
const session = await roomManager.getUserSession(socket.id)

View File

@@ -37,6 +37,21 @@ export function setupVariablesHandlers(socket: AuthenticatedSocket, roomManager:
socket.on('variable-update', async (data) => {
const { workflowId: payloadWorkflowId, variableId, field, value, timestamp, operationId } = data
if (!roomManager.isReady()) {
socket.emit('operation-forbidden', {
type: 'ROOM_MANAGER_UNAVAILABLE',
message: 'Realtime unavailable',
})
if (operationId) {
socket.emit('operation-failed', {
operationId,
error: 'Realtime unavailable',
retryable: true,
})
}
return
}
try {
const sessionWorkflowId = await roomManager.getWorkflowIdForSocket(socket.id)
const session = await roomManager.getUserSession(socket.id)

View File

@@ -20,6 +20,15 @@ export function setupWorkflowHandlers(socket: AuthenticatedSocket, roomManager:
return
}
if (!roomManager.isReady()) {
logger.warn(`Join workflow rejected: Room manager unavailable`)
socket.emit('join-workflow-error', {
error: 'Realtime unavailable',
code: 'ROOM_MANAGER_UNAVAILABLE',
})
return
}
logger.info(`Join workflow request from ${userId} (${userName}) for workflow ${workflowId}`)
// Verify workflow access
@@ -128,12 +137,20 @@ export function setupWorkflowHandlers(socket: AuthenticatedSocket, roomManager:
// Undo socket.join and room manager entry if any operation failed
socket.leave(workflowId)
await roomManager.removeUserFromRoom(socket.id)
socket.emit('join-workflow-error', { error: 'Failed to join workflow' })
const isReady = roomManager.isReady()
socket.emit('join-workflow-error', {
error: isReady ? 'Failed to join workflow' : 'Realtime unavailable',
code: isReady ? undefined : 'ROOM_MANAGER_UNAVAILABLE',
})
}
})
socket.on('leave-workflow', async () => {
try {
if (!roomManager.isReady()) {
return
}
const workflowId = await roomManager.getWorkflowIdForSocket(socket.id)
const session = await roomManager.getUserSession(socket.id)

View File

@@ -26,6 +26,10 @@ export class MemoryRoomManager implements IRoomManager {
logger.info('MemoryRoomManager initialized (single-pod mode)')
}
isReady(): boolean {
return true
}
async shutdown(): Promise<void> {
this.workflowRooms.clear()
this.socketToWorkflow.clear()

View File

@@ -96,17 +96,6 @@ export class RedisRoomManager implements IRoomManager {
this._io = io
this.redis = createClient({
url: redisUrl,
socket: {
reconnectStrategy: (retries) => {
if (retries > 10) {
logger.error('Redis reconnection failed after 10 attempts')
return new Error('Redis reconnection failed')
}
const delay = Math.min(retries * 100, 3000)
logger.warn(`Redis reconnecting in ${delay}ms (attempt ${retries})`)
return delay
},
},
})
this.redis.on('error', (err) => {
@@ -122,12 +111,21 @@ export class RedisRoomManager implements IRoomManager {
logger.info('Redis client ready')
this.isConnected = true
})
this.redis.on('end', () => {
logger.warn('Redis client connection closed')
this.isConnected = false
})
}
get io(): Server {
return this._io
}
isReady(): boolean {
return this.isConnected
}
async initialize(): Promise<void> {
if (this.isConnected) return

View File

@@ -48,6 +48,11 @@ export interface IRoomManager {
*/
initialize(): Promise<void>
/**
* Whether the room manager is ready to serve requests
*/
isReady(): boolean
/**
* Clean shutdown
*/

View File

@@ -85,6 +85,11 @@ export function createHttpHandler(roomManager: IRoomManager, logger: Logger) {
res.end(JSON.stringify({ error: authResult.error }))
return
}
if (!roomManager.isReady()) {
sendError(res, 'Room manager unavailable', 503)
return
}
}
// Handle workflow deletion notifications from the main API