fix(worker): dockerfile + helm updates (#3818)

* fix(worker): dockerfile + helm updates

* address comments
This commit is contained in:
Vikhyath Mondreti
2026-03-27 18:28:36 -07:00
committed by GitHub
parent c05e2e0fc8
commit 21156dd54a
7 changed files with 210 additions and 3 deletions

View File

@@ -17,8 +17,9 @@
"load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml",
"load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml",
"load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml",
"build": "bun run build:pptx-worker && next build",
"build": "bun run build:pptx-worker && bun run build:worker && next build",
"build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs",
"build:worker": "bun build ./worker/index.ts --target=node --format=cjs --packages=external --outfile ./dist/worker.cjs",
"start": "next start",
"worker": "NODE_ENV=production bun run worker/index.ts",
"prepare": "cd ../.. && bun husky",

View File

@@ -67,6 +67,38 @@ services:
retries: 3
start_period: 10s
# Background job worker service. Built from docker/app.Dockerfile and started
# by running the bundled worker artifact (apps/sim/dist/worker.cjs) with bun.
sim-worker:
build:
context: .
dockerfile: docker/app.Dockerfile
command: ['bun', 'apps/sim/dist/worker.cjs']
restart: unless-stopped
# Opt-in service: it is only started when the "worker" Compose profile is active.
profiles:
- worker
deploy:
resources:
limits:
memory: 4G
environment:
- NODE_ENV=development
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
# REDIS_URL falls back to an empty string when it is not set in the host environment.
- REDIS_URL=${REDIS_URL:-}
# NOTE(review): the ENCRYPTION_KEY and INTERNAL_API_SECRET fallbacks look like
# development placeholders - confirm they are overridden outside local use.
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-dev-encryption-key-at-least-32-chars}
- API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-}
- INTERNAL_API_SECRET=${INTERNAL_API_SECRET:-dev-internal-api-secret-min-32-chars}
# Keep in sync with the port used by the healthcheck URL below.
- WORKER_PORT=3001
# Start only after the database reports healthy and the migrations service has
# exited successfully.
depends_on:
db:
condition: service_healthy
migrations:
condition: service_completed_successfully
# Probes the /health/live endpoint on port 3001 (the WORKER_PORT value set above).
healthcheck:
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3001/health/live']
interval: 90s
timeout: 5s
retries: 3
start_period: 10s
migrations:
build:
context: .

View File

@@ -42,7 +42,7 @@ services:
sim-worker:
image: ghcr.io/simstudioai/simstudio:latest
command: ['bun', 'run', 'worker']
command: ['bun', 'apps/sim/dist/worker.cjs']
restart: unless-stopped
deploy:
resources:
@@ -71,7 +71,7 @@ services:
migrations:
condition: service_completed_successfully
healthcheck:
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health']
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live']
interval: 90s
timeout: 5s
retries: 3

View File

@@ -114,6 +114,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v
# Copy the bundled PPTX worker artifact
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs
# Copy the bundled BullMQ worker artifact
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs
# Guardrails setup with pip caching
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/validate_pii.py ./apps/sim/lib/guardrails/validate_pii.py

View File

@@ -117,6 +117,22 @@ Ollama selector labels
app.kubernetes.io/component: ollama
{{- end }}
{{/*
Worker specific labels.
Renders the chart-wide "sim.labels" set followed by the
app.kubernetes.io/component: worker label. Used for the metadata.labels of
worker resources; expects the root context (.) as its argument.
*/}}
{{- define "sim.worker.labels" -}}
{{ include "sim.labels" . }}
app.kubernetes.io/component: worker
{{- end }}
{{/*
Worker selector labels.
Renders the chart-wide "sim.selectorLabels" set followed by the
app.kubernetes.io/component: worker label. Used both as the worker
Deployment's spec.selector.matchLabels and as its pod template labels, so the
two always agree; expects the root context (.) as its argument.
*/}}
{{- define "sim.worker.selectorLabels" -}}
{{ include "sim.selectorLabels" . }}
app.kubernetes.io/component: worker
{{- end }}
{{/*
Migrations specific labels
*/}}
@@ -206,6 +222,10 @@ Skip validation when using existing secrets or External Secrets Operator
{{- fail "realtime.env.BETTER_AUTH_SECRET must not use the default placeholder value. Generate a secure secret with: openssl rand -hex 32" }}
{{- end }}
{{- end }}
{{- /* Worker validation - REDIS_URL is required when worker is enabled */ -}}
{{- if and .Values.worker.enabled (not .Values.app.env.REDIS_URL) }}
{{- fail "app.env.REDIS_URL is required when worker.enabled=true" }}
{{- end }}
{{- /* PostgreSQL password validation - skip if using existing secret or ESO */ -}}
{{- if not (or $useExistingPostgresSecret $useExternalSecrets) }}
{{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) }}

View File

@@ -0,0 +1,101 @@
{{- /*
Worker Deployment.

Rendered only when .Values.worker.enabled is true. Runs the bundled worker
artifact (apps/sim/dist/worker.cjs) with bun and exposes a single "health"
container port taken from .Values.worker.healthPort, which the liveness and
readiness probes target.

Values read here:
  worker.enabled, worker.replicaCount, worker.image, worker.healthPort,
  worker.env, worker.* passed to the podSecurityContext / nodeSelector /
  resources / securityContext helpers, app.env.REDIS_URL, telemetry.enabled,
  podAnnotations, podLabels, extraEnvVars, global.imagePullSecrets,
  postgresql.enabled, externalDatabase.enabled.
*/ -}}
{{- if .Values.worker.enabled }}
{{- include "sim.validateSecrets" . }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "sim.fullname" . }}-worker
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "sim.worker.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.worker.replicaCount }}
  selector:
    matchLabels:
      {{- include "sim.worker.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- /* Emit the annotations key only when there is something to put under it. */}}
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "sim.worker.selectorLabels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "sim.serviceAccountName" . }}
      {{- /* Security context and node selector are worker-scoped; tolerations and affinity receive the chart-wide values. */}}
      {{- include "sim.podSecurityContext" .Values.worker | nindent 6 }}
      {{- include "sim.nodeSelector" .Values.worker | nindent 6 }}
      {{- include "sim.tolerations" .Values | nindent 6 }}
      {{- include "sim.affinity" .Values | nindent 6 }}
      containers:
        - name: worker
          image: {{ include "sim.image" (dict "context" . "image" .Values.worker.image) }}
          imagePullPolicy: {{ .Values.worker.image.pullPolicy }}
          command: ["bun", "apps/sim/dist/worker.cjs"]
          ports:
            - name: health
              containerPort: {{ .Values.worker.healthPort }}
              protocol: TCP
          env:
            - name: DATABASE_URL
              value: {{ include "sim.databaseUrl" . | quote }}
            {{- if .Values.app.env.REDIS_URL }}
            - name: REDIS_URL
              value: {{ .Values.app.env.REDIS_URL | quote }}
            {{- end }}
            - name: WORKER_PORT
              value: {{ .Values.worker.healthPort | quote }}
            {{- if .Values.telemetry.enabled }}
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://{{ include "sim.fullname" . }}-otel-collector:4318"
            - name: OTEL_SERVICE_NAME
              value: sim-worker
            - name: OTEL_SERVICE_VERSION
              value: {{ .Chart.AppVersion | quote }}
            {{- /* Fall back to "production" so the attribute is never rendered with an empty value. */}}
            - name: OTEL_RESOURCE_ATTRIBUTES
              value: "service.name=sim-worker,service.version={{ .Chart.AppVersion }},deployment.environment={{ .Values.worker.env.NODE_ENV | default "production" }}"
            {{- end }}
            {{- /* WORKER_PORT is already set from worker.healthPort above, so a WORKER_PORT key in worker.env is skipped. */}}
            {{- range $key, $value := .Values.worker.env }}
            {{- if ne $key "WORKER_PORT" }}
            - name: {{ $key }}
              value: {{ $value | quote }}
            {{- end }}
            {{- end }}
            {{- with .Values.extraEnvVars }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          envFrom:
            - secretRef:
                name: {{ include "sim.appSecretName" . }}
            {{- if .Values.postgresql.enabled }}
            - secretRef:
                name: {{ include "sim.postgresqlSecretName" . }}
            {{- else if .Values.externalDatabase.enabled }}
            - secretRef:
                name: {{ include "sim.externalDbSecretName" . }}
            {{- end }}
          {{- /* Liveness targets /health/live; readiness targets /health. Both use the named "health" port. */}}
          livenessProbe:
            httpGet:
              path: /health/live
              port: health
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: health
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          {{- include "sim.resources" .Values.worker | nindent 10 }}
          {{- include "sim.securityContext" .Values.worker | nindent 10 }}
{{- end }}

View File

@@ -358,6 +358,56 @@ realtime:
extraVolumes: []
extraVolumeMounts: []
# BullMQ worker: processes background jobs when Redis is available.
# It runs the same image as the main app, started with a different command.
worker:
  # Toggle for the worker deployment. Enabling it requires REDIS_URL to be set
  # in app.env.
  enabled: false

  # Container image for the worker (defaults to the same image as the app).
  image:
    repository: simstudioai/simstudio
    tag: latest
    pullPolicy: Always

  # How many worker pods to run.
  replicaCount: 1

  # Port of the worker's lightweight HTTP health server. The deployment passes
  # this value to the container as WORKER_PORT.
  healthPort: 3001

  # CPU / memory requests and limits for the worker container.
  resources:
    limits:
      memory: '4Gi'
      cpu: '1000m'
    requests:
      memory: '2Gi'
      cpu: '500m'

  # Node selector used when scheduling worker pods.
  nodeSelector: {}

  # Pod-level security context.
  podSecurityContext:
    fsGroup: 1001

  # Container-level security context.
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

  # Worker-specific environment variables (per-queue concurrency tuning).
  # A WORKER_PORT key placed here is ignored; set healthPort instead.
  env:
    NODE_ENV: 'production'
    WORKER_CONCURRENCY_WORKFLOW: '50'
    WORKER_CONCURRENCY_WEBHOOK: '30'
    WORKER_CONCURRENCY_SCHEDULE: '20'
    WORKER_CONCURRENCY_MOTHERSHIP_JOB: '10'
    WORKER_CONCURRENCY_CONNECTOR_SYNC: '5'
    WORKER_CONCURRENCY_DOCUMENT_PROCESSING: '20'
    WORKER_CONCURRENCY_NOTIFICATION_DELIVERY: '10'

# Database migrations job configuration
migrations:
# Enable/disable migrations job