mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-06 03:00:16 -04:00
fix(worker): dockerfile + helm updates (#3818)
* fix(worker): dockerfile + helm updates * address comments
This commit is contained in:
committed by
GitHub
parent
c05e2e0fc8
commit
21156dd54a
@@ -17,8 +17,9 @@
|
||||
"load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml",
|
||||
"load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml",
|
||||
"load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml",
|
||||
"build": "bun run build:pptx-worker && next build",
|
||||
"build": "bun run build:pptx-worker && bun run build:worker && next build",
|
||||
"build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs",
|
||||
"build:worker": "bun build ./worker/index.ts --target=node --format=cjs --packages=external --outfile ./dist/worker.cjs",
|
||||
"start": "next start",
|
||||
"worker": "NODE_ENV=production bun run worker/index.ts",
|
||||
"prepare": "cd ../.. && bun husky",
|
||||
|
||||
@@ -67,6 +67,38 @@ services:
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# Background job worker for local development.
# Opt-in: the service belongs to the `worker` profile, so it only starts when
# that profile is activated (e.g. `docker compose --profile worker up`).
sim-worker:
  build:
    context: .
    dockerfile: docker/app.Dockerfile
  # Run the pre-bundled worker entrypoint (dist/worker.cjs) instead of the app server.
  command: ['bun', 'apps/sim/dist/worker.cjs']
  restart: unless-stopped
  profiles:
    - worker
  deploy:
    resources:
      limits:
        memory: 4G
  environment:
    - NODE_ENV=development
    - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
    - REDIS_URL=${REDIS_URL:-}
    - ENCRYPTION_KEY=${ENCRYPTION_KEY:-dev-encryption-key-at-least-32-chars}
    - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-}
    - INTERNAL_API_SECRET=${INTERNAL_API_SECRET:-dev-internal-api-secret-min-32-chars}
    # Health server port; overridable from the host environment, defaults to 3001.
    # The healthcheck below interpolates the same variable so both stay in sync.
    - WORKER_PORT=${WORKER_PORT:-3001}
  depends_on:
    db:
      condition: service_healthy
    migrations:
      condition: service_completed_successfully
  healthcheck:
    # Probes the worker's liveness endpoint on the configured health port.
    test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live']
    interval: 90s
    timeout: 5s
    retries: 3
    start_period: 10s
|
||||
|
||||
migrations:
|
||||
build:
|
||||
context: .
|
||||
|
||||
@@ -42,7 +42,7 @@ services:
|
||||
|
||||
sim-worker:
|
||||
image: ghcr.io/simstudioai/simstudio:latest
|
||||
command: ['bun', 'run', 'worker']
|
||||
command: ['bun', 'apps/sim/dist/worker.cjs']
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
@@ -71,7 +71,7 @@ services:
|
||||
migrations:
|
||||
condition: service_completed_successfully
|
||||
healthcheck:
|
||||
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health']
|
||||
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live']
|
||||
interval: 90s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
@@ -114,6 +114,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v
|
||||
# Copy the bundled PPTX worker artifact
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs
|
||||
|
||||
# Copy the bundled BullMQ worker artifact
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs
|
||||
|
||||
# Guardrails setup with pip caching
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt
|
||||
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/validate_pii.py ./apps/sim/lib/guardrails/validate_pii.py
|
||||
|
||||
@@ -117,6 +117,22 @@ Ollama selector labels
|
||||
app.kubernetes.io/component: ollama
|
||||
{{- end }}
|
||||
|
||||
{{/*
Full label set for worker resources: the chart-wide common labels
plus the "worker" component label.
*/}}
{{- define "sim.worker.labels" -}}
{{- $common := include "sim.labels" . -}}
{{- printf "%s\napp.kubernetes.io/component: worker" $common -}}
{{- end }}
|
||||
|
||||
{{/*
Selector labels for worker pods: the chart-wide selector labels
plus the "worker" component label.
*/}}
{{- define "sim.worker.selectorLabels" -}}
{{- $base := include "sim.selectorLabels" . -}}
{{- printf "%s\napp.kubernetes.io/component: worker" $base -}}
{{- end }}
|
||||
|
||||
{{/*
|
||||
Migrations specific labels
|
||||
*/}}
|
||||
@@ -206,6 +222,10 @@ Skip validation when using existing secrets or External Secrets Operator
|
||||
{{- fail "realtime.env.BETTER_AUTH_SECRET must not use the default placeholder value. Generate a secure secret with: openssl rand -hex 32" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- /* Worker validation - REDIS_URL is required when worker is enabled */ -}}
|
||||
{{- if and .Values.worker.enabled (not .Values.app.env.REDIS_URL) }}
|
||||
{{- fail "app.env.REDIS_URL is required when worker.enabled=true" }}
|
||||
{{- end }}
|
||||
{{- /* PostgreSQL password validation - skip if using existing secret or ESO */ -}}
|
||||
{{- if not (or $useExistingPostgresSecret $useExternalSecrets) }}
|
||||
{{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) }}
|
||||
|
||||
101
helm/sim/templates/deployment-worker.yaml
Normal file
101
helm/sim/templates/deployment-worker.yaml
Normal file
@@ -0,0 +1,101 @@
|
||||
{{- /*
Worker Deployment.

Rendered only when .Values.worker.enabled is true. Runs the bundled worker
entrypoint (apps/sim/dist/worker.cjs) with its own replica count, resources,
and environment taken from .Values.worker.

Environment is assembled in this order:
  1. DATABASE_URL, REDIS_URL (from app.env, when set) and WORKER_PORT
  2. OpenTelemetry variables, when telemetry is enabled
  3. every entry of .Values.worker.env except WORKER_PORT (owned by healthPort)
  4. .Values.extraEnvVars
followed by envFrom references to the app secret and the database secret.
*/ -}}
{{- if .Values.worker.enabled }}
{{- include "sim.validateSecrets" . }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "sim.fullname" . }}-worker
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "sim.worker.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.worker.replicaCount }}
  selector:
    matchLabels:
      {{- include "sim.worker.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- /* Emit the annotations key only when there is something to put under it,
             so the pod template never carries a null annotations map. */}}
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "sim.worker.selectorLabels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "sim.serviceAccountName" . }}
      {{- include "sim.podSecurityContext" .Values.worker | nindent 6 }}
      {{- include "sim.nodeSelector" .Values.worker | nindent 6 }}
      {{- include "sim.tolerations" .Values | nindent 6 }}
      {{- include "sim.affinity" .Values | nindent 6 }}
      containers:
        - name: worker
          image: {{ include "sim.image" (dict "context" . "image" .Values.worker.image) }}
          imagePullPolicy: {{ .Values.worker.image.pullPolicy }}
          command: ["bun", "apps/sim/dist/worker.cjs"]
          ports:
            - name: health
              containerPort: {{ .Values.worker.healthPort }}
              protocol: TCP
          env:
            - name: DATABASE_URL
              value: {{ include "sim.databaseUrl" . | quote }}
            {{- if .Values.app.env.REDIS_URL }}
            - name: REDIS_URL
              value: {{ .Values.app.env.REDIS_URL | quote }}
            {{- end }}
            - name: WORKER_PORT
              value: {{ .Values.worker.healthPort | quote }}
            {{- if .Values.telemetry.enabled }}
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: "http://{{ include "sim.fullname" . }}-otel-collector:4318"
            - name: OTEL_SERVICE_NAME
              value: sim-worker
            - name: OTEL_SERVICE_VERSION
              value: {{ .Chart.AppVersion | quote }}
            - name: OTEL_RESOURCE_ATTRIBUTES
              value: "service.name=sim-worker,service.version={{ .Chart.AppVersion }},deployment.environment={{ .Values.worker.env.NODE_ENV | default "production" }}"
            {{- end }}
            {{- /* WORKER_PORT is skipped here: it is always derived from worker.healthPort
                   above so the env var and the container port cannot drift apart. */}}
            {{- range $key, $value := .Values.worker.env }}
            {{- if ne $key "WORKER_PORT" }}
            - name: {{ $key }}
              value: {{ $value | quote }}
            {{- end }}
            {{- end }}
            {{- with .Values.extraEnvVars }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          envFrom:
            - secretRef:
                name: {{ include "sim.appSecretName" . }}
            {{- if .Values.postgresql.enabled }}
            - secretRef:
                name: {{ include "sim.postgresqlSecretName" . }}
            {{- else if .Values.externalDatabase.enabled }}
            - secretRef:
                name: {{ include "sim.externalDbSecretName" . }}
            {{- end }}
          # Liveness and readiness both target the named "health" port but use
          # different paths (/health/live vs /health).
          livenessProbe:
            httpGet:
              path: /health/live
              port: health
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /health
              port: health
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          {{- include "sim.resources" .Values.worker | nindent 10 }}
          {{- include "sim.securityContext" .Values.worker | nindent 10 }}
{{- end }}
|
||||
@@ -358,6 +358,56 @@ realtime:
|
||||
extraVolumes: []
|
||||
extraVolumeMounts: []
|
||||
|
||||
# Background worker (BullMQ): processes queued background jobs when Redis is available.
# It runs from the main application image, started with a different command.
worker:
  # Toggle for the worker Deployment. Enabling it requires REDIS_URL to be set in app.env.
  enabled: false

  # Container image for the worker (same image as the app by default)
  image:
    repository: simstudioai/simstudio
    tag: latest
    pullPolicy: Always

  # How many worker pods to run
  replicaCount: 1

  # Port of the worker's lightweight HTTP health server
  healthPort: 3001

  # Compute resources reserved for (requests) and allowed to (limits) each worker pod
  resources:
    requests:
      cpu: "500m"
      memory: "2Gi"
    limits:
      cpu: "1000m"
      memory: "4Gi"

  # Constrain scheduling of worker pods to matching nodes
  nodeSelector: {}

  # Pod-level security context
  podSecurityContext:
    fsGroup: 1001

  # Container-level security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

  # Worker environment variables: runtime mode plus per-queue concurrency tuning
  env:
    NODE_ENV: "production"
    WORKER_CONCURRENCY_WORKFLOW: "50"
    WORKER_CONCURRENCY_WEBHOOK: "30"
    WORKER_CONCURRENCY_SCHEDULE: "20"
    WORKER_CONCURRENCY_MOTHERSHIP_JOB: "10"
    WORKER_CONCURRENCY_CONNECTOR_SYNC: "5"
    WORKER_CONCURRENCY_DOCUMENT_PROCESSING: "20"
    WORKER_CONCURRENCY_NOTIFICATION_DELIVERY: "10"
|
||||
|
||||
# Database migrations job configuration
|
||||
migrations:
|
||||
# Enable/disable migrations job
|
||||
|
||||
Reference in New Issue
Block a user