# Global configuration
global:
  # Image registry and pull policy
  imageRegistry: "ghcr.io"
  # Use registry for all images, not just simstudioai/* images
  useRegistryForAllImages: false
  imagePullSecrets: []
  # Common labels applied to all resources
  commonLabels: {}
  # Storage class for persistent volumes
  storageClass: ""

# Main Sim application configuration
app:
  # Enable/disable the main application
  enabled: true

  # Image configuration
  image:
    repository: simstudioai/simstudio
    tag: latest
    pullPolicy: Always

  # Number of replicas
  replicaCount: 1

  # Resource limits and requests
  resources:
    limits:
      memory: "8Gi"
      cpu: "2000m"
    requests:
      memory: "4Gi"
      cpu: "1000m"

  # Node selector for pod scheduling (leave empty to allow scheduling on any node)
  nodeSelector: {}

  # Pod security context
  podSecurityContext:
    fsGroup: 1001

  # Container security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

  # Secret management configuration
  # Use this to reference pre-existing Kubernetes secrets instead of defining values directly
  # This enables integration with External Secrets Operator, HashiCorp Vault, Azure Key Vault, etc.
  secrets:
    existingSecret:
      # Set to true to use an existing secret instead of creating one from values
      enabled: false
      # Name of the existing Kubernetes secret containing app credentials
      name: ""
      # Key mappings - specify the key names in your existing secret
      # Only needed if your secret uses different key names than the defaults
      keys:
        BETTER_AUTH_SECRET: "BETTER_AUTH_SECRET"
        ENCRYPTION_KEY: "ENCRYPTION_KEY"
        INTERNAL_API_SECRET: "INTERNAL_API_SECRET"
        CRON_SECRET: "CRON_SECRET"
        API_ENCRYPTION_KEY: "API_ENCRYPTION_KEY"
        REDIS_URL: "REDIS_URL"

  # Environment variables
  env:
    # Application URLs
    NEXT_PUBLIC_APP_URL: "http://localhost:3000"
    BETTER_AUTH_URL: "http://localhost:3000"
    # Optional server-side internal base URL for /api self-calls (include http:// or https://);
    # falls back to NEXT_PUBLIC_APP_URL when empty
    INTERNAL_API_BASE_URL: ""
    # SOCKET_SERVER_URL: Auto-detected when realtime.enabled=true (uses internal service)
    # Only set this if using an external WebSocket service with realtime.enabled=false
    NEXT_PUBLIC_SOCKET_URL: "http://localhost:3002"  # Public WebSocket URL for browsers

    # Node environment
    NODE_ENV: "production"
    NEXT_TELEMETRY_DISABLED: "1"

    # Telemetry & Monitoring
    # OTLP endpoint for traces/logs (e.g., "https://otlp-collector:4318/v1/traces")
    TELEMETRY_ENDPOINT: ""

    # Authentication and encryption secrets (REQUIRED for production)
    # Generate each secret with: openssl rand -hex 32 (outputs 64 hex characters = 32 bytes)
    BETTER_AUTH_SECRET: ""  # REQUIRED - set via --set flag or external secret manager
    ENCRYPTION_KEY: ""  # REQUIRED - set via --set flag or external secret manager
    # REQUIRED - set via --set flag or external secret manager,
    # used for internal service-to-service authentication
    INTERNAL_API_SECRET: ""

    # Scheduled Jobs Authentication
    # Generate using: openssl rand -hex 32
    # Required when cronjobs.enabled=true (the default in this file), optional otherwise;
    # authenticates scheduled job requests
    CRON_SECRET: ""

    # Optional: API Key Encryption (RECOMMENDED for production)
    # Generate 64-character hex string using: openssl rand -hex 32 (outputs 64 hex chars = 32 bytes)
    # OPTIONAL - encrypts API keys at rest, must be exactly 64 hex characters,
    # if not set keys stored in plain text
    API_ENCRYPTION_KEY: ""
    # OPTIONAL - Redis connection string for BullMQ/workers;
    # can also come from app secret or External Secrets
    REDIS_URL: ""

    # Email & Communication
    # Enable email verification for user registration and login (defaults to false)
    EMAIL_VERIFICATION_ENABLED: "false"
    RESEND_API_KEY: ""  # Resend API key for transactional emails
    # Complete from address (e.g., "Sim <noreply@domain.com>" or "DoNotReply@domain.com")
    FROM_EMAIL_ADDRESS: ""
    EMAIL_DOMAIN: ""  # Domain for sending emails (fallback when FROM_EMAIL_ADDRESS not set)

    # OAuth Integration Credentials (leave empty if not using)
    GOOGLE_CLIENT_ID: ""  # Google OAuth client ID
    GOOGLE_CLIENT_SECRET: ""  # Google OAuth client secret
    GITHUB_CLIENT_ID: ""  # GitHub OAuth client ID
    GITHUB_CLIENT_SECRET: ""  # GitHub OAuth client secret
    DISABLE_GOOGLE_AUTH: ""  # Set to "true" to hide Google OAuth login
    DISABLE_GITHUB_AUTH: ""  # Set to "true" to hide GitHub OAuth login

    # Google Vertex AI Configuration
    VERTEX_PROJECT: ""  # Google Cloud project ID for Vertex AI
    VERTEX_LOCATION: "us-central1"  # Google Cloud region for Vertex AI (e.g., "us-central1")

    # Azure OpenAI Configuration (leave empty if not using Azure OpenAI)
    # Azure OpenAI service endpoint (e.g., https://your-resource.openai.azure.com)
    AZURE_OPENAI_ENDPOINT: ""
    AZURE_OPENAI_API_KEY: ""  # Azure OpenAI API key
    AZURE_OPENAI_API_VERSION: ""  # Azure OpenAI API version (e.g., 2024-07-01-preview)

    # Azure Anthropic Configuration (leave empty if not using Azure Anthropic via AI Foundry)
    AZURE_ANTHROPIC_ENDPOINT: ""  # Azure AI Foundry endpoint for Anthropic models
    AZURE_ANTHROPIC_API_KEY: ""  # Azure Anthropic API key
    AZURE_ANTHROPIC_API_VERSION: ""  # Azure Anthropic API version (e.g., 2023-06-01)

    # AI Provider API Keys (leave empty if not using)
    OPENAI_API_KEY: ""  # Primary OpenAI API key
    OPENAI_API_KEY_1: ""  # Additional OpenAI API key for load balancing
    OPENAI_API_KEY_2: ""  # Additional OpenAI API key for load balancing
    OPENAI_API_KEY_3: ""  # Additional OpenAI API key for load balancing
    MISTRAL_API_KEY: ""  # Mistral AI API key
    FIREWORKS_API_KEY: ""  # Fireworks AI API key (for hosted model access)
    ANTHROPIC_API_KEY_1: ""  # Primary Anthropic Claude API key
    ANTHROPIC_API_KEY_2: ""  # Additional Anthropic API key for load balancing
    ANTHROPIC_API_KEY_3: ""  # Additional Anthropic API key for load balancing
    OLLAMA_URL: ""  # Ollama local LLM server URL
    ELEVENLABS_API_KEY: ""  # ElevenLabs API key for text-to-speech in deployed chat

    # Admission & Dispatch Queue Configuration
    ADMISSION_GATE_MAX_INFLIGHT: "500"  # Max concurrent in-flight execution requests per pod
    DISPATCH_MAX_QUEUE_PER_WORKSPACE: "1000"  # Max queued dispatch jobs per workspace
    DISPATCH_MAX_QUEUE_GLOBAL: "50000"  # Max queued dispatch jobs globally

    # Rate Limiting Configuration (per minute)
    RATE_LIMIT_WINDOW_MS: "60000"  # Rate limit window duration (1 minute)
    RATE_LIMIT_FREE_SYNC: "50"  # Sync API executions per minute
    RATE_LIMIT_FREE_ASYNC: "200"  # Async API executions per minute

    # Execution Timeout Configuration (in seconds)
    # Sync timeouts apply to synchronous API calls
    EXECUTION_TIMEOUT_FREE: "300"  # Free tier sync timeout (5 minutes)
    EXECUTION_TIMEOUT_PRO: "3000"  # Pro tier sync timeout (50 minutes)
    EXECUTION_TIMEOUT_TEAM: "3000"  # Team tier sync timeout (50 minutes)
    EXECUTION_TIMEOUT_ENTERPRISE: "3000"  # Enterprise tier sync timeout (50 minutes)
    # Async timeouts apply to async/background job executions
    EXECUTION_TIMEOUT_ASYNC_FREE: "5400"  # Free tier async timeout (90 minutes)
    EXECUTION_TIMEOUT_ASYNC_PRO: "5400"  # Pro tier async timeout (90 minutes)
    EXECUTION_TIMEOUT_ASYNC_TEAM: "5400"  # Team tier async timeout (90 minutes)
    EXECUTION_TIMEOUT_ASYNC_ENTERPRISE: "5400"  # Enterprise tier async timeout (90 minutes)

    # Isolated-VM Worker Pool Configuration
    IVM_POOL_SIZE: "4"  # Max worker processes in pool
    IVM_MAX_CONCURRENT: "10000"  # Max concurrent executions globally
    IVM_MAX_PER_WORKER: "2500"  # Max concurrent executions per worker
    IVM_WORKER_IDLE_TIMEOUT_MS: "60000"  # Worker idle cleanup timeout (ms)
    IVM_QUEUE_TIMEOUT_MS: "300000"  # Max queue wait before rejection (ms)
    IVM_MAX_QUEUE_SIZE: "10000"  # Max queued executions globally
    IVM_MAX_ACTIVE_PER_OWNER: "200"  # Max concurrent executions per user
    IVM_MAX_QUEUED_PER_OWNER: "2000"  # Max queued executions per user
    IVM_MAX_OWNER_WEIGHT: "5"  # Max scheduling weight per user
    # Max in-flight per user across instances (Redis)
    IVM_DISTRIBUTED_MAX_INFLIGHT_PER_OWNER: "2200"
    IVM_DISTRIBUTED_LEASE_MIN_TTL_MS: "120000"  # Min distributed lease TTL (ms)
    IVM_MAX_FETCH_RESPONSE_BYTES: "8388608"  # Max fetch response size (8MB)
    IVM_MAX_FETCH_RESPONSE_CHARS: "4000000"  # Max fetch response chars
    IVM_MAX_FETCH_URL_LENGTH: "8192"  # Max fetch URL length
    IVM_MAX_FETCH_OPTIONS_JSON_CHARS: "262144"  # Max fetch options payload (256KB)
    IVM_MAX_STDOUT_CHARS: "200000"  # Max stdout capture per execution

    # UI Branding & Whitelabeling Configuration
    NEXT_PUBLIC_BRAND_NAME: "Sim"  # Custom brand name
    NEXT_PUBLIC_BRAND_LOGO_URL: ""  # Custom logo URL (leave empty for default)
    NEXT_PUBLIC_BRAND_FAVICON_URL: ""  # Custom favicon URL (leave empty for default)
    NEXT_PUBLIC_BRAND_PRIMARY_COLOR: ""  # Primary brand color (hex, e.g., "#701a75")
    NEXT_PUBLIC_BRAND_ACCENT_COLOR: ""  # Accent color (hex, e.g., "#9333ea")
    NEXT_PUBLIC_BRAND_BACKGROUND_COLOR: ""  # Background color (hex, e.g., "#ffffff")
    NEXT_PUBLIC_CUSTOM_CSS_URL: ""  # Custom stylesheet URL (leave empty for none)
    NEXT_PUBLIC_SUPPORT_EMAIL: "help@sim.ai"  # Support email address
    NEXT_PUBLIC_DOCUMENTATION_URL: ""  # Documentation URL (leave empty for none)
    NEXT_PUBLIC_TERMS_URL: ""  # Terms of service URL (leave empty for none)
    NEXT_PUBLIC_PRIVACY_URL: ""  # Privacy policy URL (leave empty for none)

    # Registration Control
    DISABLE_REGISTRATION: ""  # Set to "true" to disable new user signups
    # Set to "false" to disable email/password login (SSO-only mode, server-side enforcement)
    EMAIL_PASSWORD_SIGNUP_ENABLED: ""
    # Set to "false" to hide email/password login form (UI-side)
    NEXT_PUBLIC_EMAIL_PASSWORD_SIGNUP_ENABLED: ""
    # Set to "true" to block 55K+ disposable email domains (requires normalized_email migration)
    SIGNUP_EMAIL_VALIDATION_ENABLED: ""

    # Bot Protection (Cloudflare Turnstile)
    # Cloudflare Turnstile secret key (leave empty to disable captcha)
    TURNSTILE_SECRET_KEY: ""
    # Cloudflare Turnstile site key (leave empty to disable captcha)
    NEXT_PUBLIC_TURNSTILE_SITE_KEY: ""

    # Access Control (leave empty if not restricting login)
    ALLOWED_LOGIN_EMAILS: ""  # Comma-separated list of allowed email addresses for login
    ALLOWED_LOGIN_DOMAINS: ""  # Comma-separated list of allowed email domains for login

    # Admin API Configuration
    # Admin API key for organization/user management (generate with: openssl rand -hex 32)
    ADMIN_API_KEY: ""

    # Organizations & Permission Groups
    ACCESS_CONTROL_ENABLED: "false"  # Enable permission groups feature ("true" to enable)
    ORGANIZATIONS_ENABLED: "false"  # Enable organizations feature ("true" to enable)
    NEXT_PUBLIC_ACCESS_CONTROL_ENABLED: "false"  # Show permission groups UI ("true" to enable)
    NEXT_PUBLIC_ORGANIZATIONS_ENABLED: "false"  # Show organizations UI ("true" to enable)

    # LLM Provider/Model Restrictions (leave empty if not restricting)
    # Comma-separated provider IDs to hide from UI (e.g., "openai,anthropic,google")
    BLACKLISTED_PROVIDERS: ""
    # Comma-separated model names/prefixes to hide (e.g., "gpt-4,claude-*")
    BLACKLISTED_MODELS: ""
    # Comma-separated domains for MCP servers (e.g., "internal.company.com,mcp.example.org").
    # Empty = all allowed.
    ALLOWED_MCP_DOMAINS: ""

    # Integration/Block Restrictions (leave empty if not restricting)
    # Comma-separated block types to allow (e.g., "slack,github,agent"). Empty = all allowed.
    ALLOWED_INTEGRATIONS: ""

    # Invitation Control
    DISABLE_INVITATIONS: ""  # Set to "true" to disable workspace invitations globally
    NEXT_PUBLIC_DISABLE_INVITATIONS: ""  # Set to "true" to hide invitation UI elements

    # Public API Access Control
    DISABLE_PUBLIC_API: ""  # Set to "true" to disable public API toggle globally
    NEXT_PUBLIC_DISABLE_PUBLIC_API: ""  # Set to "true" to hide public API toggle in UI

    # SSO Configuration (Enterprise Single Sign-On)
    # Set to "true" AFTER running the SSO registration script
    SSO_ENABLED: ""  # Enable SSO authentication ("true" to enable)
    NEXT_PUBLIC_SSO_ENABLED: ""  # Show SSO login button in UI ("true" to enable)

    # AWS Bedrock Credential Mode
    # Set to "true" when the deployment uses AWS default credential chain (IAM roles, instance
    # profiles, ECS task roles, IRSA, etc.) instead of explicit access key/secret per workflow.
    # When enabled, the AWS Access Key ID and Secret fields are hidden in the Agent block UI.
    NEXT_PUBLIC_BEDROCK_DEFAULT_CREDENTIALS: ""  # Set to "true" to hide Bedrock credential fields

    # Azure Provider Credential Mode
    # Set to "true" when AZURE_OPENAI_ENDPOINT/API_KEY (and/or AZURE_ANTHROPIC_*) are configured
    # server-side. When enabled, the Azure endpoint, API key, and API version fields are hidden
    # in the Agent block UI — users just pick an Azure model and run.
    NEXT_PUBLIC_AZURE_CONFIGURED: ""  # Set to "true" to hide Azure credential fields

    # AWS S3 Cloud Storage Configuration (optional - for file storage)
    # If configured, files will be stored in S3 instead of local storage
    AWS_REGION: ""  # AWS region (e.g., "us-east-1")
    AWS_ACCESS_KEY_ID: ""  # AWS access key ID
    AWS_SECRET_ACCESS_KEY: ""  # AWS secret access key
    S3_BUCKET_NAME: ""  # S3 bucket for workspace files
    S3_KB_BUCKET_NAME: ""  # S3 bucket for knowledge base files
    S3_EXECUTION_FILES_BUCKET_NAME: ""  # S3 bucket for workflow execution files
    S3_CHAT_BUCKET_NAME: ""  # S3 bucket for deployed chat files
    S3_COPILOT_BUCKET_NAME: ""  # S3 bucket for copilot files
    S3_PROFILE_PICTURES_BUCKET_NAME: ""  # S3 bucket for user profile pictures
    S3_OG_IMAGES_BUCKET_NAME: ""  # S3 bucket for OpenGraph preview images

    # Azure Blob Storage Configuration (optional - for file storage)
    # If configured, files will be stored in Azure Blob instead of local storage
    # Note: Azure Blob takes precedence over S3 if both are configured
    AZURE_ACCOUNT_NAME: ""  # Azure storage account name
    AZURE_ACCOUNT_KEY: ""  # Azure storage account key
    AZURE_CONNECTION_STRING: ""  # Azure connection string (alternative to account name/key)
    AZURE_STORAGE_CONTAINER_NAME: ""  # Azure container for workspace files
    AZURE_STORAGE_KB_CONTAINER_NAME: ""  # Azure container for knowledge base files
    # Azure container for workflow execution files
    AZURE_STORAGE_EXECUTION_FILES_CONTAINER_NAME: ""
    AZURE_STORAGE_CHAT_CONTAINER_NAME: ""  # Azure container for deployed chat files
    AZURE_STORAGE_COPILOT_CONTAINER_NAME: ""  # Azure container for copilot files
    # Azure container for user profile pictures
    AZURE_STORAGE_PROFILE_PICTURES_CONTAINER_NAME: ""
    # Azure container for OpenGraph preview images
    AZURE_STORAGE_OG_IMAGES_CONTAINER_NAME: ""

  # Service configuration
  service:
    type: ClusterIP
    port: 3000
    targetPort: 3000

  # Health checks
  livenessProbe:
    httpGet:
      path: /
      port: 3000
    initialDelaySeconds: 10
    periodSeconds: 90
    timeoutSeconds: 5
    failureThreshold: 3

  readinessProbe:
    httpGet:
      path: /
      port: 3000
    initialDelaySeconds: 10
    periodSeconds: 90
    timeoutSeconds: 5
    failureThreshold: 3

  # Additional volumes for app deployment (e.g., branding assets, custom configs)
  extraVolumes: []
  extraVolumeMounts: []

# Realtime socket server configuration
realtime:
  # Enable/disable the realtime service
  enabled: true

  # Image configuration
  image:
    repository: simstudioai/realtime
    tag: latest
    pullPolicy: Always

  # Number of replicas
  replicaCount: 1

  # Resource limits and requests
  resources:
    limits:
      memory: "1Gi"
      cpu: "500m"
    requests:
      memory: "512Mi"
      cpu: "250m"

  # Node selector for pod scheduling (leave empty to allow scheduling on any node)
  nodeSelector: {}

  # Pod security context
  podSecurityContext:
    fsGroup: 1001

  # Container security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

  # Environment variables
  env:
    # Application URLs
    NEXT_PUBLIC_APP_URL: "http://localhost:3000"
    BETTER_AUTH_URL: "http://localhost:3000"
    NEXT_PUBLIC_SOCKET_URL: "http://localhost:3002"

    # Authentication secret (REQUIRED for production)
    # Must match the BETTER_AUTH_SECRET value from the main app configuration
    BETTER_AUTH_SECRET: ""  # REQUIRED - set via --set flag or external secret manager

    # Cross-Origin Resource Sharing (CORS) allowed origins
    ALLOWED_ORIGINS: "http://localhost:3000"

    # Node environment
    NODE_ENV: "production"

  # Service configuration
  service:
    type: ClusterIP
    port: 3002
    targetPort: 3002

  # Health checks
  livenessProbe:
    httpGet:
      path: /health
      port: 3002
    initialDelaySeconds: 10
    periodSeconds: 90
    timeoutSeconds: 5
    failureThreshold: 3

  readinessProbe:
    httpGet:
      path: /health
      port: 3002
    initialDelaySeconds: 10
    periodSeconds: 90
    timeoutSeconds: 5
    failureThreshold: 3

  # Additional volumes for realtime deployment
  extraVolumes: []
  extraVolumeMounts: []

# BullMQ worker configuration (processes background jobs when Redis is available)
# Uses the same image as the main app with a different command.
# Enabled by default so self-hosted deployments get the same topology as compose.
# Without REDIS_URL the worker starts, logs that it is idle, and does no queue processing.
worker:
  # Enable/disable the worker deployment
  enabled: true

  # Image configuration (defaults to same image as app)
  image:
    repository: simstudioai/simstudio
    tag: latest
    pullPolicy: Always

  # Number of replicas
  replicaCount: 1

  # Health check port (worker exposes a lightweight HTTP health server)
  healthPort: 3001

  # Resource limits and requests
  resources:
    limits:
      memory: "4Gi"
      cpu: "1000m"
    requests:
      memory: "2Gi"
      cpu: "500m"

  # Node selector for pod scheduling
  nodeSelector: {}

  # Pod security context
  podSecurityContext:
    fsGroup: 1001

  # Container security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

  # Environment variables (worker-specific tuning)
  env:
    NODE_ENV: "production"
    WORKER_CONCURRENCY_WORKFLOW: "50"
    WORKER_CONCURRENCY_WEBHOOK: "30"
    WORKER_CONCURRENCY_SCHEDULE: "20"
    WORKER_CONCURRENCY_MOTHERSHIP_JOB: "10"
    WORKER_CONCURRENCY_CONNECTOR_SYNC: "5"
    WORKER_CONCURRENCY_DOCUMENT_PROCESSING: "20"
    WORKER_CONCURRENCY_NOTIFICATION_DELIVERY: "10"

# Database migrations job configuration
migrations:
  # Enable/disable migrations job
  enabled: true

  # Image configuration
  image:
    repository: simstudioai/migrations
    tag: latest
    pullPolicy: Always

  # Resource limits and requests
  resources:
    limits:
      memory: "1Gi"
    requests:
      memory: "512Mi"
      cpu: "100m"

  # Pod security context
  podSecurityContext:
    fsGroup: 1001

  # Container security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

# PostgreSQL database configuration
postgresql:
  # Enable/disable internal PostgreSQL deployment
  enabled: true

  # Image configuration
  image:
    repository: pgvector/pgvector
    tag: pg17
    pullPolicy: IfNotPresent

  # Authentication configuration
  auth:
    username: postgres
    password: ""  # REQUIRED - set via --set flag or external secret manager
    database: sim

    # Use an existing secret for PostgreSQL credentials
    # This enables integration with External Secrets Operator, HashiCorp Vault, etc.
    # NOTE(review): nesting under `auth` reconstructed from context - confirm against the templates
    existingSecret:
      enabled: false
      name: ""  # Name of existing Kubernetes secret
      passwordKey: "POSTGRES_PASSWORD"  # Key in the secret containing the password

  # Node selector for database pod scheduling (leave empty to allow scheduling on any node)
  nodeSelector: {}

  # Resource limits and requests
  resources:
    limits:
      memory: "2Gi"
    requests:
      memory: "1Gi"
      cpu: "500m"

  # Pod security context
  podSecurityContext:
    fsGroup: 999

  # Container security context
  securityContext:
    runAsUser: 999

  # Persistence configuration
  persistence:
    enabled: true
    storageClass: ""
    size: 10Gi
    accessModes:
      - ReadWriteOnce

  # SSL/TLS configuration (enable for production deployments with certificates)
  # Requires cert-manager to be installed in the cluster
  tls:
    enabled: false
    certificatesSecret: postgres-tls-secret
    # Certificate configuration (only used if enabled)
    duration: "87600h"  # 10 years (default)
    renewBefore: "2160h"  # Renew 90 days before expiry (default)
    # Set to "Always" to rotate private key on renewal (recommended for security)
    rotationPolicy: ""
    privateKey:
      algorithm: RSA  # RSA or ECDSA
      size: 4096  # Key size in bits
    # Issuer reference (REQUIRED if tls.enabled is true)
    # By default, references the CA issuer created by certManager.caIssuer
    # Make sure certManager.enabled is true, or provide your own issuer
    issuerRef:
      name: sim-ca-issuer  # Name of your cert-manager Issuer/ClusterIssuer
      kind: ClusterIssuer  # ClusterIssuer or Issuer
      group: ""  # Optional: cert-manager.io (leave empty for default)
    # Additional DNS names (optional)
    additionalDnsNames: []
    # Example:
    # additionalDnsNames:
    #   - postgres.example.com
    #   - db.example.com

  # PostgreSQL configuration
  config:
    maxConnections: 1000
    sharedBuffers: "1280MB"
    maxWalSize: "4GB"
    minWalSize: "80MB"

  # Service configuration
  service:
    type: ClusterIP
    port: 5432
    targetPort: 5432

  # Health checks
  # NOTE(review): the probe commands hard-code user "postgres" and database "sim";
  # keep them in sync with auth.username / auth.database if those are changed
  # (confirm whether the templates override these commands).
  livenessProbe:
    exec:
      command: ["pg_isready", "-U", "postgres", "-d", "sim"]
    initialDelaySeconds: 10
    periodSeconds: 5

  readinessProbe:
    exec:
      command: ["pg_isready", "-U", "postgres", "-d", "sim"]
    initialDelaySeconds: 5
    periodSeconds: 3

# External database configuration (use when connecting to managed database services)
externalDatabase:
  # Enable to use an external database instead of the internal PostgreSQL instance
  enabled: false

  # Database connection details
  host: "external-db.example.com"
  port: 5432
  username: postgres
  password: ""
  database: sim

  # SSL configuration
  sslMode: require

  # Use an existing secret for external database credentials
  # This enables integration with External Secrets Operator, HashiCorp Vault, etc.
  existingSecret:
    enabled: false
    name: ""  # Name of existing Kubernetes secret
    passwordKey: "EXTERNAL_DB_PASSWORD"  # Key in the secret containing the password

# Ollama local AI models configuration
ollama:
  # Enable/disable Ollama deployment
  enabled: false

  # Image configuration
  image:
    repository: ollama/ollama
    tag: latest
    pullPolicy: Always

  # Number of replicas
  replicaCount: 1

  # GPU configuration
  gpu:
    enabled: false
    count: 1
    # GPU sharing strategy: "mig" (Multi-Instance GPU) or "time-slicing"
    # - mig: Hardware-level GPU partitioning (requires supported GPUs like A100)
    # - time-slicing: Software-level GPU sharing (works with most NVIDIA GPUs)
    strategy: "time-slicing"
    # Number of time-slicing replicas (only used when strategy is "time-slicing")
    timeSlicingReplicas: 5

  # Node selector for GPU workloads (adjust labels based on your cluster configuration)
  nodeSelector:
    accelerator: nvidia

  # Tolerations for GPU nodes (adjust based on your cluster's GPU node taints)
  tolerations:
    - key: "sku"
      operator: "Equal"
      value: "gpu"
      effect: "NoSchedule"

  # Resource limits and requests
  resources:
    limits:
      memory: "8Gi"
      nvidia.com/gpu: "1"
    requests:
      memory: "4Gi"
      cpu: "1000m"

  # Environment variables
  env:
    NVIDIA_DRIVER_CAPABILITIES: "all"
    OLLAMA_LOAD_TIMEOUT: "-1"
    OLLAMA_KEEP_ALIVE: "-1"
    OLLAMA_DEBUG: "1"

  # Persistence configuration
  persistence:
    enabled: true
    storageClass: ""
    size: 100Gi
    accessModes:
      - ReadWriteOnce

  # Service configuration
  service:
    type: ClusterIP
    port: 11434
    targetPort: 11434

  # Health checks
  startupProbe:
    httpGet:
      path: /
      port: 11434
    initialDelaySeconds: 10
    periodSeconds: 10
    timeoutSeconds: 5
    failureThreshold: 10

  livenessProbe:
    httpGet:
      path: /
      port: 11434
    initialDelaySeconds: 60
    periodSeconds: 10
    timeoutSeconds: 5
    failureThreshold: 5

  readinessProbe:
    httpGet:
      path: /
      port: 11434
    initialDelaySeconds: 30
    periodSeconds: 10
    timeoutSeconds: 5
    failureThreshold: 3

  # Additional volumes for ollama deployment
  extraVolumes: []
  extraVolumeMounts: []

# Ingress configuration
# When services share the same host, paths are consolidated into a single rule.
# Path order: realtime paths, copilot paths, then app paths (most specific first).
# Ensure specific paths (e.g., /socket.io, /copilot) come before catch-all paths (/).
ingress:
  enabled: false
  className: nginx
  annotations:
    nginx.ingress.kubernetes.io/force-ssl-redirect: "true"

  # Main application (use / as catch-all)
  app:
    host: sim.local
    paths:
      - path: /
        pathType: Prefix

  # Realtime service (use /socket.io when sharing host with app)
  realtime:
    host: sim-ws.local
    paths:
      - path: /
        pathType: Prefix

  # Copilot service (optional, use /copilot when sharing host)
  # copilot:
  #   host: sim.local
  #   paths:
  #     - path: /copilot
  #       pathType: Prefix

  tls:
    enabled: false
    secretName: sim-tls-secret

# Internal Ingress configuration
# Same path ordering rules apply as above.
ingressInternal:
  enabled: false
  className: nginx
  annotations: {}

  app:
    host: sim-internal.local
    paths:
      - path: /
        pathType: Prefix

  realtime:
    host: sim-internal.local
    paths:
      - path: /socket.io
        pathType: Prefix

  # copilot:
  #   host: sim-internal.local
  #   paths:
  #     - path: /copilot
  #       pathType: Prefix

  tls:
    enabled: false
    secretName: sim-internal-tls-secret

# Service Account configuration
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use
  name: ""

# Horizontal Pod Autoscaler
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 10
  targetCPUUtilizationPercentage: 80
  targetMemoryUtilizationPercentage: 80
  # Custom metrics for scaling (advanced users can add custom metrics here)
  customMetrics: []
  # Scaling behavior configuration (customize scale-up/down policies)
  # Example configuration:
  # behavior:
  #   scaleDown:
  #     stabilizationWindowSeconds: 300
  #     policies:
  #       - type: Percent
  #         value: 50
  #         periodSeconds: 60
  #   scaleUp:
  #     stabilizationWindowSeconds: 60
  #     policies:
  #       - type: Percent
  #         value: 100
  #         periodSeconds: 15
  #       - type: Pods
  #         value: 2
  #         periodSeconds: 60
  behavior: {}

# Pod disruption budget
# Note: PDBs only protect against voluntary disruptions (node drains, autoscaler)
# They do NOT affect rolling updates - use deployment.strategy.rollingUpdate for that
podDisruptionBudget:
  enabled: false
  # Use either minAvailable or maxUnavailable (not both)
  # Recommendation: Use maxUnavailable as it scales better with HPA
  # - minAvailable: minimum pods that must remain available (e.g., 1, "50%")
  # - maxUnavailable: maximum pods that can be unavailable (e.g., 1, "25%")
  minAvailable: null
  maxUnavailable: 1
  # unhealthyPodEvictionPolicy: allows eviction of unhealthy pods during node drains
  # Options: IfHealthyBudget (default) | AlwaysAllow (recommended for production)
  # Set to null to use K8s default (IfHealthyBudget)
  unhealthyPodEvictionPolicy: null

# Monitoring configuration
monitoring:
  # ServiceMonitor for Prometheus
  serviceMonitor:
    enabled: false
    # Additional labels for ServiceMonitor
    labels: {}
    # Additional annotations for ServiceMonitor
    annotations: {}
    # Metrics path
    path: /metrics
    # Scrape interval
    interval: 30s
    # Scrape timeout
    scrapeTimeout: 10s
    # Target labels to be added to scraped metrics
    targetLabels: []
    # Metric relabeling configurations
    metricRelabelings: []
    # Relabeling configurations
    relabelings: []

# Network policies
networkPolicy:
  enabled: false
  # Custom ingress rules
  ingress: []
  # Custom egress rules
  egress: []

# Shared storage for enterprise workflows requiring data sharing between pods
sharedStorage:
  enabled: false
  # Storage class for shared volumes (must support ReadWriteMany access)
  storageClass: ""
  # Default access modes for shared volumes (ReadWriteMany required for multi-pod access)
  defaultAccessModes:
    - ReadWriteMany
  # Define shared volumes for your workflows (uncomment and customize as needed)
  # Example volume configurations:
  # volumes:
  #   - name: output-share
  #     size: 100Gi
  #     accessModes:
  #       - ReadWriteMany
  #     annotations: {}
  #   - name: rawdata-share
  #     size: 500Gi
  #     accessModes:
  #       - ReadWriteMany
  #   - name: model-share
  #     size: 200Gi
  #     accessModes:
  #       - ReadWriteMany
  #   - name: logs-share
  #     size: 50Gi
  #     accessModes:
  #       - ReadWriteMany
  volumes: []

# Additional volumes for custom configurations (advanced users)
extraVolumes: []
extraVolumeMounts: []

# Branding configuration
# Use this to inject custom branding assets (logos, CSS, etc.) into the application
branding:
  # Enable/disable branding ConfigMap
  enabled: false

  # Mount path in the container where branding files will be available
  mountPath: "/app/public/branding"

  # Text files (CSS, JSON, HTML, etc.) - values are plain text
  # Example:
  # files:
  #   custom.css: |
  #     .logo { background-color: #ff0000; }
  #   config.json: |
  #     {"theme": "dark"}
  files: {}

  # Binary files (PNG, JPG, ICO, etc.) - values must be base64 encoded
  # Generate base64 with: base64 -i logo.png | tr -d '\n'
  # Example:
  # binaryFiles:
  #   logo.png: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk..."
  #   favicon.ico: "AAABAAEAEBAAAAEAIABoBAAAFgAAAA..."
  binaryFiles: {}

# Additional environment variables for custom integrations
extraEnvVars: []

# Pod annotations for custom metadata
podAnnotations: {}

# Pod labels for custom labeling
podLabels: {}

# Affinity settings for advanced pod scheduling
affinity: {}

# Tolerations for scheduling on tainted nodes
tolerations: []

# CronJob configuration for scheduled tasks
cronjobs:
  # Enable/disable all cron jobs
  enabled: true

  # Individual job configurations
  jobs:
    scheduleExecution:
      enabled: true
      name: schedule-execution
      schedule: "*/1 * * * *"
      path: "/api/schedules/execute"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    gmailWebhookPoll:
      enabled: true
      name: gmail-webhook-poll
      schedule: "*/1 * * * *"
      path: "/api/webhooks/poll/gmail"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    outlookWebhookPoll:
      enabled: true
      name: outlook-webhook-poll
      schedule: "*/1 * * * *"
      path: "/api/webhooks/poll/outlook"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    rssWebhookPoll:
      enabled: true
      name: rss-webhook-poll
      schedule: "*/1 * * * *"
      path: "/api/webhooks/poll/rss"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    imapWebhookPoll:
      enabled: true
      name: imap-webhook-poll
      schedule: "*/1 * * * *"
      path: "/api/webhooks/poll/imap"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    renewSubscriptions:
      enabled: true
      name: renew-subscriptions
      schedule: "0 */12 * * *"
      path: "/api/cron/renew-subscriptions"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

    inactivityAlertPoll:
      enabled: true
      name: inactivity-alert-poll
      schedule: "*/15 * * * *"
      path: "/api/notifications/poll"
      concurrencyPolicy: Forbid
      successfulJobsHistoryLimit: 3
      failedJobsHistoryLimit: 1

  # Global CronJob settings
  image:
    repository: curlimages/curl
    tag: "8.5.0"
    pullPolicy: IfNotPresent

  resources:
    limits:
      memory: "128Mi"
      cpu: "100m"
    requests:
      memory: "64Mi"
      cpu: "50m"

  restartPolicy: OnFailure
  activeDeadlineSeconds: 300
  startingDeadlineSeconds: 60

  # Pod security context
  podSecurityContext:
    fsGroup: 1001

  # Container security context
  securityContext:
    runAsNonRoot: true
    runAsUser: 1001

# Observability and telemetry configuration
telemetry:
  # Enable/disable telemetry collection
  enabled: false

  # OpenTelemetry Collector image
  image:
    repository: otel/opentelemetry-collector-contrib
    tag: "0.91.0"
    pullPolicy: IfNotPresent

  # Number of collector replicas
  replicaCount: 1

  # Resource limits and requests
  resources:
    limits:
      memory: "512Mi"
      cpu: "500m"
    requests:
      memory: "256Mi"
      cpu: "100m"

  # Node selector for telemetry pod scheduling (leave empty to allow scheduling on any node)
  nodeSelector: {}

  # Tolerations for telemetry workloads
  tolerations: []

  # Affinity for telemetry workloads
  affinity: {}

  # Service configuration
  service:
    type: ClusterIP

  # Jaeger tracing backend
  jaeger:
    enabled: false
    endpoint: "http://jaeger-collector:14250"
    tls:
      enabled: false

  # Prometheus metrics backend
  prometheus:
    enabled: false
    endpoint: "http://prometheus-server/api/v1/write"
    auth: ""

  # Generic OTLP backend
  otlp:
    enabled: false
    endpoint: "http://otlp-collector:4317"
    tls:
      enabled: false

# Copilot service configuration (optional microservice)
copilot:
  # Enable/disable the copilot service
  enabled: false

  # Server deployment configuration
  server:
    # Image configuration
    image:
      repository: simstudioai/copilot
      tag: latest
      pullPolicy: Always

    # Number of replicas
    replicaCount: 1

    # Resource limits and requests
    resources:
      limits:
        memory: "2Gi"
        cpu: "1000m"
      requests:
        memory: "1Gi"
        cpu: "500m"

    # Node selector for pod scheduling
    # Leave empty to run on same infrastructure as main Sim platform
    # Or specify labels to isolate on dedicated nodes: { "workload-type": "copilot" }
    nodeSelector: {}

    # Pod security context
    podSecurityContext:
      fsGroup: 1001

    # Container security context
    securityContext:
      runAsNonRoot: true
      runAsUser: 1001

    # Environment variables (required and optional)
    env:
      PORT: "8080"
      SERVICE_NAME: "copilot"
      ENVIRONMENT: "production"
      AGENT_API_DB_ENCRYPTION_KEY: ""
      INTERNAL_API_SECRET: ""
      LICENSE_KEY: ""
      OPENAI_API_KEY_1: ""
      ANTHROPIC_API_KEY_1: ""
      SIM_BASE_URL: ""
      SIM_AGENT_API_KEY: ""
      REDIS_URL: ""
      # Optional configuration
      LOG_LEVEL: "info"
      CORS_ALLOWED_ORIGINS: ""
      OTEL_EXPORTER_OTLP_ENDPOINT: ""

    # Optional: additional static environment variables
    extraEnv: []

    # Optional: references to existing ConfigMaps/Secrets
    extraEnvFrom: []

    # Secret generation configuration (set create=false to use an existing secret)
    secret:
      create: true
      name: ""
      annotations: {}

    # Service configuration
    service:
      type: ClusterIP
      port: 8080
      targetPort: 8080

    # Health checks
    readinessProbe:
      httpGet:
        path: /healthz
        port: 8080
      initialDelaySeconds: 5
      periodSeconds: 10
      timeoutSeconds: 5
      failureThreshold: 3

    livenessProbe:
      httpGet:
        path: /healthz
        port: 8080
      initialDelaySeconds: 15
      periodSeconds: 30
      timeoutSeconds: 5
      failureThreshold: 3

    # Pod Disruption Budget for high availability
    # NOTE(review): nesting under `server` reconstructed from context - confirm against the templates
    podDisruptionBudget:
      enabled: false
      minAvailable: 1

  # PostgreSQL database for copilot (separate from main Sim database)
  postgresql:
    # Enable/disable internal PostgreSQL for copilot
    enabled: true

    # Image configuration
    image:
      repository: postgres
      tag: 17-alpine
      pullPolicy: IfNotPresent

    # Authentication configuration
    auth:
      username: copilot
      password: ""  # REQUIRED - set via --set flag or external secret manager
      database: copilot

    # Node selector for database pod scheduling
    # Leave empty to run on same infrastructure as main Sim platform
    # Or specify labels to isolate on dedicated nodes: { "workload-type": "copilot" }
    nodeSelector: {}

    # Resource limits and requests
    resources:
      limits:
        memory: "1Gi"
        cpu: "500m"
      requests:
        memory: "512Mi"
        cpu: "250m"

    # Pod security context
    podSecurityContext:
      fsGroup: 999

    # Container security context
    securityContext:
      runAsUser: 999

    # Persistence configuration
    persistence:
      enabled: true
      storageClass: ""
      size: 10Gi
      accessModes:
        - ReadWriteOnce

    # Service configuration
    service:
      type: ClusterIP
      port: 5432
      targetPort: 5432

    # Health checks
    # NOTE(review): the probe commands hard-code user and database "copilot";
    # keep them in sync with auth.username / auth.database if those are changed.
    livenessProbe:
      exec:
        command: ["pg_isready", "-U", "copilot", "-d", "copilot"]
      initialDelaySeconds: 10
      periodSeconds: 5
      timeoutSeconds: 5
      failureThreshold: 10

    readinessProbe:
      exec:
        command: ["pg_isready", "-U", "copilot", "-d", "copilot"]
      initialDelaySeconds: 5
      periodSeconds: 3
      timeoutSeconds: 5
      failureThreshold: 10

  # External database configuration (use when connecting to a managed database)
  database:
    existingSecretName: ""
    secretKey: DATABASE_URL
    url: ""

  # Migration job configuration
  migrations:
    # Enable/disable migrations job
    enabled: true

    # Image configuration (same as server)
    image:
      repository: simstudioai/copilot
      tag: latest
      pullPolicy: Always

    # Resource limits and requests
    resources:
      limits:
        memory: "512Mi"
        cpu: "500m"
      requests:
        memory: "256Mi"
        cpu: "100m"

    # Pod security context
    podSecurityContext:
      fsGroup: 1001

    # Container security context
    securityContext:
      runAsNonRoot: true
      runAsUser: 1001

    # Job configuration
    backoffLimit: 3
    restartPolicy: OnFailure

# External Secrets Operator integration
# Use this to automatically sync secrets from external secret managers
# (Azure Key Vault, AWS Secrets Manager, etc.)
# Prerequisites: Install External Secrets Operator in your cluster first
# See: https://external-secrets.io/latest/introduction/getting-started/
externalSecrets:
  # Enable External Secrets Operator integration
  enabled: false

  # ESO API version - use "v1" for ESO v0.17+ (recommended), "v1beta1" for older versions
  apiVersion: "v1"

  # How often to sync secrets from the external store
  refreshInterval: "1h"

  # Reference to the SecretStore or ClusterSecretStore
  secretStoreRef:
    # Name of the SecretStore or ClusterSecretStore resource
    name: ""
    # Kind of the store: "SecretStore" (namespaced) or "ClusterSecretStore" (cluster-wide)
    kind: "ClusterSecretStore"

  # Remote references - paths/keys in your external secret store
  # These map to the secrets that will be created in Kubernetes
  remoteRefs:
    # App secrets (authentication, encryption keys)
    app:
      # Path to BETTER_AUTH_SECRET in external store (e.g., "sim/app/better-auth-secret")
      BETTER_AUTH_SECRET: ""
      # Path to ENCRYPTION_KEY in external store
      ENCRYPTION_KEY: ""
      # Path to INTERNAL_API_SECRET in external store
      INTERNAL_API_SECRET: ""
      # Path to CRON_SECRET in external store (optional)
      CRON_SECRET: ""
      # Path to API_ENCRYPTION_KEY in external store (optional)
      API_ENCRYPTION_KEY: ""
      # Path to REDIS_URL in external store (optional, required for worker when not set in app.env)
      REDIS_URL: ""

    # PostgreSQL password (for internal PostgreSQL)
    postgresql:
      # Path to PostgreSQL password in external store (e.g., "sim/postgresql/password")
      password: ""

    # External database password (when using managed database services)
    externalDatabase:
      # Path to external database password in external store
      password: ""

# cert-manager configuration
# Prerequisites: Install cert-manager in your cluster first
# See: https://cert-manager.io/docs/installation/
#
# This implements the recommended CA bootstrap pattern from cert-manager:
# 1. Self-signed ClusterIssuer (bootstrap only - creates root CA)
# 2. Root CA Certificate (self-signed, becomes the trust anchor)
# 3.
CA ClusterIssuer (signs application certificates using root CA) # # Reference: https://cert-manager.io/docs/configuration/selfsigned/ certManager: # Enable/disable cert-manager issuer resources enabled: false # Self-signed ClusterIssuer (used ONLY to bootstrap the root CA) # Do not reference this issuer directly for application certificates selfSignedIssuer: name: "sim-selfsigned-bootstrap-issuer" # Root CA Certificate configuration # This certificate is signed by the self-signed issuer and used as the trust anchor rootCA: # Name of the Certificate resource certificateName: "sim-root-ca" # Namespace where the root CA certificate and secret will be created # Must match cert-manager's cluster-resource-namespace (default: cert-manager) namespace: "cert-manager" # Common name for the root CA certificate commonName: "sim-root-ca" # Secret name where the root CA certificate and key will be stored secretName: "sim-root-ca-secret" # Certificate validity duration (default: 10 years) duration: "87600h" # Renew before expiry (default: 90 days) renewBefore: "2160h" # Private key configuration privateKey: algorithm: RSA size: 4096 # Subject configuration subject: organizations: [] # If empty, defaults to the release name # CA ClusterIssuer configuration # This is the issuer that applications should reference for obtaining certificates caIssuer: name: "sim-ca-issuer"