From be2a9ef0f86cdb749c0ccadcaab19296c9d330b2 Mon Sep 17 00:00:00 2001 From: Waleed Date: Sun, 25 Jan 2026 13:06:12 -0800 Subject: [PATCH] fix(storage): support Azure connection string for presigned URLs (#2997) * fix(docs): update requirements to be more accurate for deploying the app * updated kb to support 1536 dimension vectors for models other than text embedding 3 small * fix(storage): support Azure connection string for presigned URLs * fix(kb): update test for embedding dimensions parameter * fix(storage): align credential source ordering for consistency --- .devcontainer/docker-compose.yml | 2 +- .../content/docs/de/self-hosting/index.mdx | 20 ++-- .../content/docs/en/self-hosting/index.mdx | 20 ++-- .../content/docs/es/self-hosting/index.mdx | 20 ++-- .../content/docs/fr/self-hosting/index.mdx | 20 ++-- .../content/docs/ja/self-hosting/index.mdx | 20 ++-- .../content/docs/zh/self-hosting/index.mdx | 20 ++-- .../app/api/knowledge/search/utils.test.ts | 1 + apps/sim/lib/knowledge/embeddings.ts | 15 +++ apps/sim/lib/uploads/providers/blob/client.ts | 94 +++++++++++++------ docker-compose.local.yml | 2 +- docker-compose.ollama.yml | 2 +- docker-compose.prod.yml | 2 +- helm/sim/examples/values-production.yaml | 16 ++-- helm/sim/values.yaml | 16 ++-- 15 files changed, 183 insertions(+), 87 deletions(-) diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index c107e30d3..012777093 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -44,7 +44,7 @@ services: deploy: resources: limits: - memory: 4G + memory: 12G environment: - NODE_ENV=development - DATABASE_URL=postgresql://postgres:postgres@db:5432/simstudio diff --git a/apps/docs/content/docs/de/self-hosting/index.mdx b/apps/docs/content/docs/de/self-hosting/index.mdx index 7a5faa4d3..1ad498091 100644 --- a/apps/docs/content/docs/de/self-hosting/index.mdx +++ b/apps/docs/content/docs/de/self-hosting/index.mdx @@ -10,12 +10,20 @@ Stellen Sie Sim auf Ihrer eigenen Infrastruktur mit Docker oder Kubernetes berei ## Anforderungen -| Ressource | Minimum | Empfohlen | -|----------|---------|-------------| -| CPU | 2 Kerne | 4+ Kerne | -| RAM | 12 GB | 16+ GB | -| Speicher | 20 GB SSD | 50+ GB SSD | -| Docker | 20.10+ | Neueste Version | +| Ressource | Klein | Standard | Produktion | +|----------|-------|----------|------------| +| CPU | 2 Kerne | 4 Kerne | 8+ Kerne | +| RAM | 12 GB | 16 GB | 32+ GB | +| Speicher | 20 GB SSD | 50 GB SSD | 100+ GB SSD | +| Docker | 20.10+ | 20.10+ | Neueste Version | + +**Klein**: Entwicklung, Tests, Einzelnutzer (1-5 Nutzer) +**Standard**: Teams (5-50 Nutzer), moderate Arbeitslasten +**Produktion**: Große Teams (50+ Nutzer), Hochverfügbarkeit, intensive Workflow-Ausführung + + +Die Ressourcenanforderungen werden durch Workflow-Ausführung (isolated-vm Sandboxing), Dateiverarbeitung (In-Memory-Dokumentenparsing) und Vektoroperationen (pgvector) bestimmt. Arbeitsspeicher ist typischerweise der limitierende Faktor, nicht CPU. Produktionsdaten zeigen, dass die Hauptanwendung durchschnittlich 4-8 GB und bei hoher Last bis zu 12 GB benötigt. + ## Schnellstart diff --git a/apps/docs/content/docs/en/self-hosting/index.mdx b/apps/docs/content/docs/en/self-hosting/index.mdx index 87ab39b9a..bbdef7e4e 100644 --- a/apps/docs/content/docs/en/self-hosting/index.mdx +++ b/apps/docs/content/docs/en/self-hosting/index.mdx @@ -16,12 +16,20 @@ Deploy Sim on your own infrastructure with Docker or Kubernetes.
## Requirements -| Resource | Minimum | Recommended | -|----------|---------|-------------| -| CPU | 2 cores | 4+ cores | -| RAM | 12 GB | 16+ GB | -| Storage | 20 GB SSD | 50+ GB SSD | -| Docker | 20.10+ | Latest | +| Resource | Small | Standard | Production | +|----------|-------|----------|------------| +| CPU | 2 cores | 4 cores | 8+ cores | +| RAM | 12 GB | 16 GB | 32+ GB | +| Storage | 20 GB SSD | 50 GB SSD | 100+ GB SSD | +| Docker | 20.10+ | 20.10+ | Latest | + +**Small**: Development, testing, single user (1-5 users) +**Standard**: Teams (5-50 users), moderate workloads +**Production**: Large teams (50+ users), high availability, heavy workflow execution + + +Resource requirements are driven by workflow execution (isolated-vm sandboxing), file processing (in-memory document parsing), and vector operations (pgvector). Memory is typically the constraining factor rather than CPU. Production telemetry shows the main app uses 4-8 GB average with peaks up to 12 GB under heavy load. + ## Quick Start diff --git a/apps/docs/content/docs/es/self-hosting/index.mdx b/apps/docs/content/docs/es/self-hosting/index.mdx index a511b1963..2b9c5c1ef 100644 --- a/apps/docs/content/docs/es/self-hosting/index.mdx +++ b/apps/docs/content/docs/es/self-hosting/index.mdx @@ -10,12 +10,20 @@ Despliega Sim en tu propia infraestructura con Docker o Kubernetes. ## Requisitos -| Recurso | Mínimo | Recomendado | -|----------|---------|-------------| -| CPU | 2 núcleos | 4+ núcleos | -| RAM | 12 GB | 16+ GB | -| Almacenamiento | 20 GB SSD | 50+ GB SSD | -| Docker | 20.10+ | Última versión | +| Recurso | Pequeño | Estándar | Producción | +|----------|---------|----------|------------| +| CPU | 2 núcleos | 4 núcleos | 8+ núcleos | +| RAM | 12 GB | 16 GB | 32+ GB | +| Almacenamiento | 20 GB SSD | 50 GB SSD | 100+ GB SSD | +| Docker | 20.10+ | 20.10+ | Última versión | + +**Pequeño**: Desarrollo, pruebas, usuario único (1-5 usuarios) +**Estándar**: Equipos (5-50 usuarios), cargas de trabajo moderadas +**Producción**: Equipos grandes (50+ usuarios), alta disponibilidad, ejecución intensiva de workflows + + +Los requisitos de recursos están determinados por la ejecución de workflows (sandboxing isolated-vm), procesamiento de archivos (análisis de documentos en memoria) y operaciones vectoriales (pgvector). La memoria suele ser el factor limitante, no la CPU. La telemetría de producción muestra que la aplicación principal usa 4-8 GB en promedio con picos de hasta 12 GB bajo carga pesada. + ## Inicio rápido diff --git a/apps/docs/content/docs/fr/self-hosting/index.mdx b/apps/docs/content/docs/fr/self-hosting/index.mdx index bb52a8aa8..cfd0d49f8 100644 --- a/apps/docs/content/docs/fr/self-hosting/index.mdx +++ b/apps/docs/content/docs/fr/self-hosting/index.mdx @@ -10,12 +10,20 @@ Déployez Sim sur votre propre infrastructure avec Docker ou Kubernetes. 
## Prérequis -| Ressource | Minimum | Recommandé | -|----------|---------|-------------| -| CPU | 2 cœurs | 4+ cœurs | -| RAM | 12 Go | 16+ Go | -| Stockage | 20 Go SSD | 50+ Go SSD | -| Docker | 20.10+ | Dernière version | +| Ressource | Petit | Standard | Production | +|----------|-------|----------|------------| +| CPU | 2 cœurs | 4 cœurs | 8+ cœurs | +| RAM | 12 Go | 16 Go | 32+ Go | +| Stockage | 20 Go SSD | 50 Go SSD | 100+ Go SSD | +| Docker | 20.10+ | 20.10+ | Dernière version | + +**Petit** : Développement, tests, utilisateur unique (1-5 utilisateurs) +**Standard** : Équipes (5-50 utilisateurs), charges de travail modérées +**Production** : Grandes équipes (50+ utilisateurs), haute disponibilité, exécution intensive de workflows + + +Les besoins en ressources sont déterminés par l'exécution des workflows (sandboxing isolated-vm), le traitement des fichiers (analyse de documents en mémoire) et les opérations vectorielles (pgvector). La mémoire est généralement le facteur limitant, pas le CPU. La télémétrie de production montre que l'application principale utilise 4-8 Go en moyenne avec des pics jusqu'à 12 Go sous forte charge. + ## Démarrage rapide diff --git a/apps/docs/content/docs/ja/self-hosting/index.mdx b/apps/docs/content/docs/ja/self-hosting/index.mdx index 69ae41e6c..18aea7e7d 100644 --- a/apps/docs/content/docs/ja/self-hosting/index.mdx +++ b/apps/docs/content/docs/ja/self-hosting/index.mdx @@ -10,12 +10,20 @@ DockerまたはKubernetesを使用して、自社のインフラストラクチ ## 要件 -| リソース | 最小 | 推奨 | -|----------|---------|-------------| -| CPU | 2コア | 4+コア | -| RAM | 12 GB | 16+ GB | -| ストレージ | 20 GB SSD | 50+ GB SSD | -| Docker | 20.10+ | 最新版 | +| リソース | スモール | スタンダード | プロダクション | +|----------|---------|-------------|----------------| +| CPU | 2コア | 4コア | 8+コア | +| RAM | 12 GB | 16 GB | 32+ GB | +| ストレージ | 20 GB SSD | 50 GB SSD | 100+ GB SSD | +| Docker | 20.10+ | 20.10+ | 最新版 | + +**スモール**: 開発、テスト、シングルユーザー(1-5ユーザー) +**スタンダード**: チーム(5-50ユーザー)、中程度のワークロード +**プロダクション**: 大規模チーム(50+ユーザー)、高可用性、高負荷ワークフロー実行 + + +リソース要件は、ワークフロー実行(isolated-vmサンドボックス)、ファイル処理(メモリ内ドキュメント解析)、ベクトル演算(pgvector)によって決まります。CPUよりもメモリが制約要因となることが多いです。本番環境のテレメトリによると、メインアプリは平均4-8 GB、高負荷時は最大12 GBを使用します。 + ## クイックスタート diff --git a/apps/docs/content/docs/zh/self-hosting/index.mdx b/apps/docs/content/docs/zh/self-hosting/index.mdx index a2a4f4e50..de56839a1 100644 --- a/apps/docs/content/docs/zh/self-hosting/index.mdx +++ b/apps/docs/content/docs/zh/self-hosting/index.mdx @@ -10,12 +10,20 @@ import { Callout } from 'fumadocs-ui/components/callout' ## 要求 -| 资源 | 最低要求 | 推荐配置 | -|----------|---------|-------------| -| CPU | 2 核 | 4 核及以上 | -| 内存 | 12 GB | 16 GB 及以上 | -| 存储 | 20 GB SSD | 50 GB 及以上 SSD | -| Docker | 20.10+ | 最新版本 | +| 资源 | 小型 | 标准 | 生产环境 | +|----------|------|------|----------| +| CPU | 2 核 | 4 核 | 8+ 核 | +| 内存 | 12 GB | 16 GB | 32+ GB | +| 存储 | 20 GB SSD | 50 GB SSD | 100+ GB SSD | +| Docker | 20.10+ | 20.10+ | 最新版本 | + +**小型**: 开发、测试、单用户(1-5 用户) +**标准**: 团队(5-50 用户)、中等工作负载 +**生产环境**: 大型团队(50+ 用户)、高可用性、密集工作流执行 + + +资源需求由工作流执行(isolated-vm 沙箱)、文件处理(内存中文档解析)和向量运算(pgvector)决定。内存通常是限制因素,而不是 CPU。生产遥测数据显示,主应用平均使用 4-8 GB,高负载时峰值可达 12 GB。 + ## 快速开始 diff --git a/apps/sim/app/api/knowledge/search/utils.test.ts b/apps/sim/app/api/knowledge/search/utils.test.ts index 279f7e56e..6224e046e 100644 --- a/apps/sim/app/api/knowledge/search/utils.test.ts +++ b/apps/sim/app/api/knowledge/search/utils.test.ts @@ -408,6 +408,7 @@ describe('Knowledge Search Utils', () => { input: ['test query'], model: 'text-embedding-3-small', encoding_format: 'float', + 
dimensions: 1536, }), }) ) diff --git a/apps/sim/lib/knowledge/embeddings.ts b/apps/sim/lib/knowledge/embeddings.ts index 2b57b34c5..1171065c7 100644 --- a/apps/sim/lib/knowledge/embeddings.ts +++ b/apps/sim/lib/knowledge/embeddings.ts @@ -8,6 +8,17 @@ const logger = createLogger('EmbeddingUtils') const MAX_TOKENS_PER_REQUEST = 8000 const MAX_CONCURRENT_BATCHES = env.KB_CONFIG_CONCURRENCY_LIMIT || 50 +const EMBEDDING_DIMENSIONS = 1536 + +/** + * Check if the model supports custom dimensions. + * text-embedding-3-* models support the dimensions parameter. + * Checks for 'embedding-3' to handle Azure deployments with custom naming conventions. + */ +function supportsCustomDimensions(modelName: string): boolean { + const name = modelName.toLowerCase() + return name.includes('embedding-3') && !name.includes('ada') +} export class EmbeddingAPIError extends Error { public status: number @@ -93,15 +104,19 @@ async function getEmbeddingConfig( async function callEmbeddingAPI(inputs: string[], config: EmbeddingConfig): Promise { return retryWithExponentialBackoff( async () => { + const useDimensions = supportsCustomDimensions(config.modelName) + const requestBody = config.useAzure ? { input: inputs, encoding_format: 'float', + ...(useDimensions && { dimensions: EMBEDDING_DIMENSIONS }), } : { input: inputs, model: config.modelName, encoding_format: 'float', + ...(useDimensions && { dimensions: EMBEDDING_DIMENSIONS }), } const response = await fetch(config.apiUrl, { diff --git a/apps/sim/lib/uploads/providers/blob/client.ts b/apps/sim/lib/uploads/providers/blob/client.ts index 346ce27fa..f83e6b576 100644 --- a/apps/sim/lib/uploads/providers/blob/client.ts +++ b/apps/sim/lib/uploads/providers/blob/client.ts @@ -18,6 +18,52 @@ const logger = createLogger('BlobClient') let _blobServiceClient: BlobServiceClientInstance | null = null +interface ParsedCredentials { + accountName: string + accountKey: string +} + +/** + * Extract account name and key from an Azure connection string. + * Connection strings have the format: DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=... + */ +function parseConnectionString(connectionString: string): ParsedCredentials { + const accountNameMatch = connectionString.match(/AccountName=([^;]+)/) + if (!accountNameMatch) { + throw new Error('Cannot extract account name from connection string') + } + + const accountKeyMatch = connectionString.match(/AccountKey=([^;]+)/) + if (!accountKeyMatch) { + throw new Error('Cannot extract account key from connection string') + } + + return { + accountName: accountNameMatch[1], + accountKey: accountKeyMatch[1], + } +} + +/** + * Get account credentials from BLOB_CONFIG, extracting from connection string if necessary. 
+ */ +function getAccountCredentials(): ParsedCredentials { + if (BLOB_CONFIG.connectionString) { + return parseConnectionString(BLOB_CONFIG.connectionString) + } + + if (BLOB_CONFIG.accountName && BLOB_CONFIG.accountKey) { + return { + accountName: BLOB_CONFIG.accountName, + accountKey: BLOB_CONFIG.accountKey, + } + } + + throw new Error( + 'Azure Blob Storage credentials are missing – set AZURE_CONNECTION_STRING or both AZURE_ACCOUNT_NAME and AZURE_ACCOUNT_KEY' + ) +} + export async function getBlobServiceClient(): Promise { if (_blobServiceClient) return _blobServiceClient @@ -127,6 +173,8 @@ export async function getPresignedUrl(key: string, expiresIn = 3600) { const containerClient = blobServiceClient.getContainerClient(BLOB_CONFIG.containerName) const blockBlobClient = containerClient.getBlockBlobClient(key) + const { accountName, accountKey } = getAccountCredentials() + const sasOptions = { containerName: BLOB_CONFIG.containerName, blobName: key, @@ -137,13 +185,7 @@ export async function getPresignedUrl(key: string, expiresIn = 3600) { const sasToken = generateBlobSASQueryParameters( sasOptions, - new StorageSharedKeyCredential( - BLOB_CONFIG.accountName, - BLOB_CONFIG.accountKey ?? - (() => { - throw new Error('AZURE_ACCOUNT_KEY is required when using account name authentication') - })() - ) + new StorageSharedKeyCredential(accountName, accountKey) ).toString() return `${blockBlobClient.url}?${sasToken}` @@ -168,9 +210,14 @@ export async function getPresignedUrlWithConfig( StorageSharedKeyCredential, } = await import('@azure/storage-blob') let tempBlobServiceClient: BlobServiceClientInstance + let accountName: string + let accountKey: string if (customConfig.connectionString) { tempBlobServiceClient = BlobServiceClient.fromConnectionString(customConfig.connectionString) + const credentials = parseConnectionString(customConfig.connectionString) + accountName = credentials.accountName + accountKey = credentials.accountKey } else if (customConfig.accountName && customConfig.accountKey) { const sharedKeyCredential = new StorageSharedKeyCredential( customConfig.accountName, @@ -180,6 +227,8 @@ export async function getPresignedUrlWithConfig( `https://${customConfig.accountName}.blob.core.windows.net`, sharedKeyCredential ) + accountName = customConfig.accountName + accountKey = customConfig.accountKey } else { throw new Error( 'Custom blob config must include either connectionString or accountName + accountKey' @@ -199,13 +248,7 @@ export async function getPresignedUrlWithConfig( const sasToken = generateBlobSASQueryParameters( sasOptions, - new StorageSharedKeyCredential( - customConfig.accountName, - customConfig.accountKey ?? 
- (() => { - throw new Error('Account key is required when using account name authentication') - })() - ) + new StorageSharedKeyCredential(accountName, accountKey) ).toString() return `${blockBlobClient.url}?${sasToken}` @@ -403,13 +446,9 @@ export async function getMultipartPartUrls( if (customConfig) { if (customConfig.connectionString) { blobServiceClient = BlobServiceClient.fromConnectionString(customConfig.connectionString) - const match = customConfig.connectionString.match(/AccountName=([^;]+)/) - if (!match) throw new Error('Cannot extract account name from connection string') - accountName = match[1] - - const keyMatch = customConfig.connectionString.match(/AccountKey=([^;]+)/) - if (!keyMatch) throw new Error('Cannot extract account key from connection string') - accountKey = keyMatch[1] + const credentials = parseConnectionString(customConfig.connectionString) + accountName = credentials.accountName + accountKey = credentials.accountKey } else if (customConfig.accountName && customConfig.accountKey) { const credential = new StorageSharedKeyCredential( customConfig.accountName, @@ -428,12 +467,9 @@ export async function getMultipartPartUrls( } else { blobServiceClient = await getBlobServiceClient() containerName = BLOB_CONFIG.containerName - accountName = BLOB_CONFIG.accountName - accountKey = - BLOB_CONFIG.accountKey || - (() => { - throw new Error('AZURE_ACCOUNT_KEY is required') - })() + const credentials = getAccountCredentials() + accountName = credentials.accountName + accountKey = credentials.accountKey } const containerClient = blobServiceClient.getContainerClient(containerName) @@ -501,12 +537,10 @@ export async function completeMultipartUpload( const containerClient = blobServiceClient.getContainerClient(containerName) const blockBlobClient = containerClient.getBlockBlobClient(key) - // Sort parts by part number and extract block IDs const sortedBlockIds = parts .sort((a, b) => a.partNumber - b.partNumber) .map((part) => part.blockId) - // Commit the block list to create the final blob await blockBlobClient.commitBlockList(sortedBlockIds, { metadata: { multipartUpload: 'completed', @@ -557,10 +591,8 @@ export async function abortMultipartUpload(key: string, customConfig?: BlobConfi const blockBlobClient = containerClient.getBlockBlobClient(key) try { - // Delete the blob if it exists (this also cleans up any uncommitted blocks) await blockBlobClient.deleteIfExists() } catch (error) { - // Ignore errors since we're just cleaning up logger.warn('Error cleaning up multipart upload:', error) } } diff --git a/docker-compose.local.yml b/docker-compose.local.yml index 768c0cc70..a2f768c30 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -52,7 +52,7 @@ services: deploy: resources: limits: - memory: 8G + memory: 12G healthcheck: test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health'] interval: 90s diff --git a/docker-compose.ollama.yml b/docker-compose.ollama.yml index 4f32929f9..9d4f072bf 100644 --- a/docker-compose.ollama.yml +++ b/docker-compose.ollama.yml @@ -56,7 +56,7 @@ services: deploy: resources: limits: - memory: 8G + memory: 12G healthcheck: test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health'] interval: 90s diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index c6b79e6c1..74bdd67f8 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -42,7 +42,7 @@ services: deploy: resources: limits: - memory: 4G + memory: 12G environment: -
DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000} diff --git a/helm/sim/examples/values-production.yaml b/helm/sim/examples/values-production.yaml index 3bac1ba09..794afa4ac 100644 --- a/helm/sim/examples/values-production.yaml +++ b/helm/sim/examples/values-production.yaml @@ -10,13 +10,13 @@ global: app: enabled: true replicaCount: 2 - + resources: limits: - memory: "6Gi" + memory: "8Gi" cpu: "2000m" requests: - memory: "4Gi" + memory: "6Gi" cpu: "1000m" # Production URLs (REQUIRED - update with your actual domain names) @@ -49,14 +49,14 @@ app: realtime: enabled: true replicaCount: 2 - + resources: limits: - memory: "4Gi" - cpu: "1000m" - requests: - memory: "2Gi" + memory: "1Gi" cpu: "500m" + requests: + memory: "512Mi" + cpu: "250m" env: NEXT_PUBLIC_APP_URL: "https://sim.acme.ai" diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index c182c2772..f0def91cf 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -29,10 +29,10 @@ app: # Resource limits and requests resources: limits: - memory: "4Gi" + memory: "8Gi" cpu: "2000m" requests: - memory: "2Gi" + memory: "4Gi" cpu: "1000m" # Node selector for pod scheduling (leave empty to allow scheduling on any node) @@ -232,24 +232,24 @@ app: realtime: # Enable/disable the realtime service enabled: true - + # Image configuration image: repository: simstudioai/realtime tag: latest pullPolicy: Always - + # Number of replicas replicaCount: 1 - + # Resource limits and requests resources: limits: - memory: "2Gi" - cpu: "1000m" - requests: memory: "1Gi" cpu: "500m" + requests: + memory: "512Mi" + cpu: "250m" # Node selector for pod scheduling (leave empty to allow scheduling on any node) nodeSelector: {}
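For reviewers who want to see the presigned-URL path end to end, here is a minimal TypeScript sketch of how connection-string parsing is meant to feed `StorageSharedKeyCredential` and `generateBlobSASQueryParameters`. The connection string, container name, and blob key below are hypothetical placeholders; `parseConnectionString` mirrors the helper added in apps/sim/lib/uploads/providers/blob/client.ts, and the SAS options follow the same read-only, one-hour shape as `getPresignedUrl`.

```ts
import {
  BlobSASPermissions,
  generateBlobSASQueryParameters,
  StorageSharedKeyCredential,
} from '@azure/storage-blob'

// Mirrors the parseConnectionString helper added in blob/client.ts.
function parseConnectionString(connectionString: string) {
  const accountName = connectionString.match(/AccountName=([^;]+)/)?.[1]
  const accountKey = connectionString.match(/AccountKey=([^;]+)/)?.[1]
  if (!accountName || !accountKey) {
    throw new Error('Cannot extract account credentials from connection string')
  }
  return { accountName, accountKey }
}

// Hypothetical connection string, container, and blob key for illustration only.
const connectionString =
  'DefaultEndpointsProtocol=https;AccountName=examplestore;AccountKey=ZmFrZS1rZXk=;EndpointSuffix=core.windows.net'
const containerName = 'uploads'
const blobName = 'example.pdf'

const { accountName, accountKey } = parseConnectionString(connectionString)

// Sign a read-only SAS token valid for one hour, the same shape getPresignedUrl uses.
const sasToken = generateBlobSASQueryParameters(
  {
    containerName,
    blobName,
    permissions: BlobSASPermissions.parse('r'),
    expiresOn: new Date(Date.now() + 3600 * 1000),
  },
  new StorageSharedKeyCredential(accountName, accountKey)
).toString()

console.log(`https://${accountName}.blob.core.windows.net/${containerName}/${blobName}?${sasToken}`)
```

The practical effect of the patch is that SAS signing no longer requires AZURE_ACCOUNT_NAME and AZURE_ACCOUNT_KEY to be set separately: when only AZURE_CONNECTION_STRING is configured, the account name and key are derived from it before signing.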
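On the knowledge-base side, a small sketch of how the `dimensions` parameter is gated by model name, using the same `EMBEDDING_DIMENSIONS` constant and `supportsCustomDimensions` check added in apps/sim/lib/knowledge/embeddings.ts. `buildRequestBody` is a hypothetical helper introduced here for illustration; the patch builds the request body inline inside `callEmbeddingAPI`.

```ts
const EMBEDDING_DIMENSIONS = 1536

// Same check the patch adds: text-embedding-3-* models (including Azure
// deployments with custom names) accept a dimensions parameter, ada models do not.
function supportsCustomDimensions(modelName: string): boolean {
  const name = modelName.toLowerCase()
  return name.includes('embedding-3') && !name.includes('ada')
}

// Hypothetical helper for illustration; the patch constructs this object inline.
function buildRequestBody(inputs: string[], modelName: string, useAzure: boolean) {
  const useDimensions = supportsCustomDimensions(modelName)
  return useAzure
    ? {
        input: inputs,
        encoding_format: 'float',
        ...(useDimensions && { dimensions: EMBEDDING_DIMENSIONS }),
      }
    : {
        input: inputs,
        model: modelName,
        encoding_format: 'float',
        ...(useDimensions && { dimensions: EMBEDDING_DIMENSIONS }),
      }
}

console.log(buildRequestBody(['test query'], 'text-embedding-3-small', false))
// includes dimensions: 1536
console.log(buildRequestBody(['test query'], 'text-embedding-ada-002', false))
// omits dimensions, since ada models do not accept the parameter
```

This keeps 1536-dimension vectors for embedding-3 models while leaving ada-style models untouched, which matches the `dimensions: 1536` expectation added to apps/sim/app/api/knowledge/search/utils.test.ts.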