Compare commits

...

1 Commit

Author SHA1 Message Date
Theodore Li
bb0a4f089a Migrate agents to use hosted api key framework 2026-04-09 18:30:37 -07:00
13 changed files with 367 additions and 486 deletions

View File

@@ -2,12 +2,12 @@ import { db } from '@sim/db'
import { workspaceBYOKKeys } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, eq } from 'drizzle-orm'
import { getRotatingApiKey } from '@/lib/core/config/api-keys'
import { acquireHostedKey } from '@/lib/api-key/hosted-key'
import { env } from '@/lib/core/config/env'
import { isHosted } from '@/lib/core/config/feature-flags'
import { decryptSecret } from '@/lib/core/security/encryption'
import { getWorkspaceById } from '@/lib/workspaces/permissions/utils'
import { getHostedModels } from '@/providers/models'
import { getHostedModels, PROVIDER_DEFINITIONS } from '@/providers/models'
import { PROVIDER_PLACEHOLDER_KEY } from '@/providers/utils'
import { useProvidersStore } from '@/stores/providers/store'
import type { BYOKProviderId } from '@/tools/types'
@@ -107,41 +107,26 @@ export async function getApiKeyWithBYOK(
return { apiKey: userProvidedKey || env.AZURE_ANTHROPIC_API_KEY || '', isBYOK: false }
}
const isOpenAIModel = provider === 'openai'
const isClaudeModel = provider === 'anthropic'
const isGeminiModel = provider === 'google'
const isMistralModel = provider === 'mistral'
const hosting = PROVIDER_DEFINITIONS[provider]?.hosting
const byokProviderId = isGeminiModel ? 'google' : (provider as BYOKProviderId)
if (
isHosted &&
workspaceId &&
(isOpenAIModel || isClaudeModel || isGeminiModel || isMistralModel)
) {
if (isHosted && workspaceId && hosting) {
const hostedModels = getHostedModels()
const isModelHosted = hostedModels.some((m) => m.toLowerCase() === model.toLowerCase())
logger.debug('BYOK check', { provider, model, workspaceId, isHosted, isModelHosted })
if (isModelHosted || isMistralModel) {
const byokResult = await getBYOKKey(workspaceId, byokProviderId)
if (byokResult) {
logger.info('Using BYOK key', { provider, model, workspaceId })
return byokResult
}
logger.debug('No BYOK key found, falling back', { provider, model, workspaceId })
if (isModelHosted) {
try {
const serverKey = getRotatingApiKey(isGeminiModel ? 'gemini' : provider)
return { apiKey: serverKey, isBYOK: false }
} catch (_error) {
if (userProvidedKey) {
return { apiKey: userProvidedKey, isBYOK: false }
}
throw new Error(`No API key available for ${provider} ${model}`)
if (isModelHosted) {
try {
const result = await acquireHostedKey(hosting, workspaceId, `${provider} ${model}`)
return { apiKey: result.apiKey, isBYOK: result.isBYOK }
} catch (error) {
const status = (error as { status?: number }).status
// Fall back to user-provided key only when no platform keys are configured.
// Rate-limit (429) errors must surface so the workspace gets the right signal.
if (status === 503 && userProvidedKey) {
return { apiKey: userProvidedKey, isBYOK: false }
}
throw error
}
}
}

View File

@@ -0,0 +1,102 @@
import { createLogger } from '@sim/logger'
import { getBYOKKey } from '@/lib/api-key/byok'
import { getHostedKeyRateLimiter } from '@/lib/core/rate-limiter/hosted-key'
import type { CustomPricingResult, HostingConfig, HostingPricing } from '@/tools/types'
const logger = createLogger('HostedKey')
/** Re-export so non-tool callers can stay out of `@/tools/types`. */
export type { CustomPricingResult, HostingConfig, HostingPricing } from '@/tools/types'
/** Result of acquiring a key for a hosted resource. */
export interface AcquireHostedKeyResult {
  apiKey: string
  /** True if the key came from a workspace BYOK entry; false if from the platform pool */
  isBYOK: boolean
  /** Env var name the platform key came from (only when !isBYOK) */
  envVarName?: string
  /** Index of the key in the rotation pool (only when !isBYOK) */
  keyIndex?: number
}

/**
 * Acquire an API key for a hosted resource (tool or LLM provider).
 *
 * Resolution order:
 * 1. The workspace's BYOK key, when one exists — returned without billing.
 * 2. The platform's rate-limited key pool, distributed round-robin.
 *
 * Throws an error carrying `status: 429` plus `retryAfterMs` when the
 * workspace is rate limited, or `status: 503` when no platform keys exist.
 *
 * @param hosting - Hosting config describing the env prefix, BYOK provider, rate limit
 * @param workspaceId - Billing actor for rate limiting
 * @param resourceId - Identifier used in error/log messages and as the rate-limiter
 *   provider key when `hosting.byokProviderId` is not set
 */
export async function acquireHostedKey(
  hosting: HostingConfig,
  workspaceId: string,
  resourceId: string
): Promise<AcquireHostedKeyResult> {
  const { byokProviderId, envKeyPrefix, rateLimit } = hosting

  // Step 1: a workspace-owned BYOK key always wins and is never billed.
  if (byokProviderId) {
    try {
      const byok = await getBYOKKey(workspaceId, byokProviderId)
      if (byok) {
        logger.info(`Using BYOK key for ${resourceId}`)
        return { apiKey: byok.apiKey, isBYOK: true }
      }
    } catch (error) {
      // BYOK lookup failures are non-fatal; fall through to the platform pool.
      logger.error(`Failed to get BYOK key for ${resourceId}:`, error)
    }
  }

  // Step 2: platform pool, rate limited per billing actor (the workspace).
  const pool = getHostedKeyRateLimiter()
  const outcome = await pool.acquireKey(
    byokProviderId ?? resourceId,
    envKeyPrefix,
    rateLimit,
    workspaceId
  )

  if (outcome.success && outcome.key) {
    return {
      apiKey: outcome.key,
      isBYOK: false,
      envVarName: outcome.envVarName,
      keyIndex: outcome.keyIndex,
    }
  }

  if (outcome.billingActorRateLimited) {
    // 429: the workspace exhausted its allowance; callers may retry later.
    const throttled = new Error(
      outcome.error || `Rate limit exceeded for ${resourceId}`
    ) as Error & { status: number; retryAfterMs?: number }
    throttled.status = 429
    throttled.retryAfterMs = outcome.retryAfterMs
    throw throttled
  }

  // 503: no platform keys are configured for this resource at all.
  const unconfigured = new Error(
    outcome.error || `No hosted keys configured for ${resourceId}`
  ) as Error & { status: number }
  unconfigured.status = 503
  throw unconfigured
}
/**
 * Resolve a {@link HostingPricing} to a flat `{ cost, metadata? }`.
 *
 * Flat-rate (`per_request`) pricing short-circuits to its fixed cost; any
 * other pricing delegates to the config's `getCost` callback, normalizing a
 * bare-number result into the `{ cost }` shape. Mirrors the previous
 * `calculateToolCost()` in `tools/index.ts` so the same logic powers both
 * tool cost handling and LLM provider cost handling.
 */
export function calculateHostedCost<P>(
  pricing: HostingPricing<P>,
  params: P,
  response: Record<string, unknown>
): CustomPricingResult {
  if (pricing.type !== 'per_request') {
    const computed = pricing.getCost(params, response)
    return typeof computed === 'number' ? { cost: computed } : computed
  }
  return { cost: pricing.cost }
}

View File

@@ -5,7 +5,8 @@ import {
type BaseServerTool,
type ServerToolContext,
} from '@/lib/copilot/tools/server/base-tool'
import { getRotatingApiKey } from '@/lib/core/config/api-keys'
import { acquireHostedKey } from '@/lib/api-key/hosted-key'
import { PROVIDER_DEFINITIONS } from '@/providers/models'
import { getServePathPrefix } from '@/lib/uploads'
import {
downloadWorkspaceFile,
@@ -76,8 +77,12 @@ export const generateImageServerTool: BaseServerTool<GenerateImageArgs, Generate
}
try {
const apiKey = getRotatingApiKey('gemini')
const ai = new GoogleGenAI({ apiKey })
const googleHosting = PROVIDER_DEFINITIONS.google?.hosting
if (!googleHosting) {
throw new Error('No hosted key config for google')
}
const acquired = await acquireHostedKey(googleHosting, workspaceId, 'gemini-image')
const ai = new GoogleGenAI({ apiKey: acquired.apiKey })
const aspectRatio = params.aspectRatio || '1:1'
const sizeHint = ASPECT_RATIO_TO_SIZE[aspectRatio]

View File

@@ -1,42 +1,2 @@
import { env } from '@/lib/core/config/env'
/**
 * Rotates through available API keys for a provider.
 *
 * Selection is a stateless round-robin keyed off the current wall-clock
 * minute, which spreads load across the configured keys with no shared state.
 *
 * @param provider - The provider to get a key for (e.g., 'openai')
 * @returns The selected API key
 * @throws Error if the provider has no rotation support or no keys configured
 */
export function getRotatingApiKey(provider: string): string {
  let candidates: Array<string | undefined>
  switch (provider) {
    case 'openai':
      candidates = [env.OPENAI_API_KEY_1, env.OPENAI_API_KEY_2, env.OPENAI_API_KEY_3]
      break
    case 'anthropic':
      candidates = [env.ANTHROPIC_API_KEY_1, env.ANTHROPIC_API_KEY_2, env.ANTHROPIC_API_KEY_3]
      break
    case 'gemini':
      candidates = [env.GEMINI_API_KEY_1, env.GEMINI_API_KEY_2, env.GEMINI_API_KEY_3]
      break
    default:
      throw new Error(`No rotation implemented for provider: ${provider}`)
  }
  // Keep only keys that are actually set (unset/empty env slots drop out).
  const keys = candidates.filter((key): key is string => Boolean(key))
  if (keys.length === 0) {
    throw new Error(
      `No API keys configured for rotation. Please configure ${provider.toUpperCase()}_API_KEY_1, ${provider.toUpperCase()}_API_KEY_2, or ${provider.toUpperCase()}_API_KEY_3.`
    )
  }
  // Simple round-robin rotation based on current minute:
  // distributes load across keys and is stateless.
  return keys[new Date().getMinutes() % keys.length]
}
// This module previously exported getRotatingApiKey(). That function has been
// replaced by HostedKeyRateLimiter in @/lib/core/rate-limiter/hosted-key.

View File

@@ -1,6 +1,5 @@
import { createEnvMock } from '@sim/testing'
import { afterEach, describe, expect, it, vi } from 'vitest'
import { getRotatingApiKey } from '@/lib/core/config/api-keys'
import { decryptSecret, encryptSecret } from '@/lib/core/security/encryption'
import { cn } from '@/lib/core/utils/cn'
import {
@@ -311,28 +310,3 @@ describe('getInvalidCharacters', () => {
})
})
// Tests for getRotatingApiKey: per-provider key selection and rejection of
// unknown providers. Expected values follow the test-<provider>-key-[1-3]
// pattern — presumably supplied by the mocked env (createEnvMock); the exact
// key picked depends on the current minute, hence the range match.
describe('getRotatingApiKey', () => {
  it.concurrent('should return OpenAI API key based on current minute', () => {
    const result = getRotatingApiKey('openai')
    expect(result).toMatch(/^test-openai-key-[1-3]$/)
  })
  it.concurrent('should return Anthropic API key based on current minute', () => {
    const result = getRotatingApiKey('anthropic')
    expect(result).toMatch(/^test-anthropic-key-[1-3]$/)
  })
  it.concurrent('should return Gemini API key based on current minute', () => {
    const result = getRotatingApiKey('gemini')
    expect(result).toMatch(/^test-gemini-key-[1-3]$/)
  })
  it.concurrent('should throw error for unsupported provider', () => {
    expect(() => getRotatingApiKey('unsupported')).toThrow('No rotation implemented for provider')
  })
  it.concurrent('should rotate keys based on minute modulo', () => {
    // Cannot pin which key the minute selects; assert membership in the pool.
    const result = getRotatingApiKey('openai')
    expect(['test-openai-key-1', 'test-openai-key-2', 'test-openai-key-3']).toContain(result)
  })
})

View File

@@ -1,13 +1,12 @@
import { createLogger } from '@sim/logger'
import { getApiKeyWithBYOK } from '@/lib/api-key/byok'
import { getCostMultiplier } from '@/lib/core/config/feature-flags'
import { calculateHostedCost } from '@/lib/api-key/hosted-key'
import type { StreamingExecution } from '@/executor/types'
import { PROVIDER_DEFINITIONS } from '@/providers/models'
import { getProviderExecutor } from '@/providers/registry'
import type { ProviderId, ProviderRequest, ProviderResponse } from '@/providers/types'
import type { ModelPricing, ProviderId, ProviderRequest, ProviderResponse } from '@/providers/types'
import {
calculateCost,
generateStructuredOutputInstructions,
shouldBillModelUsage,
sumToolCosts,
supportsReasoningEffort,
supportsTemperature,
@@ -15,6 +14,12 @@ import {
supportsVerbosity,
} from '@/providers/utils'
const ZERO_PRICING: ModelPricing = {
input: 0,
output: 0,
updatedAt: new Date(0).toISOString(),
}
const logger = createLogger('Providers')
/**
@@ -67,7 +72,7 @@ export async function executeProviderRequest(
throw new Error(`Provider ${providerId} does not implement executeRequest`)
}
let resolvedRequest = sanitizeRequest(request)
const resolvedRequest = sanitizeRequest(request) as ProviderRequest & Record<string, unknown>
let isBYOK = false
if (request.workspaceId) {
@@ -78,7 +83,8 @@ export async function executeProviderRequest(
request.workspaceId,
request.apiKey
)
resolvedRequest = { ...resolvedRequest, apiKey: result.apiKey }
const apiKeyField = PROVIDER_DEFINITIONS[providerId]?.hosting?.apiKeyParam ?? 'apiKey'
resolvedRequest[apiKeyField] = result.apiKey
isBYOK = result.isBYOK
} catch (error) {
logger.error('Failed to resolve API key:', {
@@ -128,36 +134,36 @@ export async function executeProviderRequest(
}
if (response.tokens) {
const { input: promptTokens = 0, output: completionTokens = 0 } = response.tokens
const useCachedInput = !!request.context && request.context.length > 0
const shouldBill = shouldBillModelUsage(response.model) && !isBYOK
if (shouldBill) {
const costMultiplier = getCostMultiplier()
response.cost = calculateCost(
response.model,
promptTokens,
completionTokens,
useCachedInput,
costMultiplier,
costMultiplier
const hostingPricing = PROVIDER_DEFINITIONS[providerId]?.hosting?.pricing
if (hostingPricing && !isBYOK) {
const result = calculateHostedCost(
hostingPricing,
sanitizedRequest as unknown as Record<string, unknown>,
response as unknown as Record<string, unknown>
)
const meta = (result.metadata ?? {}) as {
input?: number
output?: number
pricing?: ModelPricing
}
response.cost = {
input: meta.input ?? 0,
output: meta.output ?? 0,
total: result.cost,
pricing: meta.pricing ?? ZERO_PRICING,
}
} else {
response.cost = {
input: 0,
output: 0,
total: 0,
pricing: {
input: 0,
output: 0,
updatedAt: new Date().toISOString(),
},
pricing: { ...ZERO_PRICING, updatedAt: new Date().toISOString() },
}
if (isBYOK) {
logger.debug(`Not billing model usage for ${response.model} - workspace BYOK key used`)
} else {
logger.debug(
`Not billing model usage for ${response.model} - user provided API key or not hosted model`
`Not billing model usage for ${response.model} - provider has no hosting.pricing or non-hosted model`
)
}
}

View File

@@ -0,0 +1,100 @@
import { getCostMultiplier } from '@/lib/core/config/feature-flags'
import { getEmbeddingModelPricing, getModelPricing } from '@/providers/models'
import type { ModelPricing } from '@/providers/types'
import type { CustomPricing } from '@/tools/types'
/**
 * This module is intentionally a leaf — it imports `getEmbeddingModelPricing`
 * and `getModelPricing` from `@/providers/models` but only invokes them inside
 * a closure, never at top level. That keeps the import safe inside the
 * `models.ts → llm-token-pricing.ts → models.ts` cycle, because all the
 * back-references resolve lazily by the time `getCost` actually runs.
 */

// Fallback pricing used when a model has no known pricing entry.
const DEFAULT_PRICING: ModelPricing = {
  input: 1.0,
  cachedInput: 0.5,
  output: 5.0,
  updatedAt: '2025-03-21',
}

// Structured cost breakdown for one LLM call.
interface LlmCostResult {
  input: number
  output: number
  total: number
  pricing: ModelPricing
}

/**
 * Calculates token-based cost for an LLM model. Mirrors `calculateCost` in
 * `@/providers/utils` but lives in this leaf file so `models.ts` can wire up
 * `hosting.pricing` without dragging `utils.ts`'s top-level dependencies into
 * a circular import.
 *
 * Unknown models yield an all-zero result carrying {@link DEFAULT_PRICING}.
 */
function calculateLlmCost(
  model: string,
  promptTokens: number,
  completionTokens: number,
  useCachedInput: boolean,
  inputMultiplier: number,
  outputMultiplier: number
): LlmCostResult {
  // Embedding pricing takes precedence; otherwise fall back to chat-model pricing.
  const pricing = getEmbeddingModelPricing(model) ?? getModelPricing(model)
  if (!pricing) {
    return { input: 0, output: 0, total: 0, pricing: DEFAULT_PRICING }
  }
  // Rates are per million tokens; cached input uses its own rate when available.
  const inputRate =
    useCachedInput && pricing.cachedInput
      ? pricing.cachedInput / 1_000_000
      : pricing.input / 1_000_000
  const finalInput = promptTokens * inputRate * inputMultiplier
  const finalOutput = completionTokens * (pricing.output / 1_000_000) * outputMultiplier
  // Trim float noise to 8 decimal places, matching the original behavior.
  const round8 = (value: number) => Number.parseFloat(value.toFixed(8))
  return {
    input: round8(finalInput),
    output: round8(finalOutput),
    total: round8(finalInput + finalOutput),
    pricing,
  }
}
/**
* Build a token-based pricing entry for an LLM provider's hosting config.
*
* The returned `CustomPricing` reads `model` and `tokens` off the provider
* response, runs them through {@link calculateLlmCost}, and packs the
* structured breakdown into `metadata`. `providers/index.ts` reads it back
* out of `metadata` to populate `response.cost`.
*/
export function buildLlmTokenPricing(): CustomPricing {
return {
type: 'custom',
getCost: (params, response) => {
const r = response as { model?: string; tokens?: { input?: number; output?: number } }
const p = params as { context?: unknown[] }
const useCachedInput = Array.isArray(p.context) && p.context.length > 0
const multiplier = getCostMultiplier()
const result = calculateLlmCost(
r.model ?? '',
r.tokens?.input ?? 0,
r.tokens?.output ?? 0,
useCachedInput,
multiplier,
multiplier
)
return {
cost: result.total,
metadata: {
input: result.input,
output: result.output,
pricing: result.pricing,
},
}
},
}
}

View File

@@ -25,6 +25,8 @@ import {
VllmIcon,
xAIIcon,
} from '@/components/icons'
import type { HostingConfig } from '@/lib/api-key/hosted-key'
import { buildLlmTokenPricing } from '@/providers/llm-token-pricing'
import type { ModelPricing, ProviderId } from '@/providers/types'
export interface ModelCapabilities {
@@ -69,6 +71,12 @@ export interface ProviderDefinition {
icon?: React.ComponentType<{ className?: string }>
capabilities?: ModelCapabilities
contextInformationAvailable?: boolean
/**
* Hosted-key configuration. When set, the platform supplies rate-limited API
* keys for this provider and BYOK workspace keys take precedence. See
* {@link HostingConfig} for the shared shape used by tools and providers alike.
*/
hosting?: HostingConfig
}
export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
@@ -123,6 +131,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
toolUsageControl: true,
},
hosting: {
envKeyPrefix: 'OPENAI_API_KEY',
apiKeyParam: 'apiKey',
byokProviderId: 'openai',
rateLimit: { mode: 'per_request', requestsPerMinute: 10000 },
pricing: buildLlmTokenPricing(),
},
models: [
// GPT-4.1 family
{
@@ -478,6 +493,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
toolUsageControl: true,
},
hosting: {
envKeyPrefix: 'ANTHROPIC_API_KEY',
apiKeyParam: 'apiKey',
byokProviderId: 'anthropic',
rateLimit: { mode: 'per_request', requestsPerMinute: 10000 },
pricing: buildLlmTokenPricing(),
},
models: [
{
id: 'claude-opus-4-6',
@@ -1051,6 +1073,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
toolUsageControl: true,
},
hosting: {
envKeyPrefix: 'GEMINI_API_KEY',
apiKeyParam: 'apiKey',
byokProviderId: 'google',
rateLimit: { mode: 'per_request', requestsPerMinute: 10000 },
pricing: buildLlmTokenPricing(),
},
icon: GeminiIcon,
models: [
{
@@ -1732,6 +1761,13 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
capabilities: {
toolUsageControl: true,
},
hosting: {
envKeyPrefix: 'MISTRAL_API_KEY',
apiKeyParam: 'apiKey',
byokProviderId: 'mistral',
rateLimit: { mode: 'per_request', requestsPerMinute: 10000 },
pricing: buildLlmTokenPricing(),
},
models: [
{
id: 'mistral-large-latest',
@@ -2602,11 +2638,15 @@ export function getProvidersWithToolUsageControl(): string[] {
}
export function getHostedModels(): string[] {
return [
...getProviderModels('openai'),
...getProviderModels('anthropic'),
...getProviderModels('google'),
]
const models: string[] = []
for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
if (provider.hosting) {
for (const model of provider.models) {
models.push(model.id)
}
}
}
return models
}
export function getComputerUseModels(): string[] {

View File

@@ -1,5 +1,4 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import * as environmentModule from '@/lib/core/config/feature-flags'
import { describe, expect, it } from 'vitest'
import {
calculateCost,
extractAndParseJSON,
@@ -9,7 +8,6 @@ import {
getAllModelProviders,
getAllModels,
getAllProviderIds,
getApiKey,
getBaseModelProviders,
getHostedModels,
getMaxOutputTokensForModel,
@@ -32,7 +30,6 @@ import {
PROVIDERS_WITH_TOOL_USAGE_CONTROL,
prepareToolExecution,
prepareToolsWithUsageControl,
shouldBillModelUsage,
supportsReasoningEffort,
supportsTemperature,
supportsThinking,
@@ -41,135 +38,6 @@ import {
updateOllamaProviderModels,
} from '@/providers/utils'
const isHostedSpy = vi.spyOn(environmentModule, 'isHosted', 'get') as unknown as {
mockReturnValue: (value: boolean) => void
}
const mockGetRotatingApiKey = vi.fn().mockReturnValue('rotating-server-key')
const originalRequire = module.require
// Legacy tests for getApiKey (provider key resolution with hosted rotation).
// `module.require` is stubbed because getApiKey loads the rotation module
// lazily via require(); `isHostedSpy` toggles the hosted-platform flag.
describe('getApiKey', () => {
  // Snapshot of process.env, restored after each test.
  const originalEnv = { ...process.env }
  beforeEach(() => {
    vi.clearAllMocks()
    isHostedSpy.mockReturnValue(false)
    // Default stub: rotation module loads and returns the mocked key getter.
    module.require = vi.fn(() => ({
      getRotatingApiKey: mockGetRotatingApiKey,
    }))
  })
  afterEach(() => {
    process.env = { ...originalEnv }
    module.require = originalRequire
  })
  it.concurrent('should return user-provided key when not in hosted environment', () => {
    isHostedSpy.mockReturnValue(false)
    const key1 = getApiKey('openai', 'gpt-4', 'user-key-openai')
    expect(key1).toBe('user-key-openai')
    const key2 = getApiKey('anthropic', 'claude-3', 'user-key-anthropic')
    expect(key2).toBe('user-key-anthropic')
    const key3 = getApiKey('google', 'gemini-2.5-flash', 'user-key-google')
    expect(key3).toBe('user-key-google')
  })
  it.concurrent('should throw error if no key provided in non-hosted environment', () => {
    isHostedSpy.mockReturnValue(false)
    expect(() => getApiKey('openai', 'gpt-4')).toThrow('API key is required for openai gpt-4')
    expect(() => getApiKey('anthropic', 'claude-3')).toThrow(
      'API key is required for anthropic claude-3'
    )
  })
  it.concurrent('should fall back to user key in hosted environment if rotation fails', () => {
    isHostedSpy.mockReturnValue(true)
    // Simulate the rotation module failing to load.
    module.require = vi.fn(() => {
      throw new Error('Rotation failed')
    })
    const key = getApiKey('openai', 'gpt-4o', 'user-fallback-key')
    expect(key).toBe('user-fallback-key')
  })
  it.concurrent(
    'should throw error in hosted environment if rotation fails and no user key',
    () => {
      isHostedSpy.mockReturnValue(true)
      module.require = vi.fn(() => {
        throw new Error('Rotation failed')
      })
      expect(() => getApiKey('openai', 'gpt-4o')).toThrow('No API key available for openai gpt-4o')
    }
  )
  it.concurrent(
    'should require user key for non-OpenAI/Anthropic providers even in hosted environment',
    () => {
      isHostedSpy.mockReturnValue(true)
      const key = getApiKey('other-provider', 'some-model', 'user-key')
      expect(key).toBe('user-key')
      expect(() => getApiKey('other-provider', 'some-model')).toThrow(
        'API key is required for other-provider some-model'
      )
    }
  )
  it.concurrent(
    'should require user key for models NOT in hosted list even if provider matches',
    () => {
      isHostedSpy.mockReturnValue(true)
      // Versioned model ids are not in the hosted list, so rotation is skipped.
      const key1 = getApiKey('anthropic', 'claude-sonnet-4-20250514', 'user-key-anthropic')
      expect(key1).toBe('user-key-anthropic')
      expect(() => getApiKey('anthropic', 'claude-sonnet-4-20250514')).toThrow(
        'API key is required for anthropic claude-sonnet-4-20250514'
      )
      const key2 = getApiKey('openai', 'gpt-4o-2024-08-06', 'user-key-openai')
      expect(key2).toBe('user-key-openai')
      expect(() => getApiKey('openai', 'gpt-4o-2024-08-06')).toThrow(
        'API key is required for openai gpt-4o-2024-08-06'
      )
    }
  )
  it.concurrent('should return empty for ollama provider without requiring API key', () => {
    isHostedSpy.mockReturnValue(false)
    const key = getApiKey('ollama', 'llama2')
    expect(key).toBe('empty')
    const key2 = getApiKey('ollama', 'codellama', 'user-key')
    expect(key2).toBe('empty')
  })
  it.concurrent(
    'should return empty or user-provided key for vllm provider without requiring API key',
    () => {
      isHostedSpy.mockReturnValue(false)
      const key = getApiKey('vllm', 'vllm/qwen-3')
      expect(key).toBe('empty')
      const key2 = getApiKey('vllm', 'vllm/llama', 'user-key')
      expect(key2).toBe('user-key')
    }
  )
})
describe('Model Capabilities', () => {
describe('supportsTemperature', () => {
it.concurrent('should return true for models that support temperature', () => {
@@ -827,44 +695,6 @@ describe('getHostedModels', () => {
})
})
// Tests for shouldBillModelUsage: billing applies only to exact,
// case-insensitive matches against the hosted-model list — versioned ids
// and partial names must not match.
describe('shouldBillModelUsage', () => {
  it.concurrent('should return true for exact matches of hosted models', () => {
    expect(shouldBillModelUsage('gpt-4o')).toBe(true)
    expect(shouldBillModelUsage('o1')).toBe(true)
    expect(shouldBillModelUsage('claude-sonnet-4-0')).toBe(true)
    expect(shouldBillModelUsage('claude-opus-4-0')).toBe(true)
    expect(shouldBillModelUsage('gemini-2.5-pro')).toBe(true)
    expect(shouldBillModelUsage('gemini-2.5-flash')).toBe(true)
  })
  it.concurrent('should return false for non-hosted models', () => {
    expect(shouldBillModelUsage('deepseek-v3')).toBe(false)
    expect(shouldBillModelUsage('grok-4-latest')).toBe(false)
    expect(shouldBillModelUsage('unknown-model')).toBe(false)
  })
  it.concurrent('should return false for versioned model names not in hosted list', () => {
    expect(shouldBillModelUsage('claude-sonnet-4-20250514')).toBe(false)
    expect(shouldBillModelUsage('gpt-4o-2024-08-06')).toBe(false)
    expect(shouldBillModelUsage('claude-3-5-sonnet-20241022')).toBe(false)
  })
  it.concurrent('should be case insensitive', () => {
    expect(shouldBillModelUsage('GPT-4O')).toBe(true)
    expect(shouldBillModelUsage('Claude-Sonnet-4-0')).toBe(true)
    expect(shouldBillModelUsage('GEMINI-2.5-PRO')).toBe(true)
  })
  it.concurrent('should not match partial model names', () => {
    expect(shouldBillModelUsage('gpt-4')).toBe(false)
    expect(shouldBillModelUsage('claude-sonnet')).toBe(false)
    expect(shouldBillModelUsage('gemini')).toBe(false)
  })
})
describe('Provider Management', () => {
describe('getProviderFromModel', () => {
it.concurrent('should return correct provider for known models', () => {

View File

@@ -4,7 +4,6 @@ import type { ChatCompletionChunk } from 'openai/resources/chat/completions'
import type { CompletionUsage } from 'openai/resources/completions'
import { dollarsToCredits } from '@/lib/billing/credits/conversion'
import { env } from '@/lib/core/config/env'
import { isHosted } from '@/lib/core/config/feature-flags'
import {
buildCanonicalIndex,
type CanonicalGroup,
@@ -39,7 +38,6 @@ import {
updateOllamaModels as updateOllamaModelsInDefinitions,
} from '@/providers/models'
import type { ProviderId, ProviderToolConfig } from '@/providers/types'
import { useProvidersStore } from '@/stores/providers/store'
import { mergeToolParameters } from '@/tools/params'
const logger = createLogger('ProviderUtils')
@@ -707,17 +705,6 @@ export function getHostedModels(): string[] {
return getHostedModelsFromDefinitions()
}
/**
* Determine if model usage should be billed to the user
*
* @param model The model name
* @returns true if the usage should be billed to the user
*/
export function shouldBillModelUsage(model: string): boolean {
const hostedModels = getHostedModels()
return hostedModels.some((hostedModel) => model.toLowerCase() === hostedModel.toLowerCase())
}
/**
* Placeholder returned for providers that use their own credential mechanism
* rather than a user-supplied API key (e.g. AWS Bedrock via IAM/instance profiles).
@@ -725,61 +712,6 @@ export function shouldBillModelUsage(model: string): boolean {
*/
export const PROVIDER_PLACEHOLDER_KEY = 'provider-uses-own-credentials'
/**
 * Get an API key for a specific provider, handling rotation and fallbacks
 * For use server-side only
 *
 * @param provider - Provider id (e.g. 'openai', 'anthropic', 'google', 'ollama', 'vllm', 'bedrock')
 * @param model - Model name; checked against the hosted-model list and the Ollama/vLLM stores
 * @param userProvidedKey - Optional user-supplied key used as a fallback
 * @returns The key to send ('empty' for keyless local providers, a placeholder for Bedrock)
 * @throws Error when no key is available for the provider/model combination
 */
export function getApiKey(provider: string, model: string, userProvidedKey?: string): string {
  const hasUserKey = !!userProvidedKey
  // Ollama models run locally and need no key; match by provider id or the store's model list.
  const isOllamaModel =
    provider === 'ollama' || useProvidersStore.getState().providers.ollama.models.includes(model)
  if (isOllamaModel) {
    return 'empty'
  }
  // vLLM is also keyless, but honors a user-supplied key when one is given.
  const isVllmModel =
    provider === 'vllm' || useProvidersStore.getState().providers.vllm.models.includes(model)
  if (isVllmModel) {
    return userProvidedKey || 'empty'
  }
  // Bedrock uses its own credentials (bedrockAccessKeyId/bedrockSecretKey), not apiKey
  const isBedrockModel = provider === 'bedrock' || model.startsWith('bedrock/')
  if (isBedrockModel) {
    return PROVIDER_PLACEHOLDER_KEY
  }
  const isOpenAIModel = provider === 'openai'
  const isClaudeModel = provider === 'anthropic'
  const isGeminiModel = provider === 'google'
  // On the hosted platform, hosted models draw from the server-side rotating key pool.
  if (isHosted && (isOpenAIModel || isClaudeModel || isGeminiModel)) {
    const hostedModels = getHostedModels()
    const isModelHosted = hostedModels.some((m) => m.toLowerCase() === model.toLowerCase())
    if (isModelHosted) {
      try {
        // Lazy require — presumably avoids a module-load cycle with api-keys; TODO confirm.
        const { getRotatingApiKey } = require('@/lib/core/config/api-keys')
        // Rotation expects 'gemini' where the provider id is 'google'.
        const serverKey = getRotatingApiKey(isGeminiModel ? 'gemini' : provider)
        return serverKey
      } catch (_error) {
        // Rotation unavailable: fall back to the user's key when present.
        if (hasUserKey) {
          return userProvidedKey!
        }
        throw new Error(`No API key available for ${provider} ${model}`)
      }
    }
  }
  // Everything else requires the user to bring their own key.
  if (!hasUserKey) {
    throw new Error(`API key is required for ${provider} ${model}`)
  }
  return userProvidedKey!
}
/**
* Prepares tool configuration for provider requests with consistent tool usage control behavior
*

View File

@@ -1,5 +1,5 @@
import { createLogger } from '@sim/logger'
import { getBYOKKey } from '@/lib/api-key/byok'
import { acquireHostedKey, calculateHostedCost } from '@/lib/api-key/hosted-key'
import { generateInternalToken } from '@/lib/auth/internal'
import { isHosted } from '@/lib/core/config/feature-flags'
import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits'
@@ -19,10 +19,8 @@ import type { ExecutionContext } from '@/executor/types'
import type { ErrorInfo } from '@/tools/error-extractors'
import { extractErrorMessage } from '@/tools/error-extractors'
import type {
BYOKProviderId,
OAuthTokenPayload,
ToolConfig,
ToolHostingPricing,
ToolResponse,
ToolRetryConfig,
} from '@/tools/types'
@@ -68,8 +66,9 @@ interface HostedKeyInjectionResult {
/**
* Inject hosted API key if tool supports it and user didn't provide one.
* Checks BYOK workspace keys first, then uses the HostedKeyRateLimiter for round-robin key selection.
* Returns whether a hosted (billable) key was injected and which env var it came from.
* Delegates to the shared {@link acquireHostedKey} helper for the BYOK →
* rate-limiter → 429/503 ladder; this wrapper adds tool-specific concerns
* (parameter injection, telemetry, the `{ isUsingHostedKey, envVarName }` return shape).
*/
async function injectHostedKeyIfNeeded(
tool: ToolConfig,
@@ -80,81 +79,51 @@ async function injectHostedKeyIfNeeded(
if (!tool.hosting) return { isUsingHostedKey: false }
if (!isHosted) return { isUsingHostedKey: false }
const { envKeyPrefix, apiKeyParam, byokProviderId, rateLimit } = tool.hosting
const { workspaceId, userId, workflowId } = resolveToolScope(params, executionContext)
// Check BYOK workspace key first
if (byokProviderId && workspaceId) {
try {
const byokResult = await getBYOKKey(workspaceId, byokProviderId as BYOKProviderId)
if (byokResult) {
params[apiKeyParam] = byokResult.apiKey
logger.info(`[${requestId}] Using BYOK key for ${tool.id}`)
return { isUsingHostedKey: false } // Don't bill - user's own key
}
} catch (error) {
logger.error(`[${requestId}] Failed to get BYOK key for ${tool.id}:`, error)
// Fall through to hosted key
}
}
const rateLimiter = getHostedKeyRateLimiter()
const provider = byokProviderId || tool.id
const billingActorId = workspaceId
if (!billingActorId) {
if (!workspaceId) {
logger.error(`[${requestId}] No workspace ID available for hosted key rate limiting`)
return { isUsingHostedKey: false }
}
const acquireResult = await rateLimiter.acquireKey(
provider,
envKeyPrefix,
rateLimit,
billingActorId
)
try {
const result = await acquireHostedKey(tool.hosting, workspaceId, tool.id)
if (!acquireResult.success && acquireResult.billingActorRateLimited) {
logger.warn(`[${requestId}] Billing actor ${billingActorId} rate limited for ${tool.id}`, {
provider,
retryAfterMs: acquireResult.retryAfterMs,
params[tool.hosting.apiKeyParam] = result.apiKey
if (result.isBYOK) {
logger.info(`[${requestId}] Using BYOK key for ${tool.id}`)
return { isUsingHostedKey: false } // Don't bill - user's own key
}
logger.info(`[${requestId}] Using hosted key for ${tool.id} (${result.envVarName})`, {
keyIndex: result.keyIndex,
provider: tool.hosting.byokProviderId || tool.id,
})
PlatformEvents.hostedKeyUserThrottled({
toolId: tool.id,
reason: 'billing_actor_limit',
provider,
retryAfterMs: acquireResult.retryAfterMs ?? 0,
userId,
workspaceId,
workflowId,
})
const error = new Error(acquireResult.error || `Rate limit exceeded for ${tool.id}`)
;(error as any).status = 429
;(error as any).retryAfterMs = acquireResult.retryAfterMs
return { isUsingHostedKey: true, envVarName: result.envVarName }
} catch (error) {
const status = (error as { status?: number }).status
if (status === 429) {
const retryAfterMs = (error as { retryAfterMs?: number }).retryAfterMs
logger.warn(`[${requestId}] Billing actor ${workspaceId} rate limited for ${tool.id}`, {
provider: tool.hosting.byokProviderId || tool.id,
retryAfterMs,
})
PlatformEvents.hostedKeyUserThrottled({
toolId: tool.id,
reason: 'billing_actor_limit',
provider: tool.hosting.byokProviderId || tool.id,
retryAfterMs: retryAfterMs ?? 0,
userId,
workspaceId,
workflowId,
})
} else if (status === 503) {
logger.error(
`[${requestId}] No hosted keys configured for ${tool.id}: ${(error as Error).message}`
)
}
throw error
}
// Handle no keys configured (503)
if (!acquireResult.success) {
logger.error(`[${requestId}] No hosted keys configured for ${tool.id}: ${acquireResult.error}`)
const error = new Error(acquireResult.error || `No hosted keys configured for ${tool.id}`)
;(error as any).status = 503
throw error
}
params[apiKeyParam] = acquireResult.key
logger.info(`[${requestId}] Using hosted key for ${tool.id} (${acquireResult.envVarName})`, {
keyIndex: acquireResult.keyIndex,
provider,
})
return {
isUsingHostedKey: true,
envVarName: acquireResult.envVarName,
}
}
/**
@@ -241,39 +210,6 @@ async function executeWithRetry<T>(
throw lastError
}
/** Result from cost calculation */
interface ToolCostResult {
  /** Computed cost for this tool execution (units defined by the pricing config) */
  cost: number
  /** Optional extra details about how the cost was derived — only produced by custom pricing */
  metadata?: Record<string, unknown>
}
/**
* Calculate cost based on pricing model
*/
/**
 * Calculate cost based on pricing model.
 *
 * Fixed `per_request` pricing returns the configured cost directly.
 * `custom` pricing delegates to the user-supplied `getCost` callback,
 * which may return either a bare number or a full result object that
 * also carries metadata.
 *
 * @param pricing - Pricing configuration declared by the tool
 * @param params - Request parameters passed to the tool
 * @param response - Response produced by the tool execution
 * @returns The computed cost, plus optional metadata for custom pricing
 * @throws Error if an unknown pricing variant is encountered at runtime
 */
function calculateToolCost(
  pricing: ToolHostingPricing,
  params: Record<string, unknown>,
  response: Record<string, unknown>
): ToolCostResult {
  if (pricing.type === 'per_request') {
    return { cost: pricing.cost }
  }
  if (pricing.type === 'custom') {
    const computed = pricing.getCost(params, response)
    return typeof computed === 'number' ? { cost: computed } : computed
  }
  // Exhaustiveness guard: adding a new pricing variant must be handled above.
  const unhandled: never = pricing
  throw new Error(`Unknown pricing type: ${(unhandled as ToolHostingPricing).type}`)
}
interface HostedKeyCostResult {
cost: number
metadata?: Record<string, unknown>
@@ -281,7 +217,8 @@ interface HostedKeyCostResult {
/**
* Calculate and log hosted key cost for a tool execution.
* Logs to usageLog for audit trail and returns cost + metadata for output.
* Delegates to the shared {@link calculateHostedCost} helper, then adds
* tool-specific logging.
*/
async function processHostedKeyCost(
tool: ToolConfig,
@@ -294,7 +231,7 @@ async function processHostedKeyCost(
return { cost: 0 }
}
const { cost, metadata } = calculateToolCost(tool.hosting.pricing, params, response)
const { cost, metadata } = calculateHostedCost(tool.hosting.pricing, params, response)
if (cost <= 0) return { cost: 0 }

View File

@@ -175,7 +175,7 @@ export interface ToolConfig<P = any, R = any> {
* When configured, the tool can use Sim's hosted API keys if user doesn't provide their own.
* Usage is billed according to the pricing config.
*/
hosting?: ToolHostingConfig<P>
hosting?: HostingConfig<P>
}
export interface TableRow {
@@ -274,11 +274,15 @@ export interface CustomPricing<P = Record<string, unknown>> {
}
/** Union of all pricing models */
export type ToolHostingPricing<P = Record<string, unknown>> = PerRequestPricing | CustomPricing<P>
export type HostingPricing<P = Record<string, unknown>> = PerRequestPricing | CustomPricing<P>
/** @deprecated Use {@link HostingPricing} */
export type ToolHostingPricing<P = Record<string, unknown>> = HostingPricing<P>
/**
* Configuration for hosted API key support.
* When configured, the tool can use Sim's hosted API keys if user doesn't provide their own.
* Used by both tools (e.g. Exa) and LLM providers to declare that the platform
* supplies rate-limited keys for this resource and BYOK workspace keys take precedence.
*
* ### Hosted key env var convention
*
@@ -300,19 +304,27 @@ export type ToolHostingPricing<P = Record<string, unknown>> = PerRequestPricing
* Adding more keys only requires updating the count and adding the new env var —
* no code changes needed.
*/
export interface ToolHostingConfig<P = Record<string, unknown>> {
export interface HostingConfig<P = Record<string, unknown>> {
/**
* Env var name prefix for hosted keys.
* At runtime, `{envKeyPrefix}_COUNT` is read to determine how many keys exist,
* then `{envKeyPrefix}_1` through `{envKeyPrefix}_N` are resolved.
*/
envKeyPrefix: string
/** The parameter name that receives the API key */
/**
* Name of the field on the downstream request/params that receives the
* resolved API key. For tools this is whatever the tool's request body
* expects (commonly `'apiKey'`). For LLM providers this is `'apiKey'` —
* the typed field on `ProviderRequest`.
*/
apiKeyParam: string
/** BYOK provider ID for workspace key lookup */
byokProviderId?: BYOKProviderId
/** Pricing when using hosted key */
pricing: ToolHostingPricing<P>
/** Pricing for usage of the hosted key */
pricing: HostingPricing<P>
/** Hosted key rate limit configuration (required for hosted key distribution) */
rateLimit: HostedKeyRateLimitConfig
}
/** @deprecated Use {@link HostingConfig} */
export type ToolHostingConfig<P = Record<string, unknown>> = HostingConfig<P>

View File

@@ -44,9 +44,7 @@ vi.mock('@/lib/core/config/environment', () => ({
isHosted: false,
}))
vi.mock('@/lib/core/config/api-keys', () => ({
getRotatingApiKey: vi.fn(),
}))
vi.mock('@/lib/core/config/api-keys', () => ({}))
// Tools module
vi.mock('@/tools')