mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
feat(models): updated model configs, updated anthropic provider to propagate errors back to user if any
This commit is contained in:
@@ -164,6 +164,7 @@ Return ONLY the JSON array.`,
|
|||||||
type: 'dropdown',
|
type: 'dropdown',
|
||||||
placeholder: 'Select reasoning effort...',
|
placeholder: 'Select reasoning effort...',
|
||||||
options: [
|
options: [
|
||||||
|
{ label: 'auto', id: 'auto' },
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -173,9 +174,12 @@ Return ONLY the JSON array.`,
|
|||||||
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
||||||
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
||||||
|
|
||||||
|
const autoOption = { label: 'auto', id: 'auto' }
|
||||||
|
|
||||||
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
||||||
if (!activeWorkflowId) {
|
if (!activeWorkflowId) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -188,6 +192,7 @@ Return ONLY the JSON array.`,
|
|||||||
|
|
||||||
if (!modelValue) {
|
if (!modelValue) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -197,15 +202,15 @@ Return ONLY the JSON array.`,
|
|||||||
const validOptions = getReasoningEffortValuesForModel(modelValue)
|
const validOptions = getReasoningEffortValuesForModel(modelValue)
|
||||||
if (!validOptions) {
|
if (!validOptions) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
return validOptions.map((opt) => ({ label: opt, id: opt }))
|
return [autoOption, ...validOptions.map((opt) => ({ label: opt, id: opt }))]
|
||||||
},
|
},
|
||||||
value: () => 'medium',
|
|
||||||
condition: {
|
condition: {
|
||||||
field: 'model',
|
field: 'model',
|
||||||
value: MODELS_WITH_REASONING_EFFORT,
|
value: MODELS_WITH_REASONING_EFFORT,
|
||||||
@@ -217,6 +222,7 @@ Return ONLY the JSON array.`,
|
|||||||
type: 'dropdown',
|
type: 'dropdown',
|
||||||
placeholder: 'Select verbosity...',
|
placeholder: 'Select verbosity...',
|
||||||
options: [
|
options: [
|
||||||
|
{ label: 'auto', id: 'auto' },
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -226,9 +232,12 @@ Return ONLY the JSON array.`,
|
|||||||
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
||||||
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
||||||
|
|
||||||
|
const autoOption = { label: 'auto', id: 'auto' }
|
||||||
|
|
||||||
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
||||||
if (!activeWorkflowId) {
|
if (!activeWorkflowId) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -241,6 +250,7 @@ Return ONLY the JSON array.`,
|
|||||||
|
|
||||||
if (!modelValue) {
|
if (!modelValue) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
@@ -250,15 +260,15 @@ Return ONLY the JSON array.`,
|
|||||||
const validOptions = getVerbosityValuesForModel(modelValue)
|
const validOptions = getVerbosityValuesForModel(modelValue)
|
||||||
if (!validOptions) {
|
if (!validOptions) {
|
||||||
return [
|
return [
|
||||||
|
autoOption,
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
{ label: 'high', id: 'high' },
|
{ label: 'high', id: 'high' },
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
return validOptions.map((opt) => ({ label: opt, id: opt }))
|
return [autoOption, ...validOptions.map((opt) => ({ label: opt, id: opt }))]
|
||||||
},
|
},
|
||||||
value: () => 'medium',
|
|
||||||
condition: {
|
condition: {
|
||||||
field: 'model',
|
field: 'model',
|
||||||
value: MODELS_WITH_VERBOSITY,
|
value: MODELS_WITH_VERBOSITY,
|
||||||
@@ -270,6 +280,7 @@ Return ONLY the JSON array.`,
|
|||||||
type: 'dropdown',
|
type: 'dropdown',
|
||||||
placeholder: 'Select thinking level...',
|
placeholder: 'Select thinking level...',
|
||||||
options: [
|
options: [
|
||||||
|
{ label: 'none', id: 'none' },
|
||||||
{ label: 'minimal', id: 'minimal' },
|
{ label: 'minimal', id: 'minimal' },
|
||||||
{ label: 'low', id: 'low' },
|
{ label: 'low', id: 'low' },
|
||||||
{ label: 'medium', id: 'medium' },
|
{ label: 'medium', id: 'medium' },
|
||||||
@@ -281,12 +292,11 @@ Return ONLY the JSON array.`,
|
|||||||
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
const { useSubBlockStore } = await import('@/stores/workflows/subblock/store')
|
||||||
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
const { useWorkflowRegistry } = await import('@/stores/workflows/registry/store')
|
||||||
|
|
||||||
|
const noneOption = { label: 'none', id: 'none' }
|
||||||
|
|
||||||
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
const activeWorkflowId = useWorkflowRegistry.getState().activeWorkflowId
|
||||||
if (!activeWorkflowId) {
|
if (!activeWorkflowId) {
|
||||||
return [
|
return [noneOption, { label: 'low', id: 'low' }, { label: 'high', id: 'high' }]
|
||||||
{ label: 'low', id: 'low' },
|
|
||||||
{ label: 'high', id: 'high' },
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const workflowValues = useSubBlockStore.getState().workflowValues[activeWorkflowId]
|
const workflowValues = useSubBlockStore.getState().workflowValues[activeWorkflowId]
|
||||||
@@ -294,23 +304,16 @@ Return ONLY the JSON array.`,
|
|||||||
const modelValue = blockValues?.model as string
|
const modelValue = blockValues?.model as string
|
||||||
|
|
||||||
if (!modelValue) {
|
if (!modelValue) {
|
||||||
return [
|
return [noneOption, { label: 'low', id: 'low' }, { label: 'high', id: 'high' }]
|
||||||
{ label: 'low', id: 'low' },
|
|
||||||
{ label: 'high', id: 'high' },
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const validOptions = getThinkingLevelsForModel(modelValue)
|
const validOptions = getThinkingLevelsForModel(modelValue)
|
||||||
if (!validOptions) {
|
if (!validOptions) {
|
||||||
return [
|
return [noneOption, { label: 'low', id: 'low' }, { label: 'high', id: 'high' }]
|
||||||
{ label: 'low', id: 'low' },
|
|
||||||
{ label: 'high', id: 'high' },
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return validOptions.map((opt) => ({ label: opt, id: opt }))
|
return [noneOption, ...validOptions.map((opt) => ({ label: opt, id: opt }))]
|
||||||
},
|
},
|
||||||
value: () => 'high',
|
|
||||||
condition: {
|
condition: {
|
||||||
field: 'model',
|
field: 'model',
|
||||||
value: MODELS_WITH_THINKING,
|
value: MODELS_WITH_THINKING,
|
||||||
|
|||||||
@@ -906,24 +906,17 @@ export class AgentBlockHandler implements BlockHandler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find first system message
|
|
||||||
const firstSystemIndex = messages.findIndex((msg) => msg.role === 'system')
|
const firstSystemIndex = messages.findIndex((msg) => msg.role === 'system')
|
||||||
|
|
||||||
if (firstSystemIndex === -1) {
|
if (firstSystemIndex === -1) {
|
||||||
// No system message exists - add at position 0
|
|
||||||
messages.unshift({ role: 'system', content })
|
messages.unshift({ role: 'system', content })
|
||||||
} else if (firstSystemIndex === 0) {
|
} else if (firstSystemIndex === 0) {
|
||||||
// System message already at position 0 - replace it
|
|
||||||
// Explicit systemPrompt parameter takes precedence over memory/messages
|
|
||||||
messages[0] = { role: 'system', content }
|
messages[0] = { role: 'system', content }
|
||||||
} else {
|
} else {
|
||||||
// System message exists but not at position 0 - move it to position 0
|
|
||||||
// and update with new content
|
|
||||||
messages.splice(firstSystemIndex, 1)
|
messages.splice(firstSystemIndex, 1)
|
||||||
messages.unshift({ role: 'system', content })
|
messages.unshift({ role: 'system', content })
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove any additional system messages (keep only the first one)
|
|
||||||
for (let i = messages.length - 1; i >= 1; i--) {
|
for (let i = messages.length - 1; i >= 1; i--) {
|
||||||
if (messages[i].role === 'system') {
|
if (messages[i].role === 'system') {
|
||||||
messages.splice(i, 1)
|
messages.splice(i, 1)
|
||||||
@@ -996,6 +989,7 @@ export class AgentBlockHandler implements BlockHandler {
|
|||||||
blockNameMapping,
|
blockNameMapping,
|
||||||
reasoningEffort: inputs.reasoningEffort,
|
reasoningEffort: inputs.reasoningEffort,
|
||||||
verbosity: inputs.verbosity,
|
verbosity: inputs.verbosity,
|
||||||
|
thinkingLevel: inputs.thinkingLevel,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1064,6 +1058,7 @@ export class AgentBlockHandler implements BlockHandler {
|
|||||||
isDeployedContext: ctx.isDeployedContext,
|
isDeployedContext: ctx.isDeployedContext,
|
||||||
reasoningEffort: providerRequest.reasoningEffort,
|
reasoningEffort: providerRequest.reasoningEffort,
|
||||||
verbosity: providerRequest.verbosity,
|
verbosity: providerRequest.verbosity,
|
||||||
|
thinkingLevel: providerRequest.thinkingLevel,
|
||||||
})
|
})
|
||||||
|
|
||||||
return this.processProviderResponse(response, block, responseFormat)
|
return this.processProviderResponse(response, block, responseFormat)
|
||||||
@@ -1081,8 +1076,6 @@ export class AgentBlockHandler implements BlockHandler {
|
|||||||
|
|
||||||
logger.info(`[${requestId}] Resolving Vertex AI credential: ${credentialId}`)
|
logger.info(`[${requestId}] Resolving Vertex AI credential: ${credentialId}`)
|
||||||
|
|
||||||
// Get the credential - we need to find the owner
|
|
||||||
// Since we're in a workflow context, we can query the credential directly
|
|
||||||
const credential = await db.query.account.findFirst({
|
const credential = await db.query.account.findFirst({
|
||||||
where: eq(account.id, credentialId),
|
where: eq(account.id, credentialId),
|
||||||
})
|
})
|
||||||
@@ -1091,7 +1084,6 @@ export class AgentBlockHandler implements BlockHandler {
|
|||||||
throw new Error(`Vertex AI credential not found: ${credentialId}`)
|
throw new Error(`Vertex AI credential not found: ${credentialId}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refresh the token if needed
|
|
||||||
const { accessToken } = await refreshTokenIfNeeded(requestId, credential, credentialId)
|
const { accessToken } = await refreshTokenIfNeeded(requestId, credential, credentialId)
|
||||||
|
|
||||||
if (!accessToken) {
|
if (!accessToken) {
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ export interface AgentInputs {
|
|||||||
bedrockRegion?: string
|
bedrockRegion?: string
|
||||||
reasoningEffort?: string
|
reasoningEffort?: string
|
||||||
verbosity?: string
|
verbosity?: string
|
||||||
|
thinkingLevel?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ToolInput {
|
export interface ToolInput {
|
||||||
|
|||||||
@@ -113,6 +113,28 @@ function buildThinkingConfig(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Anthropic SDK requires streaming for non-streaming requests when max_tokens exceeds
|
||||||
|
* this threshold, to avoid HTTP timeouts. When thinking is enabled and pushes max_tokens
|
||||||
|
* above this limit, we use streaming internally and collect the final message.
|
||||||
|
*/
|
||||||
|
const ANTHROPIC_SDK_NON_STREAMING_MAX_TOKENS = 21333
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an Anthropic message, automatically using streaming internally when max_tokens
|
||||||
|
* exceeds the SDK's non-streaming threshold. Returns the same Message object either way.
|
||||||
|
*/
|
||||||
|
async function createMessage(
|
||||||
|
anthropic: Anthropic,
|
||||||
|
payload: any
|
||||||
|
): Promise<Anthropic.Messages.Message> {
|
||||||
|
if (payload.max_tokens > ANTHROPIC_SDK_NON_STREAMING_MAX_TOKENS && !payload.stream) {
|
||||||
|
const stream = anthropic.messages.stream(payload)
|
||||||
|
return stream.finalMessage()
|
||||||
|
}
|
||||||
|
return anthropic.messages.create(payload) as Promise<Anthropic.Messages.Message>
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Executes a request using the Anthropic API with full tool loop support.
|
* Executes a request using the Anthropic API with full tool loop support.
|
||||||
* This is the shared core implementation used by both the standard Anthropic provider
|
* This is the shared core implementation used by both the standard Anthropic provider
|
||||||
@@ -268,13 +290,35 @@ export async function executeAnthropicProviderRequest(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add extended thinking configuration if supported and requested
|
// Add extended thinking configuration if supported and requested
|
||||||
if (request.thinkingLevel) {
|
// The 'none' sentinel means "disable thinking" — skip configuration entirely.
|
||||||
|
if (request.thinkingLevel && request.thinkingLevel !== 'none') {
|
||||||
const thinkingConfig = buildThinkingConfig(request.model, request.thinkingLevel)
|
const thinkingConfig = buildThinkingConfig(request.model, request.thinkingLevel)
|
||||||
if (thinkingConfig) {
|
if (thinkingConfig) {
|
||||||
payload.thinking = thinkingConfig.thinking
|
payload.thinking = thinkingConfig.thinking
|
||||||
if (thinkingConfig.outputConfig) {
|
if (thinkingConfig.outputConfig) {
|
||||||
payload.output_config = thinkingConfig.outputConfig
|
payload.output_config = thinkingConfig.outputConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per Anthropic docs: budget_tokens must be less than max_tokens.
|
||||||
|
// Ensure max_tokens leaves room for both thinking and text output.
|
||||||
|
if (
|
||||||
|
thinkingConfig.thinking.type === 'enabled' &&
|
||||||
|
'budget_tokens' in thinkingConfig.thinking
|
||||||
|
) {
|
||||||
|
const budgetTokens = thinkingConfig.thinking.budget_tokens
|
||||||
|
const minMaxTokens = budgetTokens + 4096
|
||||||
|
if (payload.max_tokens < minMaxTokens) {
|
||||||
|
const modelMax = getMaxOutputTokensForModel(request.model, true)
|
||||||
|
payload.max_tokens = Math.min(minMaxTokens, modelMax)
|
||||||
|
logger.info(
|
||||||
|
`Adjusted max_tokens to ${payload.max_tokens} to satisfy budget_tokens (${budgetTokens}) constraint`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per Anthropic docs: thinking is not compatible with temperature or top_k modifications.
|
||||||
|
payload.temperature = undefined
|
||||||
|
|
||||||
const isAdaptive = thinkingConfig.thinking.type === 'adaptive'
|
const isAdaptive = thinkingConfig.thinking.type === 'adaptive'
|
||||||
logger.info(
|
logger.info(
|
||||||
`Using ${isAdaptive ? 'adaptive' : 'extended'} thinking for model: ${modelId} with ${isAdaptive ? `effort: ${request.thinkingLevel}` : `budget: ${(thinkingConfig.thinking as { budget_tokens: number }).budget_tokens}`}`
|
`Using ${isAdaptive ? 'adaptive' : 'extended'} thinking for model: ${modelId} with ${isAdaptive ? `effort: ${request.thinkingLevel}` : `budget: ${(thinkingConfig.thinking as { budget_tokens: number }).budget_tokens}`}`
|
||||||
@@ -288,7 +332,16 @@ export async function executeAnthropicProviderRequest(
|
|||||||
|
|
||||||
if (anthropicTools?.length) {
|
if (anthropicTools?.length) {
|
||||||
payload.tools = anthropicTools
|
payload.tools = anthropicTools
|
||||||
if (toolChoice !== 'auto') {
|
// Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with
|
||||||
|
// thinking. Only auto and none are supported when thinking is enabled.
|
||||||
|
if (payload.thinking) {
|
||||||
|
// Per Anthropic docs: only 'auto' (default) and 'none' work with thinking.
|
||||||
|
if (toolChoice === 'none') {
|
||||||
|
payload.tool_choice = { type: 'none' }
|
||||||
|
}
|
||||||
|
} else if (toolChoice === 'none') {
|
||||||
|
payload.tool_choice = { type: 'none' }
|
||||||
|
} else if (toolChoice !== 'auto') {
|
||||||
payload.tool_choice = toolChoice
|
payload.tool_choice = toolChoice
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -386,12 +439,16 @@ export async function executeAnthropicProviderRequest(
|
|||||||
const providerStartTimeISO = new Date(providerStartTime).toISOString()
|
const providerStartTimeISO = new Date(providerStartTime).toISOString()
|
||||||
|
|
||||||
// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
|
// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
|
||||||
// but allow users to set lower values if desired
|
// but allow users to set lower values if desired. Use Math.max to preserve
|
||||||
|
// thinking-adjusted max_tokens from payload when it's higher.
|
||||||
const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
|
const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
|
||||||
const nonStreamingMaxTokens = request.maxTokens
|
const nonStreamingMaxTokens = request.maxTokens
|
||||||
? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
|
? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
|
||||||
: nonStreamingLimit
|
: nonStreamingLimit
|
||||||
const intermediatePayload = { ...payload, max_tokens: nonStreamingMaxTokens }
|
const intermediatePayload = {
|
||||||
|
...payload,
|
||||||
|
max_tokens: Math.max(nonStreamingMaxTokens, payload.max_tokens),
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const initialCallTime = Date.now()
|
const initialCallTime = Date.now()
|
||||||
@@ -399,7 +456,7 @@ export async function executeAnthropicProviderRequest(
|
|||||||
const forcedTools = preparedTools?.forcedTools || []
|
const forcedTools = preparedTools?.forcedTools || []
|
||||||
let usedForcedTools: string[] = []
|
let usedForcedTools: string[] = []
|
||||||
|
|
||||||
let currentResponse = await anthropic.messages.create(intermediatePayload)
|
let currentResponse = await createMessage(anthropic, intermediatePayload)
|
||||||
const firstResponseTime = Date.now() - initialCallTime
|
const firstResponseTime = Date.now() - initialCallTime
|
||||||
|
|
||||||
let content = ''
|
let content = ''
|
||||||
@@ -583,11 +640,20 @@ export async function executeAnthropicProviderRequest(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add ONE assistant message with ALL tool_use blocks
|
// Per Anthropic docs: thinking blocks must be preserved in assistant messages
|
||||||
|
// during tool use to maintain reasoning continuity.
|
||||||
|
const thinkingBlocks = currentResponse.content.filter(
|
||||||
|
(item) => item.type === 'thinking' || item.type === 'redacted_thinking'
|
||||||
|
)
|
||||||
|
|
||||||
|
// Add ONE assistant message with thinking + tool_use blocks
|
||||||
if (toolUseBlocks.length > 0) {
|
if (toolUseBlocks.length > 0) {
|
||||||
currentMessages.push({
|
currentMessages.push({
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: toolUseBlocks as unknown as Anthropic.Messages.ContentBlock[],
|
content: [
|
||||||
|
...thinkingBlocks,
|
||||||
|
...toolUseBlocks,
|
||||||
|
] as unknown as Anthropic.Messages.ContentBlock[],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -607,7 +673,11 @@ export async function executeAnthropicProviderRequest(
|
|||||||
messages: currentMessages,
|
messages: currentMessages,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per Anthropic docs: forced tool_choice is incompatible with thinking.
|
||||||
|
// Only auto and none are supported when thinking is enabled.
|
||||||
|
const thinkingEnabled = !!payload.thinking
|
||||||
if (
|
if (
|
||||||
|
!thinkingEnabled &&
|
||||||
typeof originalToolChoice === 'object' &&
|
typeof originalToolChoice === 'object' &&
|
||||||
hasUsedForcedTool &&
|
hasUsedForcedTool &&
|
||||||
forcedTools.length > 0
|
forcedTools.length > 0
|
||||||
@@ -624,7 +694,11 @@ export async function executeAnthropicProviderRequest(
|
|||||||
nextPayload.tool_choice = undefined
|
nextPayload.tool_choice = undefined
|
||||||
logger.info('All forced tools have been used, removing tool_choice parameter')
|
logger.info('All forced tools have been used, removing tool_choice parameter')
|
||||||
}
|
}
|
||||||
} else if (hasUsedForcedTool && typeof originalToolChoice === 'object') {
|
} else if (
|
||||||
|
!thinkingEnabled &&
|
||||||
|
hasUsedForcedTool &&
|
||||||
|
typeof originalToolChoice === 'object'
|
||||||
|
) {
|
||||||
nextPayload.tool_choice = undefined
|
nextPayload.tool_choice = undefined
|
||||||
logger.info(
|
logger.info(
|
||||||
'Removing tool_choice parameter for subsequent requests after forced tool was used'
|
'Removing tool_choice parameter for subsequent requests after forced tool was used'
|
||||||
@@ -633,7 +707,7 @@ export async function executeAnthropicProviderRequest(
|
|||||||
|
|
||||||
const nextModelStartTime = Date.now()
|
const nextModelStartTime = Date.now()
|
||||||
|
|
||||||
currentResponse = await anthropic.messages.create(nextPayload)
|
currentResponse = await createMessage(anthropic, nextPayload)
|
||||||
|
|
||||||
const nextCheckResult = checkForForcedToolUsage(
|
const nextCheckResult = checkForForcedToolUsage(
|
||||||
currentResponse,
|
currentResponse,
|
||||||
@@ -779,12 +853,16 @@ export async function executeAnthropicProviderRequest(
|
|||||||
const providerStartTimeISO = new Date(providerStartTime).toISOString()
|
const providerStartTimeISO = new Date(providerStartTime).toISOString()
|
||||||
|
|
||||||
// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
|
// Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
|
||||||
// but allow users to set lower values if desired
|
// but allow users to set lower values if desired. Use Math.max to preserve
|
||||||
|
// thinking-adjusted max_tokens from payload when it's higher.
|
||||||
const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
|
const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
|
||||||
const toolLoopMaxTokens = request.maxTokens
|
const toolLoopMaxTokens = request.maxTokens
|
||||||
? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
|
? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
|
||||||
: nonStreamingLimit
|
: nonStreamingLimit
|
||||||
const toolLoopPayload = { ...payload, max_tokens: toolLoopMaxTokens }
|
const toolLoopPayload = {
|
||||||
|
...payload,
|
||||||
|
max_tokens: Math.max(toolLoopMaxTokens, payload.max_tokens),
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const initialCallTime = Date.now()
|
const initialCallTime = Date.now()
|
||||||
@@ -792,7 +870,7 @@ export async function executeAnthropicProviderRequest(
|
|||||||
const forcedTools = preparedTools?.forcedTools || []
|
const forcedTools = preparedTools?.forcedTools || []
|
||||||
let usedForcedTools: string[] = []
|
let usedForcedTools: string[] = []
|
||||||
|
|
||||||
let currentResponse = await anthropic.messages.create(toolLoopPayload)
|
let currentResponse = await createMessage(anthropic, toolLoopPayload)
|
||||||
const firstResponseTime = Date.now() - initialCallTime
|
const firstResponseTime = Date.now() - initialCallTime
|
||||||
|
|
||||||
let content = ''
|
let content = ''
|
||||||
@@ -989,11 +1067,20 @@ export async function executeAnthropicProviderRequest(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add ONE assistant message with ALL tool_use blocks
|
// Per Anthropic docs: thinking blocks must be preserved in assistant messages
|
||||||
|
// during tool use to maintain reasoning continuity.
|
||||||
|
const thinkingBlocks = currentResponse.content.filter(
|
||||||
|
(item) => item.type === 'thinking' || item.type === 'redacted_thinking'
|
||||||
|
)
|
||||||
|
|
||||||
|
// Add ONE assistant message with thinking + tool_use blocks
|
||||||
if (toolUseBlocks.length > 0) {
|
if (toolUseBlocks.length > 0) {
|
||||||
currentMessages.push({
|
currentMessages.push({
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: toolUseBlocks as unknown as Anthropic.Messages.ContentBlock[],
|
content: [
|
||||||
|
...thinkingBlocks,
|
||||||
|
...toolUseBlocks,
|
||||||
|
] as unknown as Anthropic.Messages.ContentBlock[],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1013,7 +1100,15 @@ export async function executeAnthropicProviderRequest(
|
|||||||
messages: currentMessages,
|
messages: currentMessages,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
|
// Per Anthropic docs: forced tool_choice is incompatible with thinking.
|
||||||
|
// Only auto and none are supported when thinking is enabled.
|
||||||
|
const thinkingEnabled = !!payload.thinking
|
||||||
|
if (
|
||||||
|
!thinkingEnabled &&
|
||||||
|
typeof originalToolChoice === 'object' &&
|
||||||
|
hasUsedForcedTool &&
|
||||||
|
forcedTools.length > 0
|
||||||
|
) {
|
||||||
const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
|
const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
|
||||||
|
|
||||||
if (remainingTools.length > 0) {
|
if (remainingTools.length > 0) {
|
||||||
@@ -1026,7 +1121,11 @@ export async function executeAnthropicProviderRequest(
|
|||||||
nextPayload.tool_choice = undefined
|
nextPayload.tool_choice = undefined
|
||||||
logger.info('All forced tools have been used, removing tool_choice parameter')
|
logger.info('All forced tools have been used, removing tool_choice parameter')
|
||||||
}
|
}
|
||||||
} else if (hasUsedForcedTool && typeof originalToolChoice === 'object') {
|
} else if (
|
||||||
|
!thinkingEnabled &&
|
||||||
|
hasUsedForcedTool &&
|
||||||
|
typeof originalToolChoice === 'object'
|
||||||
|
) {
|
||||||
nextPayload.tool_choice = undefined
|
nextPayload.tool_choice = undefined
|
||||||
logger.info(
|
logger.info(
|
||||||
'Removing tool_choice parameter for subsequent requests after forced tool was used'
|
'Removing tool_choice parameter for subsequent requests after forced tool was used'
|
||||||
@@ -1035,7 +1134,7 @@ export async function executeAnthropicProviderRequest(
|
|||||||
|
|
||||||
const nextModelStartTime = Date.now()
|
const nextModelStartTime = Date.now()
|
||||||
|
|
||||||
currentResponse = await anthropic.messages.create(nextPayload)
|
currentResponse = await createMessage(anthropic, nextPayload)
|
||||||
|
|
||||||
const nextCheckResult = checkForForcedToolUsage(
|
const nextCheckResult = checkForForcedToolUsage(
|
||||||
currentResponse,
|
currentResponse,
|
||||||
|
|||||||
@@ -98,8 +98,10 @@ async function executeChatCompletionsRequest(
|
|||||||
if (request.temperature !== undefined) payload.temperature = request.temperature
|
if (request.temperature !== undefined) payload.temperature = request.temperature
|
||||||
if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens
|
if (request.maxTokens != null) payload.max_completion_tokens = request.maxTokens
|
||||||
|
|
||||||
if (request.reasoningEffort !== undefined) payload.reasoning_effort = request.reasoningEffort
|
if (request.reasoningEffort !== undefined && request.reasoningEffort !== 'auto')
|
||||||
if (request.verbosity !== undefined) payload.verbosity = request.verbosity
|
payload.reasoning_effort = request.reasoningEffort
|
||||||
|
if (request.verbosity !== undefined && request.verbosity !== 'auto')
|
||||||
|
payload.verbosity = request.verbosity
|
||||||
|
|
||||||
if (request.responseFormat) {
|
if (request.responseFormat) {
|
||||||
payload.response_format = {
|
payload.response_format = {
|
||||||
|
|||||||
@@ -197,6 +197,9 @@ export const bedrockProvider: ProviderConfig = {
|
|||||||
} else if (tc.type === 'function' && tc.function?.name) {
|
} else if (tc.type === 'function' && tc.function?.name) {
|
||||||
toolChoice = { tool: { name: tc.function.name } }
|
toolChoice = { tool: { name: tc.function.name } }
|
||||||
logger.info(`Using Bedrock tool_choice format: force tool "${tc.function.name}"`)
|
logger.info(`Using Bedrock tool_choice format: force tool "${tc.function.name}"`)
|
||||||
|
} else if (tc.type === 'any') {
|
||||||
|
toolChoice = { any: {} }
|
||||||
|
logger.info('Using Bedrock tool_choice format: any tool')
|
||||||
} else {
|
} else {
|
||||||
toolChoice = { auto: {} }
|
toolChoice = { auto: {} }
|
||||||
}
|
}
|
||||||
@@ -860,6 +863,11 @@ export const bedrockProvider: ProviderConfig = {
|
|||||||
content,
|
content,
|
||||||
model: request.model,
|
model: request.model,
|
||||||
tokens,
|
tokens,
|
||||||
|
cost: {
|
||||||
|
input: cost.input,
|
||||||
|
output: cost.output,
|
||||||
|
total: cost.total,
|
||||||
|
},
|
||||||
toolCalls:
|
toolCalls:
|
||||||
toolCalls.length > 0
|
toolCalls.length > 0
|
||||||
? toolCalls.map((tc) => ({
|
? toolCalls.map((tc) => ({
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ import {
|
|||||||
extractTextContent,
|
extractTextContent,
|
||||||
mapToThinkingLevel,
|
mapToThinkingLevel,
|
||||||
} from '@/providers/google/utils'
|
} from '@/providers/google/utils'
|
||||||
import { getThinkingCapability } from '@/providers/models'
|
|
||||||
import type { FunctionCallResponse, ProviderRequest, ProviderResponse } from '@/providers/types'
|
import type { FunctionCallResponse, ProviderRequest, ProviderResponse } from '@/providers/types'
|
||||||
import {
|
import {
|
||||||
calculateCost,
|
calculateCost,
|
||||||
@@ -432,13 +431,11 @@ export async function executeGeminiRequest(
|
|||||||
logger.warn('Gemini does not support responseFormat with tools. Structured output ignored.')
|
logger.warn('Gemini does not support responseFormat with tools. Structured output ignored.')
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure thinking for models that support it
|
// Configure thinking only when the user explicitly selects a thinking level
|
||||||
const thinkingCapability = getThinkingCapability(model)
|
if (request.thinkingLevel && request.thinkingLevel !== 'none') {
|
||||||
if (thinkingCapability) {
|
|
||||||
const level = request.thinkingLevel ?? thinkingCapability.default ?? 'high'
|
|
||||||
const thinkingConfig: ThinkingConfig = {
|
const thinkingConfig: ThinkingConfig = {
|
||||||
includeThoughts: false,
|
includeThoughts: false,
|
||||||
thinkingLevel: mapToThinkingLevel(level),
|
thinkingLevel: mapToThinkingLevel(request.thinkingLevel),
|
||||||
}
|
}
|
||||||
geminiConfig.thinkingConfig = thinkingConfig
|
geminiConfig.thinkingConfig = thinkingConfig
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,9 +40,9 @@ export interface ModelCapabilities {
|
|||||||
* This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
|
* This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
|
||||||
*/
|
*/
|
||||||
maxOutputTokens?: {
|
maxOutputTokens?: {
|
||||||
/** Maximum tokens for streaming requests */
|
/** Maximum supported output tokens (used for streaming requests) */
|
||||||
max: number
|
max: number
|
||||||
/** Safe default for non-streaming requests (to avoid Anthropic SDK timeout errors) */
|
/** Conservative default when user doesn't specify maxTokens (controls cost/latency) */
|
||||||
default: number
|
default: number
|
||||||
}
|
}
|
||||||
reasoningEffort?: {
|
reasoningEffort?: {
|
||||||
@@ -109,7 +109,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
name: 'OpenAI',
|
name: 'OpenAI',
|
||||||
description: "OpenAI's models",
|
description: "OpenAI's models",
|
||||||
defaultModel: 'gpt-4o',
|
defaultModel: 'gpt-4o',
|
||||||
modelPatterns: [/^gpt/, /^o1/, /^text-embedding/],
|
modelPatterns: [/^gpt/, /^o\d/, /^text-embedding/],
|
||||||
icon: OpenAIIcon,
|
icon: OpenAIIcon,
|
||||||
capabilities: {
|
capabilities: {
|
||||||
toolUsageControl: true,
|
toolUsageControl: true,
|
||||||
@@ -138,7 +138,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
capabilities: {
|
capabilities: {
|
||||||
reasoningEffort: {
|
reasoningEffort: {
|
||||||
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
|
values: ['none', 'low', 'medium', 'high', 'xhigh'],
|
||||||
},
|
},
|
||||||
verbosity: {
|
verbosity: {
|
||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
@@ -164,60 +164,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
contextWindow: 400000,
|
contextWindow: 400000,
|
||||||
},
|
},
|
||||||
// {
|
|
||||||
// id: 'gpt-5.1-mini',
|
|
||||||
// pricing: {
|
|
||||||
// input: 0.25,
|
|
||||||
// cachedInput: 0.025,
|
|
||||||
// output: 2.0,
|
|
||||||
// updatedAt: '2025-11-14',
|
|
||||||
// },
|
|
||||||
// capabilities: {
|
|
||||||
// reasoningEffort: {
|
|
||||||
// values: ['none', 'low', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// verbosity: {
|
|
||||||
// values: ['low', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// },
|
|
||||||
// contextWindow: 400000,
|
|
||||||
// },
|
|
||||||
// {
|
|
||||||
// id: 'gpt-5.1-nano',
|
|
||||||
// pricing: {
|
|
||||||
// input: 0.05,
|
|
||||||
// cachedInput: 0.005,
|
|
||||||
// output: 0.4,
|
|
||||||
// updatedAt: '2025-11-14',
|
|
||||||
// },
|
|
||||||
// capabilities: {
|
|
||||||
// reasoningEffort: {
|
|
||||||
// values: ['none', 'low', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// verbosity: {
|
|
||||||
// values: ['low', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// },
|
|
||||||
// contextWindow: 400000,
|
|
||||||
// },
|
|
||||||
// {
|
|
||||||
// id: 'gpt-5.1-codex',
|
|
||||||
// pricing: {
|
|
||||||
// input: 1.25,
|
|
||||||
// cachedInput: 0.125,
|
|
||||||
// output: 10.0,
|
|
||||||
// updatedAt: '2025-11-14',
|
|
||||||
// },
|
|
||||||
// capabilities: {
|
|
||||||
// reasoningEffort: {
|
|
||||||
// values: ['none', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// verbosity: {
|
|
||||||
// values: ['low', 'medium', 'high'],
|
|
||||||
// },
|
|
||||||
// },
|
|
||||||
// contextWindow: 400000,
|
|
||||||
// },
|
|
||||||
{
|
{
|
||||||
id: 'gpt-5',
|
id: 'gpt-5',
|
||||||
pricing: {
|
pricing: {
|
||||||
@@ -280,8 +226,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
output: 10.0,
|
output: 10.0,
|
||||||
updatedAt: '2025-08-07',
|
updatedAt: '2025-08-07',
|
||||||
},
|
},
|
||||||
capabilities: {},
|
capabilities: {
|
||||||
contextWindow: 400000,
|
temperature: { min: 0, max: 2 },
|
||||||
|
},
|
||||||
|
contextWindow: 128000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'o1',
|
id: 'o1',
|
||||||
@@ -311,7 +259,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 128000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'o4-mini',
|
id: 'o4-mini',
|
||||||
@@ -326,7 +274,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 128000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'gpt-4.1',
|
id: 'gpt-4.1',
|
||||||
@@ -413,7 +361,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -429,10 +377,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
capabilities: {
|
capabilities: {
|
||||||
temperature: { min: 0, max: 1 },
|
temperature: { min: 0, max: 1 },
|
||||||
nativeStructuredOutputs: true,
|
nativeStructuredOutputs: true,
|
||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 32000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -447,10 +395,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
capabilities: {
|
capabilities: {
|
||||||
temperature: { min: 0, max: 1 },
|
temperature: { min: 0, max: 1 },
|
||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 32000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -469,7 +417,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -487,7 +435,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -506,7 +454,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -515,7 +463,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
id: 'claude-3-haiku-20240307',
|
id: 'claude-3-haiku-20240307',
|
||||||
pricing: {
|
pricing: {
|
||||||
input: 0.25,
|
input: 0.25,
|
||||||
cachedInput: 0.025,
|
cachedInput: 0.03,
|
||||||
output: 1.25,
|
output: 1.25,
|
||||||
updatedAt: '2026-02-05',
|
updatedAt: '2026-02-05',
|
||||||
},
|
},
|
||||||
@@ -536,10 +484,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
capabilities: {
|
capabilities: {
|
||||||
temperature: { min: 0, max: 1 },
|
temperature: { min: 0, max: 1 },
|
||||||
computerUse: true,
|
computerUse: true,
|
||||||
maxOutputTokens: { max: 8192, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -580,7 +528,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
capabilities: {
|
capabilities: {
|
||||||
reasoningEffort: {
|
reasoningEffort: {
|
||||||
values: ['none', 'minimal', 'low', 'medium', 'high', 'xhigh'],
|
values: ['none', 'low', 'medium', 'high', 'xhigh'],
|
||||||
},
|
},
|
||||||
verbosity: {
|
verbosity: {
|
||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
@@ -606,42 +554,6 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
contextWindow: 400000,
|
contextWindow: 400000,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
id: 'azure/gpt-5.1-mini',
|
|
||||||
pricing: {
|
|
||||||
input: 0.25,
|
|
||||||
cachedInput: 0.025,
|
|
||||||
output: 2.0,
|
|
||||||
updatedAt: '2025-11-14',
|
|
||||||
},
|
|
||||||
capabilities: {
|
|
||||||
reasoningEffort: {
|
|
||||||
values: ['none', 'low', 'medium', 'high'],
|
|
||||||
},
|
|
||||||
verbosity: {
|
|
||||||
values: ['low', 'medium', 'high'],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
contextWindow: 400000,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: 'azure/gpt-5.1-nano',
|
|
||||||
pricing: {
|
|
||||||
input: 0.05,
|
|
||||||
cachedInput: 0.005,
|
|
||||||
output: 0.4,
|
|
||||||
updatedAt: '2025-11-14',
|
|
||||||
},
|
|
||||||
capabilities: {
|
|
||||||
reasoningEffort: {
|
|
||||||
values: ['none', 'low', 'medium', 'high'],
|
|
||||||
},
|
|
||||||
verbosity: {
|
|
||||||
values: ['low', 'medium', 'high'],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
contextWindow: 400000,
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
id: 'azure/gpt-5.1-codex',
|
id: 'azure/gpt-5.1-codex',
|
||||||
pricing: {
|
pricing: {
|
||||||
@@ -652,7 +564,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
},
|
},
|
||||||
capabilities: {
|
capabilities: {
|
||||||
reasoningEffort: {
|
reasoningEffort: {
|
||||||
values: ['none', 'medium', 'high'],
|
values: ['none', 'low', 'medium', 'high'],
|
||||||
},
|
},
|
||||||
verbosity: {
|
verbosity: {
|
||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
@@ -722,23 +634,25 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
output: 10.0,
|
output: 10.0,
|
||||||
updatedAt: '2025-08-07',
|
updatedAt: '2025-08-07',
|
||||||
},
|
},
|
||||||
capabilities: {},
|
capabilities: {
|
||||||
contextWindow: 400000,
|
temperature: { min: 0, max: 2 },
|
||||||
|
},
|
||||||
|
contextWindow: 128000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'azure/o3',
|
id: 'azure/o3',
|
||||||
pricing: {
|
pricing: {
|
||||||
input: 10,
|
input: 2,
|
||||||
cachedInput: 2.5,
|
cachedInput: 0.5,
|
||||||
output: 40,
|
output: 8,
|
||||||
updatedAt: '2025-06-15',
|
updatedAt: '2026-02-06',
|
||||||
},
|
},
|
||||||
capabilities: {
|
capabilities: {
|
||||||
reasoningEffort: {
|
reasoningEffort: {
|
||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 128000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'azure/o4-mini',
|
id: 'azure/o4-mini',
|
||||||
@@ -753,7 +667,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
values: ['low', 'medium', 'high'],
|
values: ['low', 'medium', 'high'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 128000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'azure/gpt-4.1',
|
id: 'azure/gpt-4.1',
|
||||||
@@ -763,7 +677,35 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
output: 8.0,
|
output: 8.0,
|
||||||
updatedAt: '2025-06-15',
|
updatedAt: '2025-06-15',
|
||||||
},
|
},
|
||||||
capabilities: {},
|
capabilities: {
|
||||||
|
temperature: { min: 0, max: 2 },
|
||||||
|
},
|
||||||
|
contextWindow: 1000000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'azure/gpt-4.1-mini',
|
||||||
|
pricing: {
|
||||||
|
input: 0.4,
|
||||||
|
cachedInput: 0.1,
|
||||||
|
output: 1.6,
|
||||||
|
updatedAt: '2025-06-15',
|
||||||
|
},
|
||||||
|
capabilities: {
|
||||||
|
temperature: { min: 0, max: 2 },
|
||||||
|
},
|
||||||
|
contextWindow: 1000000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: 'azure/gpt-4.1-nano',
|
||||||
|
pricing: {
|
||||||
|
input: 0.1,
|
||||||
|
cachedInput: 0.025,
|
||||||
|
output: 0.4,
|
||||||
|
updatedAt: '2025-06-15',
|
||||||
|
},
|
||||||
|
capabilities: {
|
||||||
|
temperature: { min: 0, max: 2 },
|
||||||
|
},
|
||||||
contextWindow: 1000000,
|
contextWindow: 1000000,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -775,7 +717,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
updatedAt: '2025-06-15',
|
updatedAt: '2025-06-15',
|
||||||
},
|
},
|
||||||
capabilities: {},
|
capabilities: {},
|
||||||
contextWindow: 1000000,
|
contextWindow: 200000,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
@@ -823,7 +765,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -842,7 +784,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -858,10 +800,10 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
capabilities: {
|
capabilities: {
|
||||||
temperature: { min: 0, max: 1 },
|
temperature: { min: 0, max: 1 },
|
||||||
nativeStructuredOutputs: true,
|
nativeStructuredOutputs: true,
|
||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 32000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
@@ -880,7 +822,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
|||||||
maxOutputTokens: { max: 64000, default: 8192 },
|
maxOutputTokens: { max: 64000, default: 8192 },
|
||||||
thinking: {
|
thinking: {
|
||||||
levels: ['low', 'medium', 'high'],
|
levels: ['low', 'medium', 'high'],
|
||||||
default: 'medium',
|
default: 'high',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
contextWindow: 200000,
|
contextWindow: 200000,
|
||||||
|
|||||||
@@ -130,14 +130,14 @@ export async function executeResponsesProviderRequest(
|
|||||||
if (request.temperature !== undefined) basePayload.temperature = request.temperature
|
if (request.temperature !== undefined) basePayload.temperature = request.temperature
|
||||||
if (request.maxTokens != null) basePayload.max_output_tokens = request.maxTokens
|
if (request.maxTokens != null) basePayload.max_output_tokens = request.maxTokens
|
||||||
|
|
||||||
if (request.reasoningEffort !== undefined) {
|
if (request.reasoningEffort !== undefined && request.reasoningEffort !== 'auto') {
|
||||||
basePayload.reasoning = {
|
basePayload.reasoning = {
|
||||||
effort: request.reasoningEffort,
|
effort: request.reasoningEffort,
|
||||||
summary: 'auto',
|
summary: 'auto',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (request.verbosity !== undefined) {
|
if (request.verbosity !== undefined && request.verbosity !== 'auto') {
|
||||||
basePayload.text = {
|
basePayload.text = {
|
||||||
...(basePayload.text ?? {}),
|
...(basePayload.text ?? {}),
|
||||||
verbosity: request.verbosity,
|
verbosity: request.verbosity,
|
||||||
@@ -627,13 +627,13 @@ export async function executeResponsesProviderRequest(
|
|||||||
// Copy over non-tool related settings
|
// Copy over non-tool related settings
|
||||||
if (request.temperature !== undefined) finalPayload.temperature = request.temperature
|
if (request.temperature !== undefined) finalPayload.temperature = request.temperature
|
||||||
if (request.maxTokens != null) finalPayload.max_output_tokens = request.maxTokens
|
if (request.maxTokens != null) finalPayload.max_output_tokens = request.maxTokens
|
||||||
if (request.reasoningEffort !== undefined) {
|
if (request.reasoningEffort !== undefined && request.reasoningEffort !== 'auto') {
|
||||||
finalPayload.reasoning = {
|
finalPayload.reasoning = {
|
||||||
effort: request.reasoningEffort,
|
effort: request.reasoningEffort,
|
||||||
summary: 'auto',
|
summary: 'auto',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (request.verbosity !== undefined) {
|
if (request.verbosity !== undefined && request.verbosity !== 'auto') {
|
||||||
finalPayload.text = {
|
finalPayload.text = {
|
||||||
...finalPayload.text,
|
...finalPayload.text,
|
||||||
verbosity: request.verbosity,
|
verbosity: request.verbosity,
|
||||||
|
|||||||
@@ -12,16 +12,22 @@ import {
|
|||||||
getApiKey,
|
getApiKey,
|
||||||
getBaseModelProviders,
|
getBaseModelProviders,
|
||||||
getHostedModels,
|
getHostedModels,
|
||||||
|
getMaxOutputTokensForModel,
|
||||||
getMaxTemperature,
|
getMaxTemperature,
|
||||||
|
getModelPricing,
|
||||||
getProvider,
|
getProvider,
|
||||||
getProviderConfigFromModel,
|
getProviderConfigFromModel,
|
||||||
getProviderFromModel,
|
getProviderFromModel,
|
||||||
getProviderModels,
|
getProviderModels,
|
||||||
|
getReasoningEffortValuesForModel,
|
||||||
|
getThinkingLevelsForModel,
|
||||||
|
getVerbosityValuesForModel,
|
||||||
isProviderBlacklisted,
|
isProviderBlacklisted,
|
||||||
MODELS_TEMP_RANGE_0_1,
|
MODELS_TEMP_RANGE_0_1,
|
||||||
MODELS_TEMP_RANGE_0_2,
|
MODELS_TEMP_RANGE_0_2,
|
||||||
MODELS_WITH_REASONING_EFFORT,
|
MODELS_WITH_REASONING_EFFORT,
|
||||||
MODELS_WITH_TEMPERATURE_SUPPORT,
|
MODELS_WITH_TEMPERATURE_SUPPORT,
|
||||||
|
MODELS_WITH_THINKING,
|
||||||
MODELS_WITH_VERBOSITY,
|
MODELS_WITH_VERBOSITY,
|
||||||
PROVIDERS_WITH_TOOL_USAGE_CONTROL,
|
PROVIDERS_WITH_TOOL_USAGE_CONTROL,
|
||||||
prepareToolExecution,
|
prepareToolExecution,
|
||||||
@@ -169,6 +175,8 @@ describe('Model Capabilities', () => {
|
|||||||
'gpt-4.1',
|
'gpt-4.1',
|
||||||
'gpt-4.1-mini',
|
'gpt-4.1-mini',
|
||||||
'gpt-4.1-nano',
|
'gpt-4.1-nano',
|
||||||
|
'gpt-5-chat-latest',
|
||||||
|
'azure/gpt-5-chat-latest',
|
||||||
'gemini-2.5-flash',
|
'gemini-2.5-flash',
|
||||||
'claude-sonnet-4-0',
|
'claude-sonnet-4-0',
|
||||||
'claude-opus-4-0',
|
'claude-opus-4-0',
|
||||||
@@ -186,34 +194,27 @@ describe('Model Capabilities', () => {
|
|||||||
it.concurrent('should return false for models that do not support temperature', () => {
|
it.concurrent('should return false for models that do not support temperature', () => {
|
||||||
const unsupportedModels = [
|
const unsupportedModels = [
|
||||||
'unsupported-model',
|
'unsupported-model',
|
||||||
'cerebras/llama-3.3-70b', // Cerebras models don't have temperature defined
|
'cerebras/llama-3.3-70b',
|
||||||
'groq/meta-llama/llama-4-scout-17b-16e-instruct', // Groq models don't have temperature defined
|
'groq/meta-llama/llama-4-scout-17b-16e-instruct',
|
||||||
// Reasoning models that don't support temperature
|
|
||||||
'o1',
|
'o1',
|
||||||
'o3',
|
'o3',
|
||||||
'o4-mini',
|
'o4-mini',
|
||||||
'azure/o3',
|
'azure/o3',
|
||||||
'azure/o4-mini',
|
'azure/o4-mini',
|
||||||
'deepseek-r1',
|
'deepseek-r1',
|
||||||
// Chat models that don't support temperature
|
|
||||||
'deepseek-chat',
|
'deepseek-chat',
|
||||||
'azure/gpt-4.1',
|
|
||||||
'azure/model-router',
|
'azure/model-router',
|
||||||
// GPT-5.1 models don't support temperature (removed in our implementation)
|
|
||||||
'gpt-5.1',
|
'gpt-5.1',
|
||||||
'azure/gpt-5.1',
|
'azure/gpt-5.1',
|
||||||
'azure/gpt-5.1-mini',
|
'azure/gpt-5.1-mini',
|
||||||
'azure/gpt-5.1-nano',
|
'azure/gpt-5.1-nano',
|
||||||
'azure/gpt-5.1-codex',
|
'azure/gpt-5.1-codex',
|
||||||
// GPT-5 models don't support temperature (removed in our implementation)
|
|
||||||
'gpt-5',
|
'gpt-5',
|
||||||
'gpt-5-mini',
|
'gpt-5-mini',
|
||||||
'gpt-5-nano',
|
'gpt-5-nano',
|
||||||
'gpt-5-chat-latest',
|
|
||||||
'azure/gpt-5',
|
'azure/gpt-5',
|
||||||
'azure/gpt-5-mini',
|
'azure/gpt-5-mini',
|
||||||
'azure/gpt-5-nano',
|
'azure/gpt-5-nano',
|
||||||
'azure/gpt-5-chat-latest',
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for (const model of unsupportedModels) {
|
for (const model of unsupportedModels) {
|
||||||
@@ -240,6 +241,8 @@ describe('Model Capabilities', () => {
|
|||||||
const modelsRange02 = [
|
const modelsRange02 = [
|
||||||
'gpt-4o',
|
'gpt-4o',
|
||||||
'azure/gpt-4o',
|
'azure/gpt-4o',
|
||||||
|
'gpt-5-chat-latest',
|
||||||
|
'azure/gpt-5-chat-latest',
|
||||||
'gemini-2.5-pro',
|
'gemini-2.5-pro',
|
||||||
'gemini-2.5-flash',
|
'gemini-2.5-flash',
|
||||||
'deepseek-v3',
|
'deepseek-v3',
|
||||||
@@ -268,28 +271,23 @@ describe('Model Capabilities', () => {
|
|||||||
expect(getMaxTemperature('unsupported-model')).toBeUndefined()
|
expect(getMaxTemperature('unsupported-model')).toBeUndefined()
|
||||||
expect(getMaxTemperature('cerebras/llama-3.3-70b')).toBeUndefined()
|
expect(getMaxTemperature('cerebras/llama-3.3-70b')).toBeUndefined()
|
||||||
expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()
|
expect(getMaxTemperature('groq/meta-llama/llama-4-scout-17b-16e-instruct')).toBeUndefined()
|
||||||
// Reasoning models that don't support temperature
|
|
||||||
expect(getMaxTemperature('o1')).toBeUndefined()
|
expect(getMaxTemperature('o1')).toBeUndefined()
|
||||||
expect(getMaxTemperature('o3')).toBeUndefined()
|
expect(getMaxTemperature('o3')).toBeUndefined()
|
||||||
expect(getMaxTemperature('o4-mini')).toBeUndefined()
|
expect(getMaxTemperature('o4-mini')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/o3')).toBeUndefined()
|
expect(getMaxTemperature('azure/o3')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/o4-mini')).toBeUndefined()
|
expect(getMaxTemperature('azure/o4-mini')).toBeUndefined()
|
||||||
expect(getMaxTemperature('deepseek-r1')).toBeUndefined()
|
expect(getMaxTemperature('deepseek-r1')).toBeUndefined()
|
||||||
// GPT-5.1 models don't support temperature
|
|
||||||
expect(getMaxTemperature('gpt-5.1')).toBeUndefined()
|
expect(getMaxTemperature('gpt-5.1')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5.1')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5.1')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5.1-mini')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5.1-mini')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5.1-nano')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5.1-nano')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5.1-codex')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5.1-codex')).toBeUndefined()
|
||||||
// GPT-5 models don't support temperature
|
|
||||||
expect(getMaxTemperature('gpt-5')).toBeUndefined()
|
expect(getMaxTemperature('gpt-5')).toBeUndefined()
|
||||||
expect(getMaxTemperature('gpt-5-mini')).toBeUndefined()
|
expect(getMaxTemperature('gpt-5-mini')).toBeUndefined()
|
||||||
expect(getMaxTemperature('gpt-5-nano')).toBeUndefined()
|
expect(getMaxTemperature('gpt-5-nano')).toBeUndefined()
|
||||||
expect(getMaxTemperature('gpt-5-chat-latest')).toBeUndefined()
|
|
||||||
expect(getMaxTemperature('azure/gpt-5')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5-mini')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5-mini')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5-nano')).toBeUndefined()
|
expect(getMaxTemperature('azure/gpt-5-nano')).toBeUndefined()
|
||||||
expect(getMaxTemperature('azure/gpt-5-chat-latest')).toBeUndefined()
|
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should be case insensitive', () => {
|
it.concurrent('should be case insensitive', () => {
|
||||||
@@ -340,13 +338,13 @@ describe('Model Capabilities', () => {
|
|||||||
expect(MODELS_TEMP_RANGE_0_2).toContain('gpt-4o')
|
expect(MODELS_TEMP_RANGE_0_2).toContain('gpt-4o')
|
||||||
expect(MODELS_TEMP_RANGE_0_2).toContain('gemini-2.5-flash')
|
expect(MODELS_TEMP_RANGE_0_2).toContain('gemini-2.5-flash')
|
||||||
expect(MODELS_TEMP_RANGE_0_2).toContain('deepseek-v3')
|
expect(MODELS_TEMP_RANGE_0_2).toContain('deepseek-v3')
|
||||||
expect(MODELS_TEMP_RANGE_0_2).not.toContain('claude-sonnet-4-0') // Should be in 0-1 range
|
expect(MODELS_TEMP_RANGE_0_2).not.toContain('claude-sonnet-4-0')
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should have correct models in MODELS_TEMP_RANGE_0_1', () => {
|
it.concurrent('should have correct models in MODELS_TEMP_RANGE_0_1', () => {
|
||||||
expect(MODELS_TEMP_RANGE_0_1).toContain('claude-sonnet-4-0')
|
expect(MODELS_TEMP_RANGE_0_1).toContain('claude-sonnet-4-0')
|
||||||
expect(MODELS_TEMP_RANGE_0_1).toContain('grok-3-latest')
|
expect(MODELS_TEMP_RANGE_0_1).toContain('grok-3-latest')
|
||||||
expect(MODELS_TEMP_RANGE_0_1).not.toContain('gpt-4o') // Should be in 0-2 range
|
expect(MODELS_TEMP_RANGE_0_1).not.toContain('gpt-4o')
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should have correct providers in PROVIDERS_WITH_TOOL_USAGE_CONTROL', () => {
|
it.concurrent('should have correct providers in PROVIDERS_WITH_TOOL_USAGE_CONTROL', () => {
|
||||||
@@ -363,20 +361,19 @@ describe('Model Capabilities', () => {
|
|||||||
expect(MODELS_WITH_TEMPERATURE_SUPPORT.length).toBe(
|
expect(MODELS_WITH_TEMPERATURE_SUPPORT.length).toBe(
|
||||||
MODELS_TEMP_RANGE_0_2.length + MODELS_TEMP_RANGE_0_1.length
|
MODELS_TEMP_RANGE_0_2.length + MODELS_TEMP_RANGE_0_1.length
|
||||||
)
|
)
|
||||||
expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('gpt-4o') // From 0-2 range
|
expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('gpt-4o')
|
||||||
expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('claude-sonnet-4-0') // From 0-1 range
|
expect(MODELS_WITH_TEMPERATURE_SUPPORT).toContain('claude-sonnet-4-0')
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
it.concurrent('should have correct models in MODELS_WITH_REASONING_EFFORT', () => {
|
it.concurrent('should have correct models in MODELS_WITH_REASONING_EFFORT', () => {
|
||||||
// Should contain GPT-5.1 models that support reasoning effort
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.1')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.1')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1-mini')
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1-nano')
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1-codex')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.1-codex')
|
||||||
|
|
||||||
// Should contain GPT-5 models that support reasoning effort
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5.1-mini')
|
||||||
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5.1-nano')
|
||||||
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5-mini')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5-mini')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5-nano')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5-nano')
|
||||||
@@ -384,35 +381,30 @@ describe('Model Capabilities', () => {
|
|||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5-mini')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5-mini')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5-nano')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5-nano')
|
||||||
|
|
||||||
// Should contain gpt-5.2 models
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.2')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('gpt-5.2')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.2')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/gpt-5.2')
|
||||||
|
|
||||||
// Should contain o-series reasoning models (reasoning_effort added Dec 17, 2024)
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('o1')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('o1')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('o3')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('o3')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('o4-mini')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('o4-mini')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o3')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o3')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o4-mini')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('azure/o4-mini')
|
||||||
|
|
||||||
// Should NOT contain non-reasoning GPT-5 models
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-5-chat-latest')
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-5-chat-latest')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat-latest')
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('azure/gpt-5-chat-latest')
|
||||||
|
|
||||||
// Should NOT contain other models
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-4o')
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('gpt-4o')
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('claude-sonnet-4-0')
|
expect(MODELS_WITH_REASONING_EFFORT).not.toContain('claude-sonnet-4-0')
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should have correct models in MODELS_WITH_VERBOSITY', () => {
|
it.concurrent('should have correct models in MODELS_WITH_VERBOSITY', () => {
|
||||||
// Should contain GPT-5.1 models that support verbosity
|
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5.1')
|
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5.1')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1')
|
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1-mini')
|
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1-nano')
|
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1-codex')
|
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.1-codex')
|
||||||
|
|
||||||
// Should contain GPT-5 models that support verbosity
|
expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5.1-mini')
|
||||||
|
expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5.1-nano')
|
||||||
|
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5')
|
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5-mini')
|
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5-mini')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5-nano')
|
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5-nano')
|
||||||
@@ -420,26 +412,39 @@ describe('Model Capabilities', () => {
|
|||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5-mini')
|
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5-mini')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5-nano')
|
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5-nano')
|
||||||
|
|
||||||
// Should contain gpt-5.2 models
|
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5.2')
|
expect(MODELS_WITH_VERBOSITY).toContain('gpt-5.2')
|
||||||
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.2')
|
expect(MODELS_WITH_VERBOSITY).toContain('azure/gpt-5.2')
|
||||||
|
|
||||||
// Should NOT contain non-reasoning GPT-5 models
|
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5-chat-latest')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-5-chat-latest')
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat-latest')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('azure/gpt-5-chat-latest')
|
||||||
|
|
||||||
// Should NOT contain o-series models (they support reasoning_effort but not verbosity)
|
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('o3')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('o3')
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('o4-mini')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('o4-mini')
|
||||||
|
|
||||||
// Should NOT contain other models
|
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-4o')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('gpt-4o')
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('claude-sonnet-4-0')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('claude-sonnet-4-0')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it.concurrent('should have correct models in MODELS_WITH_THINKING', () => {
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-opus-4-6')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-opus-4-5')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-opus-4-1')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-opus-4-0')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-sonnet-4-5')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-sonnet-4-0')
|
||||||
|
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('gemini-3-pro-preview')
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('gemini-3-flash-preview')
|
||||||
|
|
||||||
|
expect(MODELS_WITH_THINKING).toContain('claude-haiku-4-5')
|
||||||
|
|
||||||
|
expect(MODELS_WITH_THINKING).not.toContain('gpt-4o')
|
||||||
|
expect(MODELS_WITH_THINKING).not.toContain('gpt-5')
|
||||||
|
expect(MODELS_WITH_THINKING).not.toContain('o3')
|
||||||
|
})
|
||||||
|
|
||||||
it.concurrent('should have GPT-5 models in both reasoning effort and verbosity arrays', () => {
|
it.concurrent('should have GPT-5 models in both reasoning effort and verbosity arrays', () => {
|
||||||
// GPT-5 series models support both reasoning effort and verbosity
|
|
||||||
const gpt5ModelsWithReasoningEffort = MODELS_WITH_REASONING_EFFORT.filter(
|
const gpt5ModelsWithReasoningEffort = MODELS_WITH_REASONING_EFFORT.filter(
|
||||||
(m) => m.includes('gpt-5') && !m.includes('chat-latest')
|
(m) => m.includes('gpt-5') && !m.includes('chat-latest')
|
||||||
)
|
)
|
||||||
@@ -448,11 +453,229 @@ describe('Model Capabilities', () => {
|
|||||||
)
|
)
|
||||||
expect(gpt5ModelsWithReasoningEffort.sort()).toEqual(gpt5ModelsWithVerbosity.sort())
|
expect(gpt5ModelsWithReasoningEffort.sort()).toEqual(gpt5ModelsWithVerbosity.sort())
|
||||||
|
|
||||||
// o-series models have reasoning effort but NOT verbosity
|
|
||||||
expect(MODELS_WITH_REASONING_EFFORT).toContain('o1')
|
expect(MODELS_WITH_REASONING_EFFORT).toContain('o1')
|
||||||
expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
|
expect(MODELS_WITH_VERBOSITY).not.toContain('o1')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
describe('Reasoning Effort Values Per Model', () => {
|
||||||
|
it.concurrent('should return correct values for GPT-5.2', () => {
|
||||||
|
const values = getReasoningEffortValuesForModel('gpt-5.2')
|
||||||
|
expect(values).toBeDefined()
|
||||||
|
expect(values).toContain('none')
|
||||||
|
expect(values).toContain('low')
|
||||||
|
expect(values).toContain('medium')
|
||||||
|
expect(values).toContain('high')
|
||||||
|
expect(values).toContain('xhigh')
|
||||||
|
expect(values).not.toContain('minimal')
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct values for GPT-5', () => {
|
||||||
|
const values = getReasoningEffortValuesForModel('gpt-5')
|
||||||
|
expect(values).toBeDefined()
|
||||||
|
expect(values).toContain('minimal')
|
||||||
|
expect(values).toContain('low')
|
||||||
|
expect(values).toContain('medium')
|
||||||
|
expect(values).toContain('high')
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct values for o-series models', () => {
|
||||||
|
for (const model of ['o1', 'o3', 'o4-mini']) {
|
||||||
|
const values = getReasoningEffortValuesForModel(model)
|
||||||
|
expect(values).toBeDefined()
|
||||||
|
expect(values).toContain('low')
|
||||||
|
expect(values).toContain('medium')
|
||||||
|
expect(values).toContain('high')
|
||||||
|
expect(values).not.toContain('none')
|
||||||
|
expect(values).not.toContain('minimal')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return null for non-reasoning models', () => {
|
||||||
|
expect(getReasoningEffortValuesForModel('gpt-4o')).toBeNull()
|
||||||
|
expect(getReasoningEffortValuesForModel('claude-sonnet-4-5')).toBeNull()
|
||||||
|
expect(getReasoningEffortValuesForModel('gemini-2.5-flash')).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct values for Azure GPT-5.2', () => {
|
||||||
|
const values = getReasoningEffortValuesForModel('azure/gpt-5.2')
|
||||||
|
expect(values).toBeDefined()
|
||||||
|
expect(values).not.toContain('minimal')
|
||||||
|
expect(values).toContain('xhigh')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Verbosity Values Per Model', () => {
|
||||||
|
it.concurrent('should return correct values for GPT-5 family', () => {
|
||||||
|
for (const model of ['gpt-5.2', 'gpt-5.1', 'gpt-5', 'gpt-5-mini', 'gpt-5-nano']) {
|
||||||
|
const values = getVerbosityValuesForModel(model)
|
||||||
|
expect(values).toBeDefined()
|
||||||
|
expect(values).toContain('low')
|
||||||
|
expect(values).toContain('medium')
|
||||||
|
expect(values).toContain('high')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return null for o-series models', () => {
|
||||||
|
expect(getVerbosityValuesForModel('o1')).toBeNull()
|
||||||
|
expect(getVerbosityValuesForModel('o3')).toBeNull()
|
||||||
|
expect(getVerbosityValuesForModel('o4-mini')).toBeNull()
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return null for non-reasoning models', () => {
|
||||||
|
expect(getVerbosityValuesForModel('gpt-4o')).toBeNull()
|
||||||
|
expect(getVerbosityValuesForModel('claude-sonnet-4-5')).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Thinking Levels Per Model', () => {
|
||||||
|
it.concurrent('should return correct levels for Claude Opus 4.6 (adaptive)', () => {
|
||||||
|
const levels = getThinkingLevelsForModel('claude-opus-4-6')
|
||||||
|
expect(levels).toBeDefined()
|
||||||
|
expect(levels).toContain('low')
|
||||||
|
expect(levels).toContain('medium')
|
||||||
|
expect(levels).toContain('high')
|
||||||
|
expect(levels).toContain('max')
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct levels for other Claude models (budget_tokens)', () => {
|
||||||
|
for (const model of ['claude-opus-4-5', 'claude-sonnet-4-5', 'claude-sonnet-4-0']) {
|
||||||
|
const levels = getThinkingLevelsForModel(model)
|
||||||
|
expect(levels).toBeDefined()
|
||||||
|
expect(levels).toContain('low')
|
||||||
|
expect(levels).toContain('medium')
|
||||||
|
expect(levels).toContain('high')
|
||||||
|
expect(levels).not.toContain('max')
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct levels for Gemini 3 models', () => {
|
||||||
|
const proLevels = getThinkingLevelsForModel('gemini-3-pro-preview')
|
||||||
|
expect(proLevels).toBeDefined()
|
||||||
|
expect(proLevels).toContain('low')
|
||||||
|
expect(proLevels).toContain('high')
|
||||||
|
|
||||||
|
const flashLevels = getThinkingLevelsForModel('gemini-3-flash-preview')
|
||||||
|
expect(flashLevels).toBeDefined()
|
||||||
|
expect(flashLevels).toContain('minimal')
|
||||||
|
expect(flashLevels).toContain('low')
|
||||||
|
expect(flashLevels).toContain('medium')
|
||||||
|
expect(flashLevels).toContain('high')
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct levels for Claude Haiku 4.5', () => {
|
||||||
|
const levels = getThinkingLevelsForModel('claude-haiku-4-5')
|
||||||
|
expect(levels).toBeDefined()
|
||||||
|
expect(levels).toContain('low')
|
||||||
|
expect(levels).toContain('medium')
|
||||||
|
expect(levels).toContain('high')
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return null for non-thinking models', () => {
|
||||||
|
expect(getThinkingLevelsForModel('gpt-4o')).toBeNull()
|
||||||
|
expect(getThinkingLevelsForModel('gpt-5')).toBeNull()
|
||||||
|
expect(getThinkingLevelsForModel('o3')).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Max Output Tokens', () => {
|
||||||
|
describe('getMaxOutputTokensForModel', () => {
|
||||||
|
it.concurrent('should return higher value for streaming than non-streaming (Anthropic)', () => {
|
||||||
|
const streamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', true)
|
||||||
|
const nonStreamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', false)
|
||||||
|
expect(streamingTokens).toBeGreaterThan(nonStreamingTokens)
|
||||||
|
expect(streamingTokens).toBe(128000)
|
||||||
|
expect(nonStreamingTokens).toBe(8192)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct values for Claude Sonnet 4.5', () => {
|
||||||
|
expect(getMaxOutputTokensForModel('claude-sonnet-4-5', true)).toBe(64000)
|
||||||
|
expect(getMaxOutputTokensForModel('claude-sonnet-4-5', false)).toBe(8192)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return correct values for Claude Opus 4.1', () => {
|
||||||
|
expect(getMaxOutputTokensForModel('claude-opus-4-1', true)).toBe(32000)
|
||||||
|
expect(getMaxOutputTokensForModel('claude-opus-4-1', false)).toBe(8192)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return standard default for models without maxOutputTokens', () => {
|
||||||
|
expect(getMaxOutputTokensForModel('gpt-4o', false)).toBe(4096)
|
||||||
|
expect(getMaxOutputTokensForModel('gpt-4o', true)).toBe(4096)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return standard default for unknown models', () => {
|
||||||
|
expect(getMaxOutputTokensForModel('unknown-model', false)).toBe(4096)
|
||||||
|
expect(getMaxOutputTokensForModel('unknown-model', true)).toBe(4096)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent(
|
||||||
|
'non-streaming default should be within Anthropic SDK non-streaming threshold',
|
||||||
|
() => {
|
||||||
|
const SDK_NON_STREAMING_THRESHOLD = 21333
|
||||||
|
const models = [
|
||||||
|
'claude-opus-4-6',
|
||||||
|
'claude-opus-4-5',
|
||||||
|
'claude-opus-4-1',
|
||||||
|
'claude-sonnet-4-5',
|
||||||
|
'claude-sonnet-4-0',
|
||||||
|
'claude-haiku-4-5',
|
||||||
|
]
|
||||||
|
|
||||||
|
for (const model of models) {
|
||||||
|
const nonStreamingDefault = getMaxOutputTokensForModel(model, false)
|
||||||
|
expect(nonStreamingDefault).toBeLessThan(SDK_NON_STREAMING_THRESHOLD)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Model Pricing Validation', () => {
|
||||||
|
it.concurrent('should have correct pricing for key Anthropic models', () => {
|
||||||
|
const opus46 = getModelPricing('claude-opus-4-6')
|
||||||
|
expect(opus46).toBeDefined()
|
||||||
|
expect(opus46.input).toBe(5.0)
|
||||||
|
expect(opus46.output).toBe(25.0)
|
||||||
|
|
||||||
|
const sonnet45 = getModelPricing('claude-sonnet-4-5')
|
||||||
|
expect(sonnet45).toBeDefined()
|
||||||
|
expect(sonnet45.input).toBe(3.0)
|
||||||
|
expect(sonnet45.output).toBe(15.0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should have correct pricing for key OpenAI models', () => {
|
||||||
|
const gpt4o = getModelPricing('gpt-4o')
|
||||||
|
expect(gpt4o).toBeDefined()
|
||||||
|
expect(gpt4o.input).toBe(2.5)
|
||||||
|
expect(gpt4o.output).toBe(10.0)
|
||||||
|
|
||||||
|
const o3 = getModelPricing('o3')
|
||||||
|
expect(o3).toBeDefined()
|
||||||
|
expect(o3.input).toBe(2.0)
|
||||||
|
expect(o3.output).toBe(8.0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should have correct pricing for Azure OpenAI o3', () => {
|
||||||
|
const azureO3 = getModelPricing('azure/o3')
|
||||||
|
expect(azureO3).toBeDefined()
|
||||||
|
expect(azureO3.input).toBe(2.0)
|
||||||
|
expect(azureO3.output).toBe(8.0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it.concurrent('should return null for unknown models', () => {
|
||||||
|
expect(getModelPricing('unknown-model')).toBeNull()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
describe('Context Window Validation', () => {
|
||||||
|
it.concurrent('should have correct context windows for key models', () => {
|
||||||
|
const allModels = getAllModels()
|
||||||
|
|
||||||
|
expect(allModels).toContain('gpt-5-chat-latest')
|
||||||
|
|
||||||
|
expect(allModels).toContain('o3')
|
||||||
|
expect(allModels).toContain('o4-mini')
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('Cost Calculation', () => {
|
describe('Cost Calculation', () => {
|
||||||
@@ -464,7 +687,7 @@ describe('Cost Calculation', () => {
|
|||||||
expect(result.output).toBeGreaterThan(0)
|
expect(result.output).toBeGreaterThan(0)
|
||||||
expect(result.total).toBeCloseTo(result.input + result.output, 6)
|
expect(result.total).toBeCloseTo(result.input + result.output, 6)
|
||||||
expect(result.pricing).toBeDefined()
|
expect(result.pricing).toBeDefined()
|
||||||
expect(result.pricing.input).toBe(2.5) // GPT-4o pricing
|
expect(result.pricing.input).toBe(2.5)
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should handle cached input pricing when enabled', () => {
|
it.concurrent('should handle cached input pricing when enabled', () => {
|
||||||
@@ -472,7 +695,7 @@ describe('Cost Calculation', () => {
|
|||||||
const cachedCost = calculateCost('gpt-4o', 1000, 500, true)
|
const cachedCost = calculateCost('gpt-4o', 1000, 500, true)
|
||||||
|
|
||||||
expect(cachedCost.input).toBeLessThan(regularCost.input)
|
expect(cachedCost.input).toBeLessThan(regularCost.input)
|
||||||
expect(cachedCost.output).toBe(regularCost.output) // Output cost should be same
|
expect(cachedCost.output).toBe(regularCost.output)
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should return default pricing for unknown models', () => {
|
it.concurrent('should return default pricing for unknown models', () => {
|
||||||
@@ -481,7 +704,7 @@ describe('Cost Calculation', () => {
|
|||||||
expect(result.input).toBe(0)
|
expect(result.input).toBe(0)
|
||||||
expect(result.output).toBe(0)
|
expect(result.output).toBe(0)
|
||||||
expect(result.total).toBe(0)
|
expect(result.total).toBe(0)
|
||||||
expect(result.pricing.input).toBe(1.0) // Default pricing
|
expect(result.pricing.input).toBe(1.0)
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should handle zero tokens', () => {
|
it.concurrent('should handle zero tokens', () => {
|
||||||
@@ -528,19 +751,15 @@ describe('getHostedModels', () => {
|
|||||||
it.concurrent('should return OpenAI, Anthropic, and Google models as hosted', () => {
|
it.concurrent('should return OpenAI, Anthropic, and Google models as hosted', () => {
|
||||||
const hostedModels = getHostedModels()
|
const hostedModels = getHostedModels()
|
||||||
|
|
||||||
// OpenAI models
|
|
||||||
expect(hostedModels).toContain('gpt-4o')
|
expect(hostedModels).toContain('gpt-4o')
|
||||||
expect(hostedModels).toContain('o1')
|
expect(hostedModels).toContain('o1')
|
||||||
|
|
||||||
// Anthropic models
|
|
||||||
expect(hostedModels).toContain('claude-sonnet-4-0')
|
expect(hostedModels).toContain('claude-sonnet-4-0')
|
||||||
expect(hostedModels).toContain('claude-opus-4-0')
|
expect(hostedModels).toContain('claude-opus-4-0')
|
||||||
|
|
||||||
// Google models
|
|
||||||
expect(hostedModels).toContain('gemini-2.5-pro')
|
expect(hostedModels).toContain('gemini-2.5-pro')
|
||||||
expect(hostedModels).toContain('gemini-2.5-flash')
|
expect(hostedModels).toContain('gemini-2.5-flash')
|
||||||
|
|
||||||
// Should not contain models from other providers
|
|
||||||
expect(hostedModels).not.toContain('deepseek-v3')
|
expect(hostedModels).not.toContain('deepseek-v3')
|
||||||
expect(hostedModels).not.toContain('grok-4-latest')
|
expect(hostedModels).not.toContain('grok-4-latest')
|
||||||
})
|
})
|
||||||
@@ -558,31 +777,24 @@ describe('getHostedModels', () => {
|
|||||||
|
|
||||||
describe('shouldBillModelUsage', () => {
|
describe('shouldBillModelUsage', () => {
|
||||||
it.concurrent('should return true for exact matches of hosted models', () => {
|
it.concurrent('should return true for exact matches of hosted models', () => {
|
||||||
// OpenAI models
|
|
||||||
expect(shouldBillModelUsage('gpt-4o')).toBe(true)
|
expect(shouldBillModelUsage('gpt-4o')).toBe(true)
|
||||||
expect(shouldBillModelUsage('o1')).toBe(true)
|
expect(shouldBillModelUsage('o1')).toBe(true)
|
||||||
|
|
||||||
// Anthropic models
|
|
||||||
expect(shouldBillModelUsage('claude-sonnet-4-0')).toBe(true)
|
expect(shouldBillModelUsage('claude-sonnet-4-0')).toBe(true)
|
||||||
expect(shouldBillModelUsage('claude-opus-4-0')).toBe(true)
|
expect(shouldBillModelUsage('claude-opus-4-0')).toBe(true)
|
||||||
|
|
||||||
// Google models
|
|
||||||
expect(shouldBillModelUsage('gemini-2.5-pro')).toBe(true)
|
expect(shouldBillModelUsage('gemini-2.5-pro')).toBe(true)
|
||||||
expect(shouldBillModelUsage('gemini-2.5-flash')).toBe(true)
|
expect(shouldBillModelUsage('gemini-2.5-flash')).toBe(true)
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should return false for non-hosted models', () => {
|
it.concurrent('should return false for non-hosted models', () => {
|
||||||
// Other providers
|
|
||||||
expect(shouldBillModelUsage('deepseek-v3')).toBe(false)
|
expect(shouldBillModelUsage('deepseek-v3')).toBe(false)
|
||||||
expect(shouldBillModelUsage('grok-4-latest')).toBe(false)
|
expect(shouldBillModelUsage('grok-4-latest')).toBe(false)
|
||||||
|
|
||||||
// Unknown models
|
|
||||||
expect(shouldBillModelUsage('unknown-model')).toBe(false)
|
expect(shouldBillModelUsage('unknown-model')).toBe(false)
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should return false for versioned model names not in hosted list', () => {
|
it.concurrent('should return false for versioned model names not in hosted list', () => {
|
||||||
// Versioned model names that are NOT in the hosted list
|
|
||||||
// These should NOT be billed (user provides own API key)
|
|
||||||
expect(shouldBillModelUsage('claude-sonnet-4-20250514')).toBe(false)
|
expect(shouldBillModelUsage('claude-sonnet-4-20250514')).toBe(false)
|
||||||
expect(shouldBillModelUsage('gpt-4o-2024-08-06')).toBe(false)
|
expect(shouldBillModelUsage('gpt-4o-2024-08-06')).toBe(false)
|
||||||
expect(shouldBillModelUsage('claude-3-5-sonnet-20241022')).toBe(false)
|
expect(shouldBillModelUsage('claude-3-5-sonnet-20241022')).toBe(false)
|
||||||
@@ -595,8 +807,7 @@ describe('shouldBillModelUsage', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should not match partial model names', () => {
|
it.concurrent('should not match partial model names', () => {
|
||||||
// Should not match partial/prefix models
|
expect(shouldBillModelUsage('gpt-4')).toBe(false)
|
||||||
expect(shouldBillModelUsage('gpt-4')).toBe(false) // gpt-4o is hosted, not gpt-4
|
|
||||||
expect(shouldBillModelUsage('claude-sonnet')).toBe(false)
|
expect(shouldBillModelUsage('claude-sonnet')).toBe(false)
|
||||||
expect(shouldBillModelUsage('gemini')).toBe(false)
|
expect(shouldBillModelUsage('gemini')).toBe(false)
|
||||||
})
|
})
|
||||||
@@ -612,8 +823,8 @@ describe('Provider Management', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should use model patterns for pattern matching', () => {
|
it.concurrent('should use model patterns for pattern matching', () => {
|
||||||
expect(getProviderFromModel('gpt-5-custom')).toBe('openai') // Matches /^gpt/ pattern
|
expect(getProviderFromModel('gpt-5-custom')).toBe('openai')
|
||||||
expect(getProviderFromModel('claude-custom-model')).toBe('anthropic') // Matches /^claude/ pattern
|
expect(getProviderFromModel('claude-custom-model')).toBe('anthropic')
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should default to ollama for unknown models', () => {
|
it.concurrent('should default to ollama for unknown models', () => {
|
||||||
@@ -667,7 +878,6 @@ describe('Provider Management', () => {
|
|||||||
expect(Array.isArray(allModels)).toBe(true)
|
expect(Array.isArray(allModels)).toBe(true)
|
||||||
expect(allModels.length).toBeGreaterThan(0)
|
expect(allModels.length).toBeGreaterThan(0)
|
||||||
|
|
||||||
// Should contain models from different providers
|
|
||||||
expect(allModels).toContain('gpt-4o')
|
expect(allModels).toContain('gpt-4o')
|
||||||
expect(allModels).toContain('claude-sonnet-4-0')
|
expect(allModels).toContain('claude-sonnet-4-0')
|
||||||
expect(allModels).toContain('gemini-2.5-pro')
|
expect(allModels).toContain('gemini-2.5-pro')
|
||||||
@@ -712,7 +922,6 @@ describe('Provider Management', () => {
|
|||||||
|
|
||||||
const baseProviders = getBaseModelProviders()
|
const baseProviders = getBaseModelProviders()
|
||||||
expect(typeof baseProviders).toBe('object')
|
expect(typeof baseProviders).toBe('object')
|
||||||
// Should exclude ollama models
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -720,10 +929,8 @@ describe('Provider Management', () => {
|
|||||||
it.concurrent('should update ollama models', () => {
|
it.concurrent('should update ollama models', () => {
|
||||||
const mockModels = ['llama2', 'codellama', 'mistral']
|
const mockModels = ['llama2', 'codellama', 'mistral']
|
||||||
|
|
||||||
// This should not throw
|
|
||||||
expect(() => updateOllamaProviderModels(mockModels)).not.toThrow()
|
expect(() => updateOllamaProviderModels(mockModels)).not.toThrow()
|
||||||
|
|
||||||
// Verify the models were updated
|
|
||||||
const ollamaModels = getProviderModels('ollama')
|
const ollamaModels = getProviderModels('ollama')
|
||||||
expect(ollamaModels).toEqual(mockModels)
|
expect(ollamaModels).toEqual(mockModels)
|
||||||
})
|
})
|
||||||
@@ -754,7 +961,7 @@ describe('JSON and Structured Output', () => {
|
|||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should clean up common JSON issues', () => {
|
it.concurrent('should clean up common JSON issues', () => {
|
||||||
const content = '{\n "key": "value",\n "number": 42,\n}' // Trailing comma
|
const content = '{\n "key": "value",\n "number": 42,\n}'
|
||||||
const result = extractAndParseJSON(content)
|
const result = extractAndParseJSON(content)
|
||||||
expect(result).toEqual({ key: 'value', number: 42 })
|
expect(result).toEqual({ key: 'value', number: 42 })
|
||||||
})
|
})
|
||||||
@@ -945,13 +1152,13 @@ describe('prepareToolExecution', () => {
|
|||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
expect(toolParams.apiKey).toBe('user-key')
|
expect(toolParams.apiKey).toBe('user-key')
|
||||||
expect(toolParams.channel).toBe('#general') // User value wins
|
expect(toolParams.channel).toBe('#general')
|
||||||
expect(toolParams.message).toBe('Hello world')
|
expect(toolParams.message).toBe('Hello world')
|
||||||
})
|
})
|
||||||
|
|
||||||
it.concurrent('should filter out empty string user params', () => {
|
it.concurrent('should filter out empty string user params', () => {
|
||||||
const tool = {
|
const tool = {
|
||||||
params: { apiKey: 'user-key', channel: '' }, // Empty channel
|
params: { apiKey: 'user-key', channel: '' },
|
||||||
}
|
}
|
||||||
const llmArgs = { message: 'Hello', channel: '#llm-channel' }
|
const llmArgs = { message: 'Hello', channel: '#llm-channel' }
|
||||||
const request = {}
|
const request = {}
|
||||||
@@ -959,7 +1166,7 @@ describe('prepareToolExecution', () => {
|
|||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
expect(toolParams.apiKey).toBe('user-key')
|
expect(toolParams.apiKey).toBe('user-key')
|
||||||
expect(toolParams.channel).toBe('#llm-channel') // LLM value used since user is empty
|
expect(toolParams.channel).toBe('#llm-channel')
|
||||||
expect(toolParams.message).toBe('Hello')
|
expect(toolParams.message).toBe('Hello')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
@@ -969,7 +1176,7 @@ describe('prepareToolExecution', () => {
|
|||||||
const tool = {
|
const tool = {
|
||||||
params: {
|
params: {
|
||||||
workflowId: 'child-workflow-123',
|
workflowId: 'child-workflow-123',
|
||||||
inputMapping: '{}', // Empty JSON string from UI
|
inputMapping: '{}',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
const llmArgs = {
|
const llmArgs = {
|
||||||
@@ -979,7 +1186,6 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// LLM values should be used since user object is empty
|
|
||||||
expect(toolParams.inputMapping).toEqual({ query: 'search term', limit: 10 })
|
expect(toolParams.inputMapping).toEqual({ query: 'search term', limit: 10 })
|
||||||
expect(toolParams.workflowId).toBe('child-workflow-123')
|
expect(toolParams.workflowId).toBe('child-workflow-123')
|
||||||
})
|
})
|
||||||
@@ -988,7 +1194,7 @@ describe('prepareToolExecution', () => {
|
|||||||
const tool = {
|
const tool = {
|
||||||
params: {
|
params: {
|
||||||
workflowId: 'child-workflow',
|
workflowId: 'child-workflow',
|
||||||
inputMapping: '{"query": "", "customField": "user-value"}', // Partial values
|
inputMapping: '{"query": "", "customField": "user-value"}',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
const llmArgs = {
|
const llmArgs = {
|
||||||
@@ -998,7 +1204,6 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// LLM fills empty query, user's customField preserved, LLM's limit included
|
|
||||||
expect(toolParams.inputMapping).toEqual({
|
expect(toolParams.inputMapping).toEqual({
|
||||||
query: 'llm-search',
|
query: 'llm-search',
|
||||||
limit: 10,
|
limit: 10,
|
||||||
@@ -1020,7 +1225,6 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// User values win, but LLM's extra field is included
|
|
||||||
expect(toolParams.inputMapping).toEqual({
|
expect(toolParams.inputMapping).toEqual({
|
||||||
query: 'user-search',
|
query: 'user-search',
|
||||||
limit: 5,
|
limit: 5,
|
||||||
@@ -1032,7 +1236,7 @@ describe('prepareToolExecution', () => {
|
|||||||
const tool = {
|
const tool = {
|
||||||
params: {
|
params: {
|
||||||
workflowId: 'child-workflow',
|
workflowId: 'child-workflow',
|
||||||
inputMapping: { query: '', customField: 'user-value' }, // Object, not string
|
inputMapping: { query: '', customField: 'user-value' },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
const llmArgs = {
|
const llmArgs = {
|
||||||
@@ -1051,7 +1255,7 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
it.concurrent('should use LLM inputMapping when user does not provide it', () => {
|
it.concurrent('should use LLM inputMapping when user does not provide it', () => {
|
||||||
const tool = {
|
const tool = {
|
||||||
params: { workflowId: 'child-workflow' }, // No inputMapping
|
params: { workflowId: 'child-workflow' },
|
||||||
}
|
}
|
||||||
const llmArgs = {
|
const llmArgs = {
|
||||||
inputMapping: { query: 'llm-search', limit: 10 },
|
inputMapping: { query: 'llm-search', limit: 10 },
|
||||||
@@ -1070,7 +1274,7 @@ describe('prepareToolExecution', () => {
|
|||||||
inputMapping: '{"query": "user-search"}',
|
inputMapping: '{"query": "user-search"}',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
const llmArgs = {} // No inputMapping from LLM
|
const llmArgs = {}
|
||||||
const request = {}
|
const request = {}
|
||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
@@ -1092,7 +1296,6 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// Should use LLM values since user JSON is invalid
|
|
||||||
expect(toolParams.inputMapping).toEqual({ query: 'llm-search' })
|
expect(toolParams.inputMapping).toEqual({ query: 'llm-search' })
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -1105,9 +1308,8 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// Normal behavior: user values override LLM values
|
|
||||||
expect(toolParams.apiKey).toBe('user-key')
|
expect(toolParams.apiKey).toBe('user-key')
|
||||||
expect(toolParams.channel).toBe('#general') // User value wins
|
expect(toolParams.channel).toBe('#general')
|
||||||
expect(toolParams.message).toBe('Hello')
|
expect(toolParams.message).toBe('Hello')
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -1125,8 +1327,6 @@ describe('prepareToolExecution', () => {
|
|||||||
|
|
||||||
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
const { toolParams } = prepareToolExecution(tool, llmArgs, request)
|
||||||
|
|
||||||
// 0 and false should be preserved (they're valid values)
|
|
||||||
// empty string should be filled by LLM
|
|
||||||
expect(toolParams.inputMapping).toEqual({
|
expect(toolParams.inputMapping).toEqual({
|
||||||
limit: 0,
|
limit: 0,
|
||||||
enabled: false,
|
enabled: false,
|
||||||
|
|||||||
Reference in New Issue
Block a user