Compare commits


1 Commit

Author: waleed
SHA1: d222e924b1
Message: fix(azure): conditionally added responses api
Date: 2025-12-16 21:18:40 -08:00


@@ -18,11 +18,70 @@ import { executeTool } from '@/tools'
const logger = createLogger('AzureOpenAIProvider')
/**
* Determines if the API version uses the Responses API (2025+) or Chat Completions API
*/
function useResponsesApi(apiVersion: string): boolean {
// 2025-* versions use the Responses API
// 2024-* and earlier versions use the Chat Completions API
return apiVersion.startsWith('2025-')
}
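
Because the gate is a bare string-prefix check, a couple of spot checks pin down its behavior; a minimal sketch (the version strings below are illustrative, not an exhaustive list of real Azure API versions):

```ts
// Spot checks for the version gate above; version strings are illustrative.
console.assert(useResponsesApi('2025-03-01-preview') === true) // 2025-* -> Responses API
console.assert(useResponsesApi('2024-10-21') === false) // 2024-* -> Chat Completions
console.assert(useResponsesApi('2023-05-15') === false) // earlier -> Chat Completions
```
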
/**
* Helper function to convert an Azure OpenAI Responses API stream to a standard ReadableStream
* and collect completion metrics
*/
function createReadableStreamFromResponsesApiStream(
responsesStream: any,
onComplete?: (content: string, usage?: any) => void
): ReadableStream {
let fullContent = ''
let usageData: any = null
return new ReadableStream({
async start(controller) {
try {
for await (const event of responsesStream) {
if (event.usage) {
usageData = event.usage
}
if (event.type === 'response.output_text.delta') {
const content = event.delta || ''
if (content) {
fullContent += content
controller.enqueue(new TextEncoder().encode(content))
}
} else if (event.type === 'response.content_part.delta') {
const content = event.delta?.text || ''
if (content) {
fullContent += content
controller.enqueue(new TextEncoder().encode(content))
}
} else if (event.type === 'response.completed' || event.type === 'response.done') {
if (event.response?.usage) {
usageData = event.response.usage
}
}
}
if (onComplete) {
onComplete(fullContent, usageData)
}
controller.close()
} catch (error) {
controller.error(error)
}
},
})
}
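
For context, a minimal consumer sketch for the helper above. The fake event stream is a stand-in async iterable built from the event types this function handles; a real stream would come from the SDK's `responses.create({ stream: true })` call.

```ts
// Stand-in async iterable emitting the event shapes handled above (illustrative).
async function* fakeResponsesStream() {
  yield { type: 'response.output_text.delta', delta: 'Hello, ' }
  yield { type: 'response.output_text.delta', delta: 'world' }
  yield { type: 'response.completed', response: { usage: { input_tokens: 3, output_tokens: 2 } } }
}

async function demo(): Promise<void> {
  const stream = createReadableStreamFromResponsesApiStream(
    fakeResponsesStream(),
    (content, usage) => console.log('complete:', content, usage)
  )
  // Drain the ReadableStream and decode the UTF-8 chunks back to text.
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let result = await reader.read()
  while (!result.done) {
    console.log('chunk:', decoder.decode(result.value))
    result = await reader.read()
  }
}
```
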
/**
* Helper function to convert an Azure OpenAI stream to a standard ReadableStream
* and collect completion metrics
*/
- function createReadableStreamFromAzureOpenAIStream(
+ function createReadableStreamFromChatCompletionsStream(
azureOpenAIStream: any,
onComplete?: (content: string, usage?: any) => void
): ReadableStream {
@@ -33,7 +92,6 @@ function createReadableStreamFromAzureOpenAIStream(
async start(controller) {
try {
for await (const chunk of azureOpenAIStream) {
- // Check for usage data in the final chunk
if (chunk.usage) {
usageData = chunk.usage
}
@@ -45,7 +103,6 @@ function createReadableStreamFromAzureOpenAIStream(
}
}
- // Once stream is complete, call the completion callback with the final content and usage
if (onComplete) {
onComplete(fullContent, usageData)
}
@@ -58,6 +115,430 @@ function createReadableStreamFromAzureOpenAIStream(
})
}
/**
* Executes a request using the Responses API (for 2025+ API versions)
*/
async function executeWithResponsesApi(
azureOpenAI: AzureOpenAI,
request: ProviderRequest,
deploymentName: string,
providerStartTime: number,
providerStartTimeISO: string
): Promise<ProviderResponse | StreamingExecution> {
const inputMessages: any[] = []
if (request.context) {
inputMessages.push({
role: 'user',
content: request.context,
})
}
if (request.messages) {
inputMessages.push(...request.messages)
}
const tools = request.tools?.length
? request.tools.map((tool) => ({
type: 'function' as const,
function: {
name: tool.id,
description: tool.description,
parameters: tool.parameters,
},
}))
: undefined
const payload: any = {
model: deploymentName,
input: inputMessages.length > 0 ? inputMessages : request.systemPrompt || '',
}
if (request.systemPrompt) {
payload.instructions = request.systemPrompt
}
if (request.temperature !== undefined) payload.temperature = request.temperature
if (request.maxTokens !== undefined) payload.max_output_tokens = request.maxTokens
if (request.reasoningEffort !== undefined) {
payload.reasoning = { effort: request.reasoningEffort }
}
if (request.responseFormat) {
payload.text = {
format: {
type: 'json_schema',
json_schema: {
name: request.responseFormat.name || 'response_schema',
schema: request.responseFormat.schema || request.responseFormat,
strict: request.responseFormat.strict !== false,
},
},
}
logger.info('Added JSON schema text format to Responses API request')
}
if (tools?.length) {
payload.tools = tools
const forcedTools = request.tools?.filter((t) => t.usageControl === 'force') || []
if (forcedTools.length > 0) {
if (forcedTools.length === 1) {
payload.tool_choice = {
type: 'function',
function: { name: forcedTools[0].id },
}
} else {
payload.tool_choice = 'required'
}
} else {
payload.tool_choice = 'auto'
}
logger.info('Responses API request configuration:', {
toolCount: tools.length,
model: deploymentName,
})
}
try {
if (request.stream && (!tools || tools.length === 0)) {
logger.info('Using streaming response for Responses API request')
const streamResponse = await (azureOpenAI as any).responses.create({
...payload,
stream: true,
})
const tokenUsage = {
prompt: 0,
completion: 0,
total: 0,
}
const streamingResult = {
stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
const streamEndTime = Date.now()
const streamEndTimeISO = new Date(streamEndTime).toISOString()
if (streamingResult.execution.output.providerTiming) {
streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
streamingResult.execution.output.providerTiming.duration =
streamEndTime - providerStartTime
if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
streamEndTime
streamingResult.execution.output.providerTiming.timeSegments[0].duration =
streamEndTime - providerStartTime
}
}
if (usage) {
streamingResult.execution.output.tokens = {
prompt: usage.input_tokens || usage.prompt_tokens || 0,
completion: usage.output_tokens || usage.completion_tokens || 0,
total:
(usage.input_tokens || usage.prompt_tokens || 0) +
(usage.output_tokens || usage.completion_tokens || 0),
}
}
}),
execution: {
success: true,
output: {
content: '',
model: request.model,
tokens: tokenUsage,
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
timeSegments: [
{
type: 'model',
name: 'Streaming response',
startTime: providerStartTime,
endTime: Date.now(),
duration: Date.now() - providerStartTime,
},
],
},
},
logs: [],
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
},
},
} as StreamingExecution
return streamingResult
}
const initialCallTime = Date.now()
let currentResponse = await (azureOpenAI as any).responses.create(payload)
const firstResponseTime = Date.now() - initialCallTime
let content = currentResponse.output_text || ''
const tokens = {
prompt: currentResponse.usage?.input_tokens || 0,
completion: currentResponse.usage?.output_tokens || 0,
total:
(currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
}
const toolCalls: any[] = []
const toolResults: any[] = []
let iterationCount = 0
const MAX_ITERATIONS = 10
let modelTime = firstResponseTime
let toolsTime = 0
const timeSegments: TimeSegment[] = [
{
type: 'model',
name: 'Initial response',
startTime: initialCallTime,
endTime: initialCallTime + firstResponseTime,
duration: firstResponseTime,
},
]
while (iterationCount < MAX_ITERATIONS) {
const toolCallsInResponse =
currentResponse.output?.filter((item: any) => item.type === 'function_call') || []
if (toolCallsInResponse.length === 0) {
break
}
logger.info(
`Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
)
const toolsStartTime = Date.now()
for (const toolCall of toolCallsInResponse) {
try {
const toolName = toolCall.name
const toolArgs =
typeof toolCall.arguments === 'string'
? JSON.parse(toolCall.arguments)
: toolCall.arguments
const tool = request.tools?.find((t) => t.id === toolName)
if (!tool) continue
const toolCallStartTime = Date.now()
const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
const result = await executeTool(toolName, executionParams, true)
const toolCallEndTime = Date.now()
const toolCallDuration = toolCallEndTime - toolCallStartTime
timeSegments.push({
type: 'tool',
name: toolName,
startTime: toolCallStartTime,
endTime: toolCallEndTime,
duration: toolCallDuration,
})
let resultContent: any
if (result.success) {
toolResults.push(result.output)
resultContent = result.output
} else {
resultContent = {
error: true,
message: result.error || 'Tool execution failed',
tool: toolName,
}
}
toolCalls.push({
name: toolName,
arguments: toolParams,
startTime: new Date(toolCallStartTime).toISOString(),
endTime: new Date(toolCallEndTime).toISOString(),
duration: toolCallDuration,
result: resultContent,
success: result.success,
})
// Add function call output to input for next request
inputMessages.push({
type: 'function_call_output',
call_id: toolCall.call_id || toolCall.id,
output: JSON.stringify(resultContent),
})
} catch (error) {
logger.error('Error processing tool call:', {
error,
toolName: toolCall?.name,
})
}
}
const thisToolsTime = Date.now() - toolsStartTime
toolsTime += thisToolsTime
// Make the next request
const nextModelStartTime = Date.now()
const nextPayload = {
...payload,
input: inputMessages,
tool_choice: 'auto',
}
currentResponse = await (azureOpenAI as any).responses.create(nextPayload)
const nextModelEndTime = Date.now()
const thisModelTime = nextModelEndTime - nextModelStartTime
timeSegments.push({
type: 'model',
name: `Model response (iteration ${iterationCount + 1})`,
startTime: nextModelStartTime,
endTime: nextModelEndTime,
duration: thisModelTime,
})
modelTime += thisModelTime
// Update content
if (currentResponse.output_text) {
content = currentResponse.output_text
}
// Update token counts
if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.input_tokens || 0
tokens.completion += currentResponse.usage.output_tokens || 0
tokens.total = tokens.prompt + tokens.completion
}
iterationCount++
}
// Handle streaming for final response after tool processing
if (request.stream) {
logger.info('Using streaming for final response after tool processing (Responses API)')
const streamingPayload = {
...payload,
input: inputMessages,
tool_choice: 'auto',
stream: true,
}
const streamResponse = await (azureOpenAI as any).responses.create(streamingPayload)
const streamingResult = {
stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
if (usage) {
streamingResult.execution.output.tokens = {
prompt: usage.input_tokens || tokens.prompt,
completion: usage.output_tokens || tokens.completion,
total:
(usage.input_tokens || tokens.prompt) + (usage.output_tokens || tokens.completion),
}
}
}),
execution: {
success: true,
output: {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
total: tokens.total,
},
toolCalls:
toolCalls.length > 0
? {
list: toolCalls,
count: toolCalls.length,
}
: undefined,
providerTiming: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
modelTime: modelTime,
toolsTime: toolsTime,
firstResponseTime: firstResponseTime,
iterations: iterationCount + 1,
timeSegments: timeSegments,
},
},
logs: [],
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
},
},
} as StreamingExecution
return streamingResult
}
// Calculate overall timing
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
return {
content,
model: request.model,
tokens,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
toolResults: toolResults.length > 0 ? toolResults : undefined,
timing: {
startTime: providerStartTimeISO,
endTime: providerEndTimeISO,
duration: totalDuration,
modelTime: modelTime,
toolsTime: toolsTime,
firstResponseTime: firstResponseTime,
iterations: iterationCount + 1,
timeSegments: timeSegments,
},
}
} catch (error) {
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
logger.error('Error in Responses API request:', {
error,
duration: totalDuration,
})
const enhancedError = new Error(error instanceof Error ? error.message : String(error))
// @ts-ignore - Adding timing property to the error
enhancedError.timing = {
startTime: providerStartTimeISO,
endTime: providerEndTimeISO,
duration: totalDuration,
}
throw enhancedError
}
}
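
For reference, a sketch of the payload `executeWithResponsesApi` assembles for a request with one forced tool, mirroring the construction above (the deployment name, tool name, and schema are illustrative placeholders):

```ts
// Illustrative payload shape, mirroring the construction in executeWithResponsesApi.
// All names and values here are placeholders, not values from the commit.
const examplePayload = {
  model: 'my-gpt-deployment', // Azure deployment name, not the underlying model name
  input: [{ role: 'user', content: 'What is the weather in Paris?' }],
  instructions: 'You are a helpful assistant.', // from request.systemPrompt
  max_output_tokens: 512, // from request.maxTokens
  tools: [
    {
      type: 'function' as const,
      function: {
        name: 'get_weather',
        description: 'Look up current weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
      },
    },
  ],
  // A single forced tool becomes an explicit tool_choice; multiple become 'required'.
  tool_choice: { type: 'function', function: { name: 'get_weather' } },
}
```
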
/**
* Azure OpenAI provider configuration
*/
@@ -85,8 +566,7 @@ export const azureOpenAIProvider: ProviderConfig = {
// Extract Azure-specific configuration from request or environment
// Priority: request parameters > environment variables
const azureEndpoint = request.azureEndpoint || env.AZURE_OPENAI_ENDPOINT
- const azureApiVersion =
- request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-07-01-preview'
+ const azureApiVersion = request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-10-21'
if (!azureEndpoint) {
throw new Error(
@@ -101,6 +581,34 @@ export const azureOpenAIProvider: ProviderConfig = {
endpoint: azureEndpoint,
})
// Build deployment name - use deployment name instead of model name
const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
// Start execution timer for the entire provider execution
const providerStartTime = Date.now()
const providerStartTimeISO = new Date(providerStartTime).toISOString()
// Check if we should use the Responses API (2025+ versions)
if (useResponsesApi(azureApiVersion)) {
logger.info('Using Responses API for Azure OpenAI request', {
apiVersion: azureApiVersion,
model: deploymentName,
})
return executeWithResponsesApi(
azureOpenAI,
request,
deploymentName,
providerStartTime,
providerStartTimeISO
)
}
// Continue with Chat Completions API for 2024 and earlier versions
logger.info('Using Chat Completions API for Azure OpenAI request', {
apiVersion: azureApiVersion,
model: deploymentName,
})
// Start with an empty array for all messages
const allMessages = []
@@ -137,8 +645,7 @@ export const azureOpenAIProvider: ProviderConfig = {
}))
: undefined
- // Build the request payload - use deployment name instead of model name
- const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
+ // Build the request payload
const payload: any = {
model: deploymentName, // Azure OpenAI uses deployment name
messages: allMessages,
@@ -195,23 +702,16 @@ export const azureOpenAIProvider: ProviderConfig = {
}
}
- // Start execution timer for the entire provider execution
- const providerStartTime = Date.now()
- const providerStartTimeISO = new Date(providerStartTime).toISOString()
try {
- // Check if we can stream directly (no tools required)
if (request.stream && (!tools || tools.length === 0)) {
logger.info('Using streaming response for Azure OpenAI request')
- // Create a streaming request with token usage tracking
const streamResponse = await azureOpenAI.chat.completions.create({
...payload,
stream: true,
stream_options: { include_usage: true },
})
- // Start collecting token usage from the stream
const tokenUsage = {
prompt: 0,
completion: 0,
@@ -220,47 +720,44 @@ export const azureOpenAIProvider: ProviderConfig = {
let _streamContent = ''
- // Create a StreamingExecution response with a callback to update content and tokens
const streamingResult = {
- stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
- // Update the execution data with the final content and token usage
- _streamContent = content
- streamingResult.execution.output.content = content
- // Update the timing information with the actual completion time
- const streamEndTime = Date.now()
- const streamEndTimeISO = new Date(streamEndTime).toISOString()
- if (streamingResult.execution.output.providerTiming) {
- streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
- streamingResult.execution.output.providerTiming.duration =
- streamEndTime - providerStartTime
- // Update the time segment as well
- if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
- streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
- streamEndTime
- streamingResult.execution.output.providerTiming.timeSegments[0].duration =
- streamEndTime - providerStartTime
- }
- }
- // Update token usage if available from the stream
- if (usage) {
- const newTokens = {
- prompt: usage.prompt_tokens || tokenUsage.prompt,
- completion: usage.completion_tokens || tokenUsage.completion,
- total: usage.total_tokens || tokenUsage.total,
- }
- streamingResult.execution.output.tokens = newTokens
- }
- // We don't need to estimate tokens here as logger.ts will handle that
- }),
+ stream: createReadableStreamFromChatCompletionsStream(
+ streamResponse,
+ (content, usage) => {
+ _streamContent = content
+ streamingResult.execution.output.content = content
+ const streamEndTime = Date.now()
+ const streamEndTimeISO = new Date(streamEndTime).toISOString()
+ if (streamingResult.execution.output.providerTiming) {
+ streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+ streamingResult.execution.output.providerTiming.duration =
+ streamEndTime - providerStartTime
+ if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+ streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+ streamEndTime
+ streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+ streamEndTime - providerStartTime
+ }
+ }
+ if (usage) {
+ const newTokens = {
+ prompt: usage.prompt_tokens || tokenUsage.prompt,
+ completion: usage.completion_tokens || tokenUsage.completion,
+ total: usage.total_tokens || tokenUsage.total,
+ }
+ streamingResult.execution.output.tokens = newTokens
+ }
+ }
+ ),
execution: {
success: true,
output: {
- content: '', // Will be filled by the stream completion callback
+ content: '',
model: request.model,
tokens: tokenUsage,
toolCalls: undefined,
@@ -278,9 +775,8 @@ export const azureOpenAIProvider: ProviderConfig = {
},
],
},
- // Cost will be calculated in logger
},
- logs: [], // No block logs for direct streaming
+ logs: [],
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
@@ -289,21 +785,16 @@ export const azureOpenAIProvider: ProviderConfig = {
},
} as StreamingExecution
- // Return the streaming execution object with explicit casting
return streamingResult as StreamingExecution
}
- // Make the initial API request
const initialCallTime = Date.now()
- // Track the original tool_choice for forced tool tracking
const originalToolChoice = payload.tool_choice
- // Track forced tools and their usage
const forcedTools = preparedTools?.forcedTools || []
let usedForcedTools: string[] = []
- // Helper function to check for forced tool usage in responses
const checkForForcedToolUsage = (
response: any,
toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
@@ -327,7 +818,6 @@ export const azureOpenAIProvider: ProviderConfig = {
const firstResponseTime = Date.now() - initialCallTime
let content = currentResponse.choices[0]?.message?.content || ''
- // Collect token information but don't calculate costs - that will be done in logger.ts
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
@@ -337,16 +827,13 @@ export const azureOpenAIProvider: ProviderConfig = {
const toolResults = []
const currentMessages = [...allMessages]
let iterationCount = 0
- const MAX_ITERATIONS = 10 // Prevent infinite loops
+ const MAX_ITERATIONS = 10
- // Track time spent in model vs tools
let modelTime = firstResponseTime
let toolsTime = 0
- // Track if a forced tool has been used
let hasUsedForcedTool = false
- // Track each model and tool call segment with timestamps
const timeSegments: TimeSegment[] = [
{
type: 'model',
@@ -357,11 +844,9 @@ export const azureOpenAIProvider: ProviderConfig = {
},
]
- // Check if a forced tool was used in the first response
checkForForcedToolUsage(currentResponse, originalToolChoice)
while (iterationCount < MAX_ITERATIONS) {
- // Check for tool calls
const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
break
@@ -371,20 +856,16 @@ export const azureOpenAIProvider: ProviderConfig = {
`Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
)
- // Track time for tool calls in this batch
const toolsStartTime = Date.now()
- // Process each tool call
for (const toolCall of toolCallsInResponse) {
try {
const toolName = toolCall.function.name
const toolArgs = JSON.parse(toolCall.function.arguments)
- // Get the tool from the tools registry
const tool = request.tools?.find((t) => t.id === toolName)
if (!tool) continue
- // Execute the tool
const toolCallStartTime = Date.now()
const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
@@ -393,7 +874,6 @@ export const azureOpenAIProvider: ProviderConfig = {
const toolCallEndTime = Date.now()
const toolCallDuration = toolCallEndTime - toolCallStartTime
- // Add to time segments for both success and failure
timeSegments.push({
type: 'tool',
name: toolName,
@@ -402,13 +882,11 @@ export const azureOpenAIProvider: ProviderConfig = {
duration: toolCallDuration,
})
- // Prepare result content for the LLM
let resultContent: any
if (result.success) {
toolResults.push(result.output)
resultContent = result.output
} else {
- // Include error information so LLM can respond appropriately
resultContent = {
error: true,
message: result.error || 'Tool execution failed',
@@ -426,7 +904,6 @@ export const azureOpenAIProvider: ProviderConfig = {
success: result.success,
})
- // Add the tool call and result to messages (both success and failure)
currentMessages.push({
role: 'assistant',
content: null,
@@ -455,48 +932,38 @@ export const azureOpenAIProvider: ProviderConfig = {
}
}
- // Calculate tool call time for this iteration
const thisToolsTime = Date.now() - toolsStartTime
toolsTime += thisToolsTime
- // Make the next request with updated messages
const nextPayload = {
...payload,
messages: currentMessages,
}
- // Update tool_choice based on which forced tools have been used
if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
- // If we have remaining forced tools, get the next one to force
const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
if (remainingTools.length > 0) {
- // Force the next tool
nextPayload.tool_choice = {
type: 'function',
function: { name: remainingTools[0] },
}
logger.info(`Forcing next tool: ${remainingTools[0]}`)
} else {
- // All forced tools have been used, switch to auto
nextPayload.tool_choice = 'auto'
logger.info('All forced tools have been used, switching to auto tool_choice')
}
}
- // Time the next model call
const nextModelStartTime = Date.now()
- // Make the next request
currentResponse = await azureOpenAI.chat.completions.create(nextPayload)
- // Check if any forced tools were used in this response
checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
const nextModelEndTime = Date.now()
const thisModelTime = nextModelEndTime - nextModelStartTime
- // Add to time segments
timeSegments.push({
type: 'model',
name: `Model response (iteration ${iterationCount + 1})`,
@@ -505,15 +972,12 @@ export const azureOpenAIProvider: ProviderConfig = {
duration: thisModelTime,
})
- // Add to model time
modelTime += thisModelTime
- // Update content if we have a text response
if (currentResponse.choices[0]?.message?.content) {
content = currentResponse.choices[0].message.content
}
- // Update token counts
if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
@@ -523,46 +987,43 @@ export const azureOpenAIProvider: ProviderConfig = {
iterationCount++
}
- // After all tool processing complete, if streaming was requested, use streaming for the final response
if (request.stream) {
logger.info('Using streaming for final response after tool processing')
- // When streaming after tool calls with forced tools, make sure tool_choice is set to 'auto'
- // This prevents Azure OpenAI API from trying to force tool usage again in the final streaming response
const streamingPayload = {
...payload,
messages: currentMessages,
- tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
+ tool_choice: 'auto',
stream: true,
stream_options: { include_usage: true },
}
const streamResponse = await azureOpenAI.chat.completions.create(streamingPayload)
- // Create the StreamingExecution object with all collected data
let _streamContent = ''
const streamingResult = {
- stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
- // Update the execution data with the final content and token usage
- _streamContent = content
- streamingResult.execution.output.content = content
- // Update token usage if available from the stream
- if (usage) {
- const newTokens = {
- prompt: usage.prompt_tokens || tokens.prompt,
- completion: usage.completion_tokens || tokens.completion,
- total: usage.total_tokens || tokens.total,
- }
- streamingResult.execution.output.tokens = newTokens
- }
- }),
+ stream: createReadableStreamFromChatCompletionsStream(
+ streamResponse,
+ (content, usage) => {
+ _streamContent = content
+ streamingResult.execution.output.content = content
+ if (usage) {
+ const newTokens = {
+ prompt: usage.prompt_tokens || tokens.prompt,
+ completion: usage.completion_tokens || tokens.completion,
+ total: usage.total_tokens || tokens.total,
+ }
+ streamingResult.execution.output.tokens = newTokens
+ }
+ }
+ ),
execution: {
success: true,
output: {
- content: '', // Will be filled by the callback
+ content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
@@ -597,11 +1058,9 @@ export const azureOpenAIProvider: ProviderConfig = {
},
} as StreamingExecution
- // Return the streaming execution object with explicit casting
return streamingResult as StreamingExecution
}
- // Calculate overall timing
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
@@ -622,10 +1081,8 @@ export const azureOpenAIProvider: ProviderConfig = {
iterations: iterationCount + 1,
timeSegments: timeSegments,
},
- // We're not calculating cost here as it will be handled in logger.ts
}
} catch (error) {
- // Include timing information even for errors
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
@@ -635,7 +1092,6 @@ export const azureOpenAIProvider: ProviderConfig = {
duration: totalDuration,
})
- // Create a new error with timing information
const enhancedError = new Error(error instanceof Error ? error.message : String(error))
// @ts-ignore - Adding timing property to the error
enhancedError.timing = {