Mirror of https://github.com/simstudioai/sim.git
Synced 2026-01-09 23:17:59 -05:00
Compare commits
1 commit

| Author | SHA1 | Date |
|---|---|---|
| | d222e924b1 | |
@@ -18,11 +18,70 @@ import { executeTool } from '@/tools'

const logger = createLogger('AzureOpenAIProvider')

/**
 * Determines if the API version uses the Responses API (2025+) or Chat Completions API
 */
function useResponsesApi(apiVersion: string): boolean {
  // 2025-* versions use the Responses API
  // 2024-* and earlier versions use the Chat Completions API
  return apiVersion.startsWith('2025-')
}

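As a rough usage sketch (not part of this commit; describeApiChoice is a hypothetical helper), the version check routes requests like this, assuming Azure's usual YYYY-MM-DD[-preview] version strings:

function describeApiChoice(apiVersion: string): string {
  // Mirrors the routing decision made by useResponsesApi() above
  return useResponsesApi(apiVersion) ? 'Responses API' : 'Chat Completions API'
}

// describeApiChoice('2024-10-21')         -> 'Chat Completions API'
// describeApiChoice('2025-04-01-preview') -> 'Responses API'
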
/**
 * Helper function to convert an Azure OpenAI Responses API stream to a standard ReadableStream
 * and collect completion metrics
 */
function createReadableStreamFromResponsesApiStream(
  responsesStream: any,
  onComplete?: (content: string, usage?: any) => void
): ReadableStream {
  let fullContent = ''
  let usageData: any = null

  return new ReadableStream({
    async start(controller) {
      try {
        for await (const event of responsesStream) {
          if (event.usage) {
            usageData = event.usage
          }

          if (event.type === 'response.output_text.delta') {
            const content = event.delta || ''
            if (content) {
              fullContent += content
              controller.enqueue(new TextEncoder().encode(content))
            }
          } else if (event.type === 'response.content_part.delta') {
            const content = event.delta?.text || ''
            if (content) {
              fullContent += content
              controller.enqueue(new TextEncoder().encode(content))
            }
          } else if (event.type === 'response.completed' || event.type === 'response.done') {
            if (event.response?.usage) {
              usageData = event.response.usage
            }
          }
        }

        if (onComplete) {
          onComplete(fullContent, usageData)
        }

        controller.close()
      } catch (error) {
        controller.error(error)
      }
    },
  })
}

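For orientation (not from the commit), a caller could drain the ReadableStream produced above like this; the decoding mirrors the TextEncoder used inside the helper:

async function collectStreamText(stream: ReadableStream<Uint8Array>): Promise<string> {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    // Each chunk is a UTF-8 encoded text delta enqueued by the helper
    text += decoder.decode(value, { stream: true })
  }
  return text
}
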
/**
 * Helper function to convert an Azure OpenAI stream to a standard ReadableStream
 * and collect completion metrics
 */
function createReadableStreamFromAzureOpenAIStream(
function createReadableStreamFromChatCompletionsStream(
  azureOpenAIStream: any,
  onComplete?: (content: string, usage?: any) => void
): ReadableStream {
@@ -33,7 +92,6 @@ function createReadableStreamFromAzureOpenAIStream(
    async start(controller) {
      try {
        for await (const chunk of azureOpenAIStream) {
          // Check for usage data in the final chunk
          if (chunk.usage) {
            usageData = chunk.usage
          }
@@ -45,7 +103,6 @@ function createReadableStreamFromAzureOpenAIStream(
          }
        }

        // Once stream is complete, call the completion callback with the final content and usage
        if (onComplete) {
          onComplete(fullContent, usageData)
        }
@@ -58,6 +115,430 @@ function createReadableStreamFromAzureOpenAIStream(
  })
}

/**
 * Executes a request using the Responses API (for 2025+ API versions)
 */
async function executeWithResponsesApi(
  azureOpenAI: AzureOpenAI,
  request: ProviderRequest,
  deploymentName: string,
  providerStartTime: number,
  providerStartTimeISO: string
): Promise<ProviderResponse | StreamingExecution> {
  const inputMessages: any[] = []

  if (request.context) {
    inputMessages.push({
      role: 'user',
      content: request.context,
    })
  }

  if (request.messages) {
    inputMessages.push(...request.messages)
  }

  const tools = request.tools?.length
    ? request.tools.map((tool) => ({
        type: 'function' as const,
        function: {
          name: tool.id,
          description: tool.description,
          parameters: tool.parameters,
        },
      }))
    : undefined

  const payload: any = {
    model: deploymentName,
    input: inputMessages.length > 0 ? inputMessages : request.systemPrompt || '',
  }

  if (request.systemPrompt) {
    payload.instructions = request.systemPrompt
  }

  if (request.temperature !== undefined) payload.temperature = request.temperature
  if (request.maxTokens !== undefined) payload.max_output_tokens = request.maxTokens

  if (request.reasoningEffort !== undefined) {
    payload.reasoning = { effort: request.reasoningEffort }
  }

  if (request.responseFormat) {
    payload.text = {
      format: {
        type: 'json_schema',
        json_schema: {
          name: request.responseFormat.name || 'response_schema',
          schema: request.responseFormat.schema || request.responseFormat,
          strict: request.responseFormat.strict !== false,
        },
      },
    }
    logger.info('Added JSON schema text format to Responses API request')
  }

  if (tools?.length) {
    payload.tools = tools

    const forcedTools = request.tools?.filter((t) => t.usageControl === 'force') || []
    if (forcedTools.length > 0) {
      if (forcedTools.length === 1) {
        payload.tool_choice = {
          type: 'function',
          function: { name: forcedTools[0].id },
        }
      } else {
        payload.tool_choice = 'required'
      }
    } else {
      payload.tool_choice = 'auto'
    }

    logger.info('Responses API request configuration:', {
      toolCount: tools.length,
      model: deploymentName,
    })
  }

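To make the request shape concrete, here is a hedged sketch of what the assembled Responses API payload could look like for a simple tooled request; the field names follow the assignments above, while the deployment name, tool, and values are invented for illustration:

const examplePayload = {
  model: 'my-gpt-deployment', // Azure deployment name, not the model family name
  input: [{ role: 'user', content: 'What is the weather in Berlin?' }],
  instructions: 'You are a helpful assistant.',
  max_output_tokens: 512,
  tools: [
    {
      type: 'function' as const,
      function: {
        name: 'get_weather',
        description: 'Look up current weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } },
      },
    },
  ],
  tool_choice: 'auto',
}
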
  try {
    if (request.stream && (!tools || tools.length === 0)) {
      logger.info('Using streaming response for Responses API request')

      const streamResponse = await (azureOpenAI as any).responses.create({
        ...payload,
        stream: true,
      })

      const tokenUsage = {
        prompt: 0,
        completion: 0,
        total: 0,
      }

      const streamingResult = {
        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
          streamingResult.execution.output.content = content

          const streamEndTime = Date.now()
          const streamEndTimeISO = new Date(streamEndTime).toISOString()

          if (streamingResult.execution.output.providerTiming) {
            streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
            streamingResult.execution.output.providerTiming.duration =
              streamEndTime - providerStartTime

            if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
              streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
                streamEndTime
              streamingResult.execution.output.providerTiming.timeSegments[0].duration =
                streamEndTime - providerStartTime
            }
          }

          if (usage) {
            streamingResult.execution.output.tokens = {
              prompt: usage.input_tokens || usage.prompt_tokens || 0,
              completion: usage.output_tokens || usage.completion_tokens || 0,
              total:
                (usage.input_tokens || usage.prompt_tokens || 0) +
                (usage.output_tokens || usage.completion_tokens || 0),
            }
          }
        }),
        execution: {
          success: true,
          output: {
            content: '',
            model: request.model,
            tokens: tokenUsage,
            toolCalls: undefined,
            providerTiming: {
              startTime: providerStartTimeISO,
              endTime: new Date().toISOString(),
              duration: Date.now() - providerStartTime,
              timeSegments: [
                {
                  type: 'model',
                  name: 'Streaming response',
                  startTime: providerStartTime,
                  endTime: Date.now(),
                  duration: Date.now() - providerStartTime,
                },
              ],
            },
          },
          logs: [],
          metadata: {
            startTime: providerStartTimeISO,
            endTime: new Date().toISOString(),
            duration: Date.now() - providerStartTime,
          },
        },
      } as StreamingExecution

      return streamingResult
    }

    const initialCallTime = Date.now()
    let currentResponse = await (azureOpenAI as any).responses.create(payload)
    const firstResponseTime = Date.now() - initialCallTime

    let content = currentResponse.output_text || ''

    const tokens = {
      prompt: currentResponse.usage?.input_tokens || 0,
      completion: currentResponse.usage?.output_tokens || 0,
      total:
        (currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
    }

    const toolCalls: any[] = []
    const toolResults: any[] = []
    let iterationCount = 0
    const MAX_ITERATIONS = 10

    let modelTime = firstResponseTime
    let toolsTime = 0

    const timeSegments: TimeSegment[] = [
      {
        type: 'model',
        name: 'Initial response',
        startTime: initialCallTime,
        endTime: initialCallTime + firstResponseTime,
        duration: firstResponseTime,
      },
    ]

    while (iterationCount < MAX_ITERATIONS) {
      const toolCallsInResponse =
        currentResponse.output?.filter((item: any) => item.type === 'function_call') || []

      if (toolCallsInResponse.length === 0) {
        break
      }

      logger.info(
        `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
      )

      const toolsStartTime = Date.now()

      for (const toolCall of toolCallsInResponse) {
        try {
          const toolName = toolCall.name
          const toolArgs =
            typeof toolCall.arguments === 'string'
              ? JSON.parse(toolCall.arguments)
              : toolCall.arguments

          const tool = request.tools?.find((t) => t.id === toolName)
          if (!tool) continue

          const toolCallStartTime = Date.now()
          const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)

          const result = await executeTool(toolName, executionParams, true)
          const toolCallEndTime = Date.now()
          const toolCallDuration = toolCallEndTime - toolCallStartTime

          timeSegments.push({
            type: 'tool',
            name: toolName,
            startTime: toolCallStartTime,
            endTime: toolCallEndTime,
            duration: toolCallDuration,
          })

          let resultContent: any
          if (result.success) {
            toolResults.push(result.output)
            resultContent = result.output
          } else {
            resultContent = {
              error: true,
              message: result.error || 'Tool execution failed',
              tool: toolName,
            }
          }

          toolCalls.push({
            name: toolName,
            arguments: toolParams,
            startTime: new Date(toolCallStartTime).toISOString(),
            endTime: new Date(toolCallEndTime).toISOString(),
            duration: toolCallDuration,
            result: resultContent,
            success: result.success,
          })

          // Add function call output to input for next request
          inputMessages.push({
            type: 'function_call_output',
            call_id: toolCall.call_id || toolCall.id,
            output: JSON.stringify(resultContent),
          })
        } catch (error) {
          logger.error('Error processing tool call:', {
            error,
            toolName: toolCall?.name,
          })
        }
      }

      const thisToolsTime = Date.now() - toolsStartTime
      toolsTime += thisToolsTime

      // Make the next request
      const nextModelStartTime = Date.now()
      const nextPayload = {
        ...payload,
        input: inputMessages,
        tool_choice: 'auto',
      }

      currentResponse = await (azureOpenAI as any).responses.create(nextPayload)

      const nextModelEndTime = Date.now()
      const thisModelTime = nextModelEndTime - nextModelStartTime

      timeSegments.push({
        type: 'model',
        name: `Model response (iteration ${iterationCount + 1})`,
        startTime: nextModelStartTime,
        endTime: nextModelEndTime,
        duration: thisModelTime,
      })

      modelTime += thisModelTime

      // Update content
      if (currentResponse.output_text) {
        content = currentResponse.output_text
      }

      // Update token counts
      if (currentResponse.usage) {
        tokens.prompt += currentResponse.usage.input_tokens || 0
        tokens.completion += currentResponse.usage.output_tokens || 0
        tokens.total = tokens.prompt + tokens.completion
      }

      iterationCount++
    }

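The loop above follows the Responses API tool-calling round trip: the model returns function_call items in currentResponse.output, and the code appends matching function_call_output items to inputMessages before the next request. A rough sketch of one exchange (names, IDs, and values invented for illustration):

// Emitted by the model in currentResponse.output:
const exampleFunctionCall = {
  type: 'function_call',
  call_id: 'call_abc123',
  name: 'get_weather',
  arguments: '{"city":"Berlin"}',
}

// Appended to inputMessages before the follow-up responses.create() call:
const exampleFunctionCallOutput = {
  type: 'function_call_output',
  call_id: 'call_abc123',
  output: JSON.stringify({ temperatureC: 21, conditions: 'sunny' }),
}
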
    // Handle streaming for final response after tool processing
    if (request.stream) {
      logger.info('Using streaming for final response after tool processing (Responses API)')

      const streamingPayload = {
        ...payload,
        input: inputMessages,
        tool_choice: 'auto',
        stream: true,
      }

      const streamResponse = await (azureOpenAI as any).responses.create(streamingPayload)

      const streamingResult = {
        stream: createReadableStreamFromResponsesApiStream(streamResponse, (content, usage) => {
          streamingResult.execution.output.content = content

          if (usage) {
            streamingResult.execution.output.tokens = {
              prompt: usage.input_tokens || tokens.prompt,
              completion: usage.output_tokens || tokens.completion,
              total:
                (usage.input_tokens || tokens.prompt) + (usage.output_tokens || tokens.completion),
            }
          }
        }),
        execution: {
          success: true,
          output: {
            content: '',
            model: request.model,
            tokens: {
              prompt: tokens.prompt,
              completion: tokens.completion,
              total: tokens.total,
            },
            toolCalls:
              toolCalls.length > 0
                ? {
                    list: toolCalls,
                    count: toolCalls.length,
                  }
                : undefined,
            providerTiming: {
              startTime: providerStartTimeISO,
              endTime: new Date().toISOString(),
              duration: Date.now() - providerStartTime,
              modelTime: modelTime,
              toolsTime: toolsTime,
              firstResponseTime: firstResponseTime,
              iterations: iterationCount + 1,
              timeSegments: timeSegments,
            },
          },
          logs: [],
          metadata: {
            startTime: providerStartTimeISO,
            endTime: new Date().toISOString(),
            duration: Date.now() - providerStartTime,
          },
        },
      } as StreamingExecution

      return streamingResult
    }

    // Calculate overall timing
    const providerEndTime = Date.now()
    const providerEndTimeISO = new Date(providerEndTime).toISOString()
    const totalDuration = providerEndTime - providerStartTime

    return {
      content,
      model: request.model,
      tokens,
      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
      toolResults: toolResults.length > 0 ? toolResults : undefined,
      timing: {
        startTime: providerStartTimeISO,
        endTime: providerEndTimeISO,
        duration: totalDuration,
        modelTime: modelTime,
        toolsTime: toolsTime,
        firstResponseTime: firstResponseTime,
        iterations: iterationCount + 1,
        timeSegments: timeSegments,
      },
    }
  } catch (error) {
    const providerEndTime = Date.now()
    const providerEndTimeISO = new Date(providerEndTime).toISOString()
    const totalDuration = providerEndTime - providerStartTime

    logger.error('Error in Responses API request:', {
      error,
      duration: totalDuration,
    })

    const enhancedError = new Error(error instanceof Error ? error.message : String(error))
    // @ts-ignore - Adding timing property to the error
    enhancedError.timing = {
      startTime: providerStartTimeISO,
      endTime: providerEndTimeISO,
      duration: totalDuration,
    }

    throw enhancedError
  }
}

/**
 * Azure OpenAI provider configuration
 */

@@ -85,8 +566,7 @@ export const azureOpenAIProvider: ProviderConfig = {
    // Extract Azure-specific configuration from request or environment
    // Priority: request parameters > environment variables
    const azureEndpoint = request.azureEndpoint || env.AZURE_OPENAI_ENDPOINT
    const azureApiVersion =
      request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-07-01-preview'
    const azureApiVersion = request.azureApiVersion || env.AZURE_OPENAI_API_VERSION || '2024-10-21'

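This hunk only changes the fallback API version (from '2024-07-01-preview' to '2024-10-21'); the effective version still decides which API path is taken. A hedged configuration sketch, using the same variable names the code reads and placeholder values:

// .env (placeholder values)
// AZURE_OPENAI_ENDPOINT=https://my-resource.openai.azure.com
// AZURE_OPENAI_API_VERSION=2025-04-01-preview   // any 2025-* version selects the Responses API

const apiVersion = process.env.AZURE_OPENAI_API_VERSION || '2024-10-21'
const usesResponsesApi = useResponsesApi(apiVersion) // false for the '2024-10-21' fallback
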
    if (!azureEndpoint) {
      throw new Error(
@@ -101,6 +581,34 @@ export const azureOpenAIProvider: ProviderConfig = {
      endpoint: azureEndpoint,
    })

    // Build deployment name - use deployment name instead of model name
    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')

    // Start execution timer for the entire provider execution
    const providerStartTime = Date.now()
    const providerStartTimeISO = new Date(providerStartTime).toISOString()

    // Check if we should use the Responses API (2025+ versions)
    if (useResponsesApi(azureApiVersion)) {
      logger.info('Using Responses API for Azure OpenAI request', {
        apiVersion: azureApiVersion,
        model: deploymentName,
      })
      return executeWithResponsesApi(
        azureOpenAI,
        request,
        deploymentName,
        providerStartTime,
        providerStartTimeISO
      )
    }

    // Continue with Chat Completions API for 2024 and earlier versions
    logger.info('Using Chat Completions API for Azure OpenAI request', {
      apiVersion: azureApiVersion,
      model: deploymentName,
    })

    // Start with an empty array for all messages
    const allMessages = []

@@ -137,8 +645,7 @@ export const azureOpenAIProvider: ProviderConfig = {
        }))
      : undefined

    // Build the request payload - use deployment name instead of model name
    const deploymentName = (request.model || 'azure/gpt-4o').replace('azure/', '')
    // Build the request payload
    const payload: any = {
      model: deploymentName, // Azure OpenAI uses deployment name
      messages: allMessages,

@@ -195,23 +702,16 @@ export const azureOpenAIProvider: ProviderConfig = {
        }
      }

    // Start execution timer for the entire provider execution
    const providerStartTime = Date.now()
    const providerStartTimeISO = new Date(providerStartTime).toISOString()

    try {
      // Check if we can stream directly (no tools required)
      if (request.stream && (!tools || tools.length === 0)) {
        logger.info('Using streaming response for Azure OpenAI request')

        // Create a streaming request with token usage tracking
        const streamResponse = await azureOpenAI.chat.completions.create({
          ...payload,
          stream: true,
          stream_options: { include_usage: true },
        })

        // Start collecting token usage from the stream
        const tokenUsage = {
          prompt: 0,
          completion: 0,
@@ -220,47 +720,44 @@ export const azureOpenAIProvider: ProviderConfig = {

        let _streamContent = ''

        // Create a StreamingExecution response with a callback to update content and tokens
        const streamingResult = {
          stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
            // Update the execution data with the final content and token usage
            _streamContent = content
            streamingResult.execution.output.content = content
          stream: createReadableStreamFromChatCompletionsStream(
            streamResponse,
            (content, usage) => {
              _streamContent = content
              streamingResult.execution.output.content = content

            // Update the timing information with the actual completion time
            const streamEndTime = Date.now()
            const streamEndTimeISO = new Date(streamEndTime).toISOString()
              const streamEndTime = Date.now()
              const streamEndTimeISO = new Date(streamEndTime).toISOString()

            if (streamingResult.execution.output.providerTiming) {
              streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
              streamingResult.execution.output.providerTiming.duration =
                streamEndTime - providerStartTime

              // Update the time segment as well
              if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
                streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
                  streamEndTime
                streamingResult.execution.output.providerTiming.timeSegments[0].duration =
              if (streamingResult.execution.output.providerTiming) {
                streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
                streamingResult.execution.output.providerTiming.duration =
                  streamEndTime - providerStartTime
              }
              }

            // Update token usage if available from the stream
            if (usage) {
              const newTokens = {
                prompt: usage.prompt_tokens || tokenUsage.prompt,
                completion: usage.completion_tokens || tokenUsage.completion,
                total: usage.total_tokens || tokenUsage.total,
                if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
                  streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
                    streamEndTime
                  streamingResult.execution.output.providerTiming.timeSegments[0].duration =
                    streamEndTime - providerStartTime
                }
              }

              streamingResult.execution.output.tokens = newTokens
              if (usage) {
                const newTokens = {
                  prompt: usage.prompt_tokens || tokenUsage.prompt,
                  completion: usage.completion_tokens || tokenUsage.completion,
                  total: usage.total_tokens || tokenUsage.total,
                }

                streamingResult.execution.output.tokens = newTokens
              }
            }
            // We don't need to estimate tokens here as logger.ts will handle that
          }),
          ),
          execution: {
            success: true,
            output: {
              content: '', // Will be filled by the stream completion callback
              content: '',
              model: request.model,
              tokens: tokenUsage,
              toolCalls: undefined,
@@ -278,9 +775,8 @@ export const azureOpenAIProvider: ProviderConfig = {
                },
              ],
            },
            // Cost will be calculated in logger
          },
          logs: [], // No block logs for direct streaming
          logs: [],
          metadata: {
            startTime: providerStartTimeISO,
            endTime: new Date().toISOString(),
@@ -289,21 +785,16 @@ export const azureOpenAIProvider: ProviderConfig = {
          },
        } as StreamingExecution

        // Return the streaming execution object with explicit casting
        return streamingResult as StreamingExecution
      }

      // Make the initial API request
      const initialCallTime = Date.now()

      // Track the original tool_choice for forced tool tracking
      const originalToolChoice = payload.tool_choice

      // Track forced tools and their usage
      const forcedTools = preparedTools?.forcedTools || []
      let usedForcedTools: string[] = []

      // Helper function to check for forced tool usage in responses
      const checkForForcedToolUsage = (
        response: any,
        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
@@ -327,7 +818,6 @@ export const azureOpenAIProvider: ProviderConfig = {
      const firstResponseTime = Date.now() - initialCallTime

      let content = currentResponse.choices[0]?.message?.content || ''
      // Collect token information but don't calculate costs - that will be done in logger.ts
      const tokens = {
        prompt: currentResponse.usage?.prompt_tokens || 0,
        completion: currentResponse.usage?.completion_tokens || 0,
@@ -337,16 +827,13 @@ export const azureOpenAIProvider: ProviderConfig = {
      const toolResults = []
      const currentMessages = [...allMessages]
      let iterationCount = 0
      const MAX_ITERATIONS = 10 // Prevent infinite loops
      const MAX_ITERATIONS = 10

      // Track time spent in model vs tools
      let modelTime = firstResponseTime
      let toolsTime = 0

      // Track if a forced tool has been used
      let hasUsedForcedTool = false

      // Track each model and tool call segment with timestamps
      const timeSegments: TimeSegment[] = [
        {
          type: 'model',
@@ -357,11 +844,9 @@ export const azureOpenAIProvider: ProviderConfig = {
        },
      ]

      // Check if a forced tool was used in the first response
      checkForForcedToolUsage(currentResponse, originalToolChoice)

      while (iterationCount < MAX_ITERATIONS) {
        // Check for tool calls
        const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
        if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
          break
@@ -371,20 +856,16 @@ export const azureOpenAIProvider: ProviderConfig = {
          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
        )

        // Track time for tool calls in this batch
        const toolsStartTime = Date.now()

        // Process each tool call
        for (const toolCall of toolCallsInResponse) {
          try {
            const toolName = toolCall.function.name
            const toolArgs = JSON.parse(toolCall.function.arguments)

            // Get the tool from the tools registry
            const tool = request.tools?.find((t) => t.id === toolName)
            if (!tool) continue

            // Execute the tool
            const toolCallStartTime = Date.now()

            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
@@ -393,7 +874,6 @@ export const azureOpenAIProvider: ProviderConfig = {
            const toolCallEndTime = Date.now()
            const toolCallDuration = toolCallEndTime - toolCallStartTime

            // Add to time segments for both success and failure
            timeSegments.push({
              type: 'tool',
              name: toolName,
@@ -402,13 +882,11 @@ export const azureOpenAIProvider: ProviderConfig = {
              duration: toolCallDuration,
            })

            // Prepare result content for the LLM
            let resultContent: any
            if (result.success) {
              toolResults.push(result.output)
              resultContent = result.output
            } else {
              // Include error information so LLM can respond appropriately
              resultContent = {
                error: true,
                message: result.error || 'Tool execution failed',
@@ -426,7 +904,6 @@ export const azureOpenAIProvider: ProviderConfig = {
              success: result.success,
            })

            // Add the tool call and result to messages (both success and failure)
            currentMessages.push({
              role: 'assistant',
              content: null,
@@ -455,48 +932,38 @@ export const azureOpenAIProvider: ProviderConfig = {
          }
        }

        // Calculate tool call time for this iteration
        const thisToolsTime = Date.now() - toolsStartTime
        toolsTime += thisToolsTime

        // Make the next request with updated messages
        const nextPayload = {
          ...payload,
          messages: currentMessages,
        }

        // Update tool_choice based on which forced tools have been used
        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
          // If we have remaining forced tools, get the next one to force
          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))

          if (remainingTools.length > 0) {
            // Force the next tool
            nextPayload.tool_choice = {
              type: 'function',
              function: { name: remainingTools[0] },
            }
            logger.info(`Forcing next tool: ${remainingTools[0]}`)
          } else {
            // All forced tools have been used, switch to auto
            nextPayload.tool_choice = 'auto'
            logger.info('All forced tools have been used, switching to auto tool_choice')
          }
        }

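As an illustrative trace (not produced by the code), suppose the initial payload forces 'search_docs' and forcedTools = ['search_docs', 'create_ticket']; the update above would then progress roughly like this:

// After iteration 1 ('search_docs' was used): force the next remaining tool
//   nextPayload.tool_choice = { type: 'function', function: { name: 'create_ticket' } }
// After iteration 2 ('create_ticket' was used): no forced tools remain
//   nextPayload.tool_choice = 'auto'
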
        // Time the next model call
        const nextModelStartTime = Date.now()

        // Make the next request
        currentResponse = await azureOpenAI.chat.completions.create(nextPayload)

        // Check if any forced tools were used in this response
        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)

        const nextModelEndTime = Date.now()
        const thisModelTime = nextModelEndTime - nextModelStartTime

        // Add to time segments
        timeSegments.push({
          type: 'model',
          name: `Model response (iteration ${iterationCount + 1})`,
@@ -505,15 +972,12 @@ export const azureOpenAIProvider: ProviderConfig = {
          duration: thisModelTime,
        })

        // Add to model time
        modelTime += thisModelTime

        // Update content if we have a text response
        if (currentResponse.choices[0]?.message?.content) {
          content = currentResponse.choices[0].message.content
        }

        // Update token counts
        if (currentResponse.usage) {
          tokens.prompt += currentResponse.usage.prompt_tokens || 0
          tokens.completion += currentResponse.usage.completion_tokens || 0
@@ -523,46 +987,43 @@ export const azureOpenAIProvider: ProviderConfig = {
        iterationCount++
      }

      // After all tool processing complete, if streaming was requested, use streaming for the final response
      if (request.stream) {
        logger.info('Using streaming for final response after tool processing')

        // When streaming after tool calls with forced tools, make sure tool_choice is set to 'auto'
        // This prevents Azure OpenAI API from trying to force tool usage again in the final streaming response
        const streamingPayload = {
          ...payload,
          messages: currentMessages,
          tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
          tool_choice: 'auto',
          stream: true,
          stream_options: { include_usage: true },
        }

        const streamResponse = await azureOpenAI.chat.completions.create(streamingPayload)

        // Create the StreamingExecution object with all collected data
        let _streamContent = ''

        const streamingResult = {
          stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
            // Update the execution data with the final content and token usage
            _streamContent = content
            streamingResult.execution.output.content = content
          stream: createReadableStreamFromChatCompletionsStream(
            streamResponse,
            (content, usage) => {
              _streamContent = content
              streamingResult.execution.output.content = content

            // Update token usage if available from the stream
            if (usage) {
              const newTokens = {
                prompt: usage.prompt_tokens || tokens.prompt,
                completion: usage.completion_tokens || tokens.completion,
                total: usage.total_tokens || tokens.total,
              if (usage) {
                const newTokens = {
                  prompt: usage.prompt_tokens || tokens.prompt,
                  completion: usage.completion_tokens || tokens.completion,
                  total: usage.total_tokens || tokens.total,
                }

              streamingResult.execution.output.tokens = newTokens
            }

                streamingResult.execution.output.tokens = newTokens
              }
          }),
          ),
          execution: {
            success: true,
            output: {
              content: '', // Will be filled by the callback
              content: '',
              model: request.model,
              tokens: {
                prompt: tokens.prompt,
@@ -597,11 +1058,9 @@ export const azureOpenAIProvider: ProviderConfig = {
          },
        } as StreamingExecution

        // Return the streaming execution object with explicit casting
        return streamingResult as StreamingExecution
      }

      // Calculate overall timing
      const providerEndTime = Date.now()
      const providerEndTimeISO = new Date(providerEndTime).toISOString()
      const totalDuration = providerEndTime - providerStartTime
@@ -622,10 +1081,8 @@ export const azureOpenAIProvider: ProviderConfig = {
          iterations: iterationCount + 1,
          timeSegments: timeSegments,
        },
        // We're not calculating cost here as it will be handled in logger.ts
      }
    } catch (error) {
      // Include timing information even for errors
      const providerEndTime = Date.now()
      const providerEndTimeISO = new Date(providerEndTime).toISOString()
      const totalDuration = providerEndTime - providerStartTime
@@ -635,7 +1092,6 @@ export const azureOpenAIProvider: ProviderConfig = {
        duration: totalDuration,
      })

      // Create a new error with timing information
      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
      // @ts-ignore - Adding timing property to the error
      enhancedError.timing = {