feat(ollama): added streaming & tool call support for ollama, updated docs (#884)
`.github/CONTRIBUTING.md`:

@@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/)
 
 To use local models with Sim:
 
-1. Pull models using our helper script:
+1. Install Ollama and pull models:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Install Ollama (if not already installed)
+curl -fsSL https://ollama.ai/install.sh | sh
+
+# Pull a model (e.g., gemma3:4b)
+ollama pull gemma3:4b
 ```
 
 2. Start Sim with local model support:
`README.md`:

@@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d
 
 Access the application at [http://localhost:3000/](http://localhost:3000/)
 
-#### Using Local Models
+#### Using Local Models with Ollama
 
-To use local models with Sim:
-
-1. Pull models using our helper script:
+Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Start with GPU support (automatically downloads gemma3:4b model)
+docker compose -f docker-compose.ollama.yml --profile setup up -d
+
+# For CPU-only systems:
+docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d
 ```
 
-2. Start Sim with local model support:
+Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with:
 
 ```bash
-# With NVIDIA GPU support
-docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
-
-# Without GPU (CPU only)
-docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
-
-# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434)
-docker compose -f docker-compose.prod.yml up -d
+docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b
 ```
 
 ### Option 3: Dev Containers
Example environment file:

@@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 32` to generate
 # RESEND_API_KEY= # Uncomment and add your key from https://resend.com to send actual emails
 # If left commented out, emails will be logged to console instead
 
+# Local AI Models (Optional)
+# OLLAMA_URL=http://localhost:11434 # URL for local Ollama server - uncomment if using local models
+
`apps/sim/app/api/providers/ollama/models/route.ts` (new file, 52 lines):

```ts
import { type NextRequest, NextResponse } from 'next/server'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { ModelsObject } from '@/providers/ollama/types'

const logger = createLogger('OllamaModelsAPI')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'

export const dynamic = 'force-dynamic'

/**
 * Get available Ollama models
 */
export async function GET(request: NextRequest) {
  try {
    logger.info('Fetching Ollama models', {
      host: OLLAMA_HOST,
    })

    const response = await fetch(`${OLLAMA_HOST}/api/tags`, {
      headers: {
        'Content-Type': 'application/json',
      },
    })

    if (!response.ok) {
      logger.warn('Ollama service is not available', {
        status: response.status,
        statusText: response.statusText,
      })
      return NextResponse.json({ models: [] })
    }

    const data = (await response.json()) as ModelsObject
    const models = data.models.map((model) => model.name)

    logger.info('Successfully fetched Ollama models', {
      count: models.length,
      models,
    })

    return NextResponse.json({ models })
  } catch (error) {
    logger.error('Failed to fetch Ollama models', {
      error: error instanceof Error ? error.message : 'Unknown error',
      host: OLLAMA_HOST,
    })

    // Return empty array instead of error to avoid breaking the UI
    return NextResponse.json({ models: [] })
  }
}
```
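For context, `ModelsObject` is imported from `@/providers/ollama/types` and its definition is not part of this diff; a sketch of the shape the route relies on, inferred from `data.models.map((model) => model.name)` and Ollama's `/api/tags` response (assumed, not authoritative):

```ts
// Assumed shape only - the real definition lives in @/providers/ollama/types.
// Ollama's /api/tags returns { models: [{ name: 'gemma3:4b', ... }, ...] }.
interface OllamaModel {
  name: string
}

interface ModelsObject {
  models: OllamaModel[]
}
```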
Workflow block component (`WorkflowBlock`):

@@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps<WorkflowBlockProps>) {
       // If there's no condition, the block should be shown
       if (!block.condition) return true
 
+      // If condition is a function, call it to get the actual condition object
+      const actualCondition =
+        typeof block.condition === 'function' ? block.condition() : block.condition
+
       // Get the values of the fields this block depends on from the appropriate state
-      const fieldValue = stateToUse[block.condition.field]?.value
-      const andFieldValue = block.condition.and
-        ? stateToUse[block.condition.and.field]?.value
+      const fieldValue = stateToUse[actualCondition.field]?.value
+      const andFieldValue = actualCondition.and
+        ? stateToUse[actualCondition.and.field]?.value
         : undefined
 
       // Check if the condition value is an array
-      const isValueMatch = Array.isArray(block.condition.value)
+      const isValueMatch = Array.isArray(actualCondition.value)
         ? fieldValue != null &&
-          (block.condition.not
-            ? !block.condition.value.includes(fieldValue as string | number | boolean)
-            : block.condition.value.includes(fieldValue as string | number | boolean))
-        : block.condition.not
-          ? fieldValue !== block.condition.value
-          : fieldValue === block.condition.value
+          (actualCondition.not
+            ? !actualCondition.value.includes(fieldValue as string | number | boolean)
+            : actualCondition.value.includes(fieldValue as string | number | boolean))
+        : actualCondition.not
+          ? fieldValue !== actualCondition.value
+          : fieldValue === actualCondition.value
 
       // Check both conditions if 'and' is present
       const isAndValueMatch =
-        !block.condition.and ||
-        (Array.isArray(block.condition.and.value)
+        !actualCondition.and ||
+        (Array.isArray(actualCondition.and.value)
           ? andFieldValue != null &&
-            (block.condition.and.not
-              ? !block.condition.and.value.includes(andFieldValue as string | number | boolean)
-              : block.condition.and.value.includes(andFieldValue as string | number | boolean))
-          : block.condition.and.not
-            ? andFieldValue !== block.condition.and.value
-            : andFieldValue === block.condition.and.value)
+            (actualCondition.and.not
+              ? !actualCondition.and.value.includes(andFieldValue as string | number | boolean)
+              : actualCondition.and.value.includes(andFieldValue as string | number | boolean))
+          : actualCondition.and.not
+            ? andFieldValue !== actualCondition.and.value
+            : andFieldValue === actualCondition.and.value)
 
       return isValueMatch && isAndValueMatch
     })
Agent block configuration (API key sub-block):

@@ -12,6 +12,12 @@ import {
   MODELS_WITH_TEMPERATURE_SUPPORT,
   providers,
 } from '@/providers/utils'
+
+// Get current Ollama models dynamically
+const getCurrentOllamaModels = () => {
+  return useOllamaStore.getState().models
+}
+
 import { useOllamaStore } from '@/stores/ollama/store'
 import type { ToolResponse } from '@/tools/types'

@@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear language
       password: true,
       connectionDroppable: false,
       required: true,
-      // Hide API key for all hosted models when running on hosted version
+      // Hide API key for hosted models and Ollama models
       condition: isHosted
         ? {
             field: 'model',
             value: getHostedModels(),
             not: true, // Show for all models EXCEPT those listed
           }
-        : undefined, // Show for all models in non-hosted environments
+        : () => ({
+            field: 'model',
+            value: getCurrentOllamaModels(),
+            not: true, // Show for all models EXCEPT Ollama models
+          }),
     },
     {
       id: 'azureEndpoint',
`SubBlockConfig` type:

@@ -118,7 +118,8 @@ export interface SubBlockConfig {
   hidden?: boolean
   description?: string
   value?: (params: Record<string, any>) => string
-  condition?: {
+  condition?:
+    | {
         field: string
         value: string | number | boolean | Array<string | number | boolean>
         not?: boolean
@@ -128,6 +129,16 @@ export interface SubBlockConfig {
           not?: boolean
         }
       }
+    | (() => {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      })
   // Props specific to 'code' sub-block type
   language?: 'javascript' | 'json'
   generationType?: GenerationType
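With this union, a condition can be a static object or a zero-argument function evaluated each time visibility is checked. A minimal sketch of the two forms (the field/value pairs here are illustrative):

```ts
// Static: show the sub-block only when the selected model is in a fixed list.
const staticCondition = {
  field: 'model',
  value: ['gpt-4o', 'claude-3-5-sonnet'], // illustrative model names
}

// Dynamic: re-evaluated on every check, so the value list can come from a
// store that changes at runtime - exactly how the agent block above hides
// the API key field for whatever Ollama models are currently installed.
const dynamicCondition = () => ({
  field: 'model',
  value: useOllamaStore.getState().models,
  not: true, // show for every model EXCEPT these
})
```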
`InputResolver`:

@@ -58,7 +58,7 @@ export class InputResolver
 
   /**
    * Evaluates if a sub-block should be active based on its condition
-   * @param condition - The condition to evaluate
+   * @param condition - The condition to evaluate (can be static object or function)
    * @param currentValues - Current values of all inputs
    * @returns True if the sub-block should be active
    */
@@ -70,37 +70,46 @@ export class InputResolver {
           not?: boolean
           and?: { field: string; value: any; not?: boolean }
         }
+      | (() => {
+          field: string
+          value: any
+          not?: boolean
+          and?: { field: string; value: any; not?: boolean }
+        })
       | undefined,
     currentValues: Record<string, any>
   ): boolean {
     if (!condition) return true
 
+    // If condition is a function, call it to get the actual condition object
+    const actualCondition = typeof condition === 'function' ? condition() : condition
+
     // Get the field value
-    const fieldValue = currentValues[condition.field]
+    const fieldValue = currentValues[actualCondition.field]
 
     // Check if the condition value is an array
-    const isValueMatch = Array.isArray(condition.value)
+    const isValueMatch = Array.isArray(actualCondition.value)
       ? fieldValue != null &&
-        (condition.not
-          ? !condition.value.includes(fieldValue)
-          : condition.value.includes(fieldValue))
-      : condition.not
-        ? fieldValue !== condition.value
-        : fieldValue === condition.value
+        (actualCondition.not
+          ? !actualCondition.value.includes(fieldValue)
+          : actualCondition.value.includes(fieldValue))
+      : actualCondition.not
+        ? fieldValue !== actualCondition.value
+        : fieldValue === actualCondition.value
 
     // Check both conditions if 'and' is present
     const isAndValueMatch =
-      !condition.and ||
+      !actualCondition.and ||
       (() => {
-        const andFieldValue = currentValues[condition.and!.field]
-        return Array.isArray(condition.and!.value)
+        const andFieldValue = currentValues[actualCondition.and!.field]
+        return Array.isArray(actualCondition.and!.value)
           ? andFieldValue != null &&
-            (condition.and!.not
-              ? !condition.and!.value.includes(andFieldValue)
-              : condition.and!.value.includes(andFieldValue))
-          : condition.and!.not
-            ? andFieldValue !== condition.and!.value
-            : andFieldValue === condition.and!.value
+            (actualCondition.and!.not
+              ? !actualCondition.and!.value.includes(andFieldValue)
+              : actualCondition.and!.value.includes(andFieldValue))
+          : actualCondition.and!.not
+            ? andFieldValue !== actualCondition.and!.value
+            : andFieldValue === actualCondition.and!.value
       })()
 
     return isValueMatch && isAndValueMatch
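To make the `not`/`and` semantics concrete, a small illustrative condition and input set (names invented; the matching rules mirror the code above):

```ts
// Active when provider is one of the listed values AND stream is NOT true.
const condition = {
  field: 'provider',
  value: ['openai', 'anthropic'],
  and: { field: 'stream', value: true, not: true },
}

// provider 'openai' is in the array, so isValueMatch is true;
// stream !== true satisfies the negated 'and' clause, so the
// sub-block is considered active for these values.
const currentValues = { provider: 'openai', stream: false }
```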
Ollama provider (`@/providers/ollama`):

@@ -1,6 +1,7 @@
 import OpenAI from 'openai'
 import { env } from '@/lib/env'
 import { createLogger } from '@/lib/logs/console/logger'
+import type { StreamingExecution } from '@/executor/types'
 import type { ModelsObject } from '@/providers/ollama/types'
 import type {
   ProviderConfig,
@@ -8,12 +9,57 @@ import type {
   ProviderResponse,
   TimeSegment,
 } from '@/providers/types'
+import {
+  prepareToolExecution,
+  prepareToolsWithUsageControl,
+  trackForcedToolUsage,
+} from '@/providers/utils'
 import { useOllamaStore } from '@/stores/ollama/store'
 import { executeTool } from '@/tools'
 
 const logger = createLogger('OllamaProvider')
 const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
 
+/**
+ * Helper function to convert an Ollama stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromOllamaStream(
+  ollamaStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of ollamaStream) {
+          // Check for usage data in the final chunk
+          if (chunk.usage) {
+            usageData = chunk.usage
+          }
+
+          const content = chunk.choices[0]?.delta?.content || ''
+          if (content) {
+            fullContent += content
+            controller.enqueue(new TextEncoder().encode(content))
+          }
+        }
+
+        // Once stream is complete, call the completion callback with the final content and usage
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
 export const ollamaProvider: ProviderConfig = {
   id: 'ollama',
   name: 'Ollama',
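The helper adapts the OpenAI SDK's async-iterable stream into a web `ReadableStream` of UTF-8 bytes. A sketch of how a caller could drain such a stream (the consumer function is illustrative, not part of the diff):

```ts
// Reassemble the text emitted by createReadableStreamFromOllamaStream.
async function drainStream(stream: ReadableStream): Promise<string> {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true }) // value is a Uint8Array chunk
  }
  return text
}
```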
@@ -46,20 +92,20 @@ export const ollamaProvider: ProviderConfig = {
     }
   },
 
-  executeRequest: async (request: ProviderRequest): Promise<ProviderResponse> => {
+  executeRequest: async (
+    request: ProviderRequest
+  ): Promise<ProviderResponse | StreamingExecution> => {
     logger.info('Preparing Ollama request', {
       model: request.model,
       hasSystemPrompt: !!request.systemPrompt,
-      hasMessages: !!request.context,
+      hasMessages: !!request.messages?.length,
       hasTools: !!request.tools?.length,
       toolCount: request.tools?.length || 0,
       hasResponseFormat: !!request.responseFormat,
+      stream: !!request.stream,
     })
 
-    const startTime = Date.now()
-
-    try {
-      // Prepare messages array
+    // Create Ollama client using OpenAI-compatible API
     const ollama = new OpenAI({
       apiKey: 'empty',
       baseURL: `${OLLAMA_HOST}/v1`,
@@ -70,12 +116,18 @@ export const ollamaProvider: ProviderConfig = {
 
     // Add system prompt if present
     if (request.systemPrompt) {
-      allMessages.push({ role: 'system', content: request.systemPrompt })
+      allMessages.push({
+        role: 'system',
+        content: request.systemPrompt,
+      })
     }
 
     // Add context if present
     if (request.context) {
-      allMessages.push({ role: 'user', content: request.context })
+      allMessages.push({
+        role: 'user',
+        content: request.context,
+      })
     }
 
     // Add remaining messages
@@ -83,17 +135,6 @@ export const ollamaProvider: ProviderConfig = {
       allMessages.push(...request.messages)
     }
 
-    // Build the basic payload
-    const payload: any = {
-      model: request.model,
-      messages: allMessages,
-      stream: false,
-    }
-
-    // Add optional parameters
-    if (request.temperature !== undefined) payload.temperature = request.temperature
-    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
-
     // Transform tools to OpenAI format if provided
     const tools = request.tools?.length
       ? request.tools.map((tool) => ({
@@ -106,31 +147,183 @@ export const ollamaProvider: ProviderConfig = {
         }))
       : undefined
 
-    // Handle tools and tool usage control
-    if (tools?.length) {
-      // Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it
-      const filteredTools = tools.filter((tool) => {
-        const toolId = tool.function?.name
-        const toolConfig = request.tools?.find((t) => t.id === toolId)
-        // Only filter out 'none', treat 'force' as 'auto'
-        return toolConfig?.usageControl !== 'none'
-      })
-
-      if (filteredTools?.length) {
+    // Build the request payload
+    const payload: any = {
+      model: request.model,
+      messages: allMessages,
+    }
+
+    // Add optional parameters
+    if (request.temperature !== undefined) payload.temperature = request.temperature
+    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
+
+    // Add response format for structured output if specified
+    if (request.responseFormat) {
+      // Use OpenAI's JSON schema format (Ollama supports this)
+      payload.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
+        },
+      }
+
+      logger.info('Added JSON schema response format to Ollama request')
+    }
+
+    // Handle tools and tool usage control
+    let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
+
+    if (tools?.length) {
+      preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama')
+      const { tools: filteredTools, toolChoice } = preparedTools
+
+      if (filteredTools?.length && toolChoice) {
         payload.tools = filteredTools
-        // Always use 'auto' for Ollama, regardless of the tool_choice setting
-        payload.tool_choice = 'auto'
+        // Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto'
+        payload.tool_choice = typeof toolChoice === 'string' ? toolChoice : 'auto'
 
         logger.info('Ollama request configuration:', {
           toolCount: filteredTools.length,
-          toolChoice: 'auto', // Ollama always uses auto
+          toolChoice: payload.tool_choice,
           model: request.model,
         })
       }
     }
 
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
+
+    try {
+      // Check if we can stream directly (no tools required)
+      if (request.stream && (!tools || tools.length === 0)) {
+        logger.info('Using streaming response for Ollama request')
+
+        // Create a streaming request with token usage tracking
+        const streamResponse = await ollama.chat.completions.create({
+          ...payload,
+          stream: true,
+          stream_options: { include_usage: true },
+        })
+
+        // Start collecting token usage from the stream
+        const tokenUsage = {
+          prompt: 0,
+          completion: 0,
+          total: 0,
+        }
+
+        // Create a StreamingExecution response with a callback to update content and tokens
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update the timing information with the actual completion time
+            const streamEndTime = Date.now()
+            const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+            if (streamingResult.execution.output.providerTiming) {
+              streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+              streamingResult.execution.output.providerTiming.duration =
+                streamEndTime - providerStartTime
+
+              // Update the time segment as well
+              if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                  streamEndTime
+                streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                  streamEndTime - providerStartTime
+              }
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokenUsage.prompt,
+                completion: usage.completion_tokens || tokenUsage.completion,
+                total: usage.total_tokens || tokenUsage.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the stream completion callback
+              model: request.model,
+              tokens: tokenUsage,
+              toolCalls: undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                timeSegments: [
+                  {
+                    type: 'model',
+                    name: 'Streaming response',
+                    startTime: providerStartTime,
+                    endTime: Date.now(),
+                    duration: Date.now() - providerStartTime,
+                  },
+                ],
+              },
+            },
+            logs: [], // No block logs for direct streaming
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Make the initial API request
+      const initialCallTime = Date.now()
+
+      // Track the original tool_choice for forced tool tracking
+      const originalToolChoice = payload.tool_choice
+
+      // Track forced tools and their usage
+      const forcedTools = preparedTools?.forcedTools || []
+      let usedForcedTools: string[] = []
+
+      // Helper function to check for forced tool usage in responses
+      const checkForForcedToolUsage = (
+        response: any,
+        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
+      ) => {
+        if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
+          const toolCallsResponse = response.choices[0].message.tool_calls
+          const result = trackForcedToolUsage(
+            toolCallsResponse,
+            toolChoice,
+            logger,
+            'ollama',
+            forcedTools,
+            usedForcedTools
+          )
+          hasUsedForcedTool = result.hasUsedForcedTool
+          usedForcedTools = result.usedForcedTools
+        }
+      }
+
       let currentResponse = await ollama.chat.completions.create(payload)
-      const firstResponseTime = Date.now() - startTime
+      const firstResponseTime = Date.now() - initialCallTime
 
       let content = currentResponse.choices[0]?.message?.content || ''
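To illustrate the structured-output branch above, a hypothetical `request.responseFormat` and the OpenAI-style `response_format` it produces (the schema is invented for the example):

```ts
// Hypothetical caller-supplied format:
const responseFormat = {
  name: 'weather_report',
  schema: { type: 'object', properties: { tempC: { type: 'number' } } },
  strict: true,
}

// What the payload ends up carrying:
const response_format = {
  type: 'json_schema',
  json_schema: {
    name: 'weather_report', // defaults to 'response_schema' when unnamed
    schema: responseFormat.schema, // falls back to the whole object if .schema is absent
    strict: true, // strict unless explicitly set to false
  },
}
```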
@@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = {
         content = content.trim()
       }
 
+      // Collect token information
       const tokens = {
         prompt: currentResponse.usage?.prompt_tokens || 0,
         completion: currentResponse.usage?.completion_tokens || 0,
@@ -155,18 +349,23 @@ export const ollamaProvider: ProviderConfig = {
       let modelTime = firstResponseTime
       let toolsTime = 0
 
+      // Track if a forced tool has been used
+      let hasUsedForcedTool = false
+
       // Track each model and tool call segment with timestamps
       const timeSegments: TimeSegment[] = [
         {
           type: 'model',
           name: 'Initial response',
-          startTime: startTime,
-          endTime: startTime + firstResponseTime,
+          startTime: initialCallTime,
+          endTime: initialCallTime + firstResponseTime,
           duration: firstResponseTime,
         },
       ]
 
-      try {
+      // Check if a forced tool was used in the first response
+      checkForForcedToolUsage(currentResponse, originalToolChoice)
+
       while (iterationCount < MAX_ITERATIONS) {
         // Check for tool calls
         const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
@@ -174,6 +373,10 @@ export const ollamaProvider: ProviderConfig = {
           break
         }
 
+        logger.info(
+          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+        )
+
         // Track time for tool calls in this batch
         const toolsStartTime = Date.now()
 
@@ -190,26 +393,7 @@ export const ollamaProvider: ProviderConfig = {
             // Execute the tool
             const toolCallStartTime = Date.now()
 
-            // Only merge actual tool parameters for logging
-            const toolParams = {
-              ...tool.params,
-              ...toolArgs,
-            }
-
-            // Add system parameters for execution
-            const executionParams = {
-              ...toolParams,
-              ...(request.workflowId
-                ? {
-                    _context: {
-                      workflowId: request.workflowId,
-                      ...(request.chatId ? { chatId: request.chatId } : {}),
-                    },
-                  }
-                : {}),
-              ...(request.environmentVariables ? { envVars: request.environmentVariables } : {}),
-            }
+            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
 
             const result = await executeTool(toolName, executionParams, true)
             const toolCallEndTime = Date.now()
             const toolCallDuration = toolCallEndTime - toolCallStartTime
 
@@ -269,7 +453,10 @@ export const ollamaProvider: ProviderConfig = {
               content: JSON.stringify(resultContent),
             })
           } catch (error) {
-            logger.error('Error processing tool call:', { error })
+            logger.error('Error processing tool call:', {
+              error,
+              toolName: toolCall?.function?.name,
+            })
           }
         }
 
@@ -283,12 +470,31 @@ export const ollamaProvider: ProviderConfig = {
           messages: currentMessages,
         }
 
+        // Update tool_choice based on which forced tools have been used
+        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
+          // If we have remaining forced tools, get the next one to force
+          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
+
+          if (remainingTools.length > 0) {
+            // Ollama doesn't support forced tool selection, so we keep using 'auto'
+            nextPayload.tool_choice = 'auto'
+            logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`)
+          } else {
+            // All forced tools have been used, continue with auto
+            nextPayload.tool_choice = 'auto'
+            logger.info('All forced tools have been used, continuing with auto tool_choice')
+          }
+        }
+
         // Time the next model call
         const nextModelStartTime = Date.now()
 
         // Make the next request
         currentResponse = await ollama.chat.completions.create(nextPayload)
 
+        // Check if any forced tools were used in this response
+        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
+
         const nextModelEndTime = Date.now()
         const thisModelTime = nextModelEndTime - nextModelStartTime
@@ -321,35 +527,129 @@ export const ollamaProvider: ProviderConfig = {
 
         iterationCount++
       }
-    } catch (error) {
-      logger.error('Error in Ollama request:', { error })
-    }
 
-    const endTime = Date.now()
+      // After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response
+      if (request.stream && iterationCount > 0) {
+        logger.info('Using streaming for final response after tool calls')
+
+        const streamingPayload = {
+          ...payload,
+          messages: currentMessages,
+          tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
+          stream: true,
+          stream_options: { include_usage: true },
+        }
+
+        const streamResponse = await ollama.chat.completions.create(streamingPayload)
+
+        // Create the StreamingExecution object with all collected data
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokens.prompt,
+                completion: usage.completion_tokens || tokens.completion,
+                total: usage.total_tokens || tokens.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the callback
+              model: request.model,
+              tokens: {
+                prompt: tokens.prompt,
+                completion: tokens.completion,
+                total: tokens.total,
+              },
+              toolCalls:
+                toolCalls.length > 0
+                  ? {
+                      list: toolCalls,
+                      count: toolCalls.length,
+                    }
+                  : undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                modelTime: modelTime,
+                toolsTime: toolsTime,
+                firstResponseTime: firstResponseTime,
+                iterations: iterationCount + 1,
+                timeSegments: timeSegments,
+              },
+            },
+            logs: [], // No block logs at provider level
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Calculate overall timing
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
 
       return {
-        content: content,
+        content,
         model: request.model,
         tokens,
         toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
         toolResults: toolResults.length > 0 ? toolResults : undefined,
         timing: {
-          startTime: new Date(startTime).toISOString(),
-          endTime: new Date(endTime).toISOString(),
-          duration: endTime - startTime,
+          startTime: providerStartTimeISO,
+          endTime: providerEndTimeISO,
+          duration: totalDuration,
           modelTime: modelTime,
           toolsTime: toolsTime,
           firstResponseTime: firstResponseTime,
           iterations: iterationCount + 1,
-          timeSegments,
+          timeSegments: timeSegments,
         },
       }
     } catch (error) {
-      logger.error('Error in Ollama request', {
-        error: error instanceof Error ? error.message : 'Unknown error',
-        model: request.model,
-      })
-      throw error
+      // Include timing information even for errors
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      logger.error('Error in Ollama request:', {
+        error,
+        duration: totalDuration,
+      })
+
+      // Create a new error with timing information
+      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+      // @ts-ignore - Adding timing property to the error
+      enhancedError.timing = {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+      }
+
+      throw enhancedError
     }
   },
 }
`@/providers/utils`:

@@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai'
 import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types'
 import { xAIProvider } from '@/providers/xai'
 import { useCustomToolsStore } from '@/stores/custom-tools/store'
+import { useOllamaStore } from '@/stores/ollama/store'
 
 const logger = createLogger('ProviderUtils')
 
@@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: string)
   // If user provided a key, use it as a fallback
   const hasUserKey = !!userProvidedKey
 
+  // Ollama models don't require API keys - they run locally
+  const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model)
+  if (isOllamaModel) {
+    return 'empty' // Ollama uses 'empty' as a placeholder API key
+  }
+
   // Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform
   const isOpenAIModel = provider === 'openai'
   const isClaudeModel = provider === 'anthropic'
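Since Ollama runs locally, the placeholder key matches the `apiKey: 'empty'` used when constructing the OpenAI-compatible client in the provider. An illustrative call (the model name is hypothetical):

```ts
getApiKey('ollama', 'gemma3:4b') // -> 'empty', no key lookup or rotation
// A model currently listed in the Ollama store short-circuits the same way,
// even when the provider string differs.
```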
`apps/sim/scripts/ollama_docker.sh` (deleted; replaced by the Docker Compose profiles below):

@@ -1,25 +0,0 @@
-#!/bin/bash
-set -e
-
-# Check that at least one argument is provided. If not, display the usage help.
-if [ "$#" -eq 0 ]; then
-  echo "Usage: $(basename "$0") <ollama command> [args...]"
-  echo "Example: $(basename "$0") ps  # This will run 'ollama ps' inside the container"
-  exit 1
-fi
-
-# Start a detached container from the ollama/ollama image,
-# mounting the host's ~/.ollama directory directly into the container.
-# Here we mount it to /root/.ollama, assuming that's where the image expects it.
-CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama)
-
-# Define a cleanup function to stop the container regardless of how the script exits.
-cleanup() {
-  docker stop "$CONTAINER_ID" >/dev/null
-}
-trap cleanup EXIT
-
-# Execute the command provided by the user within the running container.
-# The command runs as: "ollama <user-arguments>"
-docker exec -it "$CONTAINER_ID" ollama "$@"
`@/stores/ollama/store`:

@@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types'
 
 const logger = createLogger('OllamaStore')
 
-export const useOllamaStore = create<OllamaStore>((set) => ({
+// Fetch models from the server API when on client side
+const fetchOllamaModels = async (): Promise<string[]> => {
+  try {
+    const response = await fetch('/api/providers/ollama/models')
+    if (!response.ok) {
+      logger.warn('Failed to fetch Ollama models from API', {
+        status: response.status,
+        statusText: response.statusText,
+      })
+      return []
+    }
+    const data = await response.json()
+    return data.models || []
+  } catch (error) {
+    logger.error('Error fetching Ollama models', {
+      error: error instanceof Error ? error.message : 'Unknown error',
+    })
+    return []
+  }
+}
+
+export const useOllamaStore = create<OllamaStore>((set, get) => ({
   models: [],
+  isLoading: false,
   setModels: (models) => {
     set({ models })
     // Update the providers when models change
     updateOllamaProviderModels(models)
   },
+
+  // Fetch models from API (client-side only)
+  fetchModels: async () => {
+    if (typeof window === 'undefined') {
+      logger.info('Skipping client-side model fetch on server')
+      return
+    }
+
+    if (get().isLoading) {
+      logger.info('Model fetch already in progress')
+      return
+    }
+
+    logger.info('Fetching Ollama models from API')
+    set({ isLoading: true })
+
+    try {
+      const models = await fetchOllamaModels()
+      logger.info('Successfully fetched Ollama models', {
+        count: models.length,
+        models,
+      })
+      get().setModels(models)
+    } catch (error) {
+      logger.error('Failed to fetch Ollama models', {
+        error: error instanceof Error ? error.message : 'Unknown error',
+      })
+    } finally {
+      set({ isLoading: false })
+    }
+  },
 }))
+
+// Auto-fetch models when the store is first accessed on the client
+if (typeof window !== 'undefined') {
+  // Delay to avoid hydration issues
+  setTimeout(() => {
+    useOllamaStore.getState().fetchModels()
+  }, 1000)
+}
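A minimal sketch of consuming the store from a React component (the component itself is illustrative; `models`, `isLoading`, and the hook come from the store above):

```tsx
function OllamaModelPicker() {
  const models = useOllamaStore((state) => state.models)
  const isLoading = useOllamaStore((state) => state.isLoading)

  if (isLoading) return <span>Loading local models...</span>
  return (
    <select>
      {models.map((name) => (
        <option key={name}>{name}</option>
      ))}
    </select>
  )
}
```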
`@/stores/ollama/types`:

@@ -1,4 +1,6 @@
 export interface OllamaStore {
   models: string[]
+  isLoading: boolean
   setModels: (models: string[]) => void
+  fetchModels: () => Promise<void>
 }

`docker-compose.ollama.yml`:
|
|||||||
|
name: sim-with-ollama
|
||||||
|
|
||||||
services:
|
services:
|
||||||
local-llm-gpu:
|
# Main Sim Studio Application
|
||||||
profiles:
|
simstudio:
|
||||||
- local-gpu # This profile requires both 'local' and 'gpu'
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/app.Dockerfile
|
||||||
|
ports:
|
||||||
|
- '3000:3000'
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 8G
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
- BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||||
|
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)}
|
||||||
|
- OLLAMA_URL=http://ollama:11434
|
||||||
|
- NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
migrations:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
realtime:
|
||||||
|
condition: service_healthy
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000']
|
||||||
|
interval: 90s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Realtime Socket Server
|
||||||
|
realtime:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/realtime.Dockerfile
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '3002:3002'
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 8G
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health']
|
||||||
|
interval: 90s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
|
||||||
|
# Database Migrations
|
||||||
|
migrations:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/db.Dockerfile
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
command: ['bun', 'run', 'db:migrate']
|
||||||
|
restart: 'no'
|
||||||
|
|
||||||
|
# PostgreSQL Database with Vector Extension
|
||||||
|
db:
|
||||||
|
image: pgvector/pgvector:pg17
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- '5432:5432'
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=${POSTGRES_USER:-postgres}
|
||||||
|
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
|
||||||
|
- POSTGRES_DB=${POSTGRES_DB:-simstudio}
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD-SHELL', 'pg_isready -U postgres']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# Ollama with GPU support (default)
|
||||||
|
ollama:
|
||||||
image: ollama/ollama:latest
|
image: ollama/ollama:latest
|
||||||
pull_policy: always
|
pull_policy: always
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.ollama:/root/.ollama
|
- ollama_data:/root/.ollama
|
||||||
ports:
|
ports:
|
||||||
- '11434:11434'
|
- '11434:11434'
|
||||||
environment:
|
environment:
|
||||||
@@ -13,6 +108,7 @@ services:
|
|||||||
- OLLAMA_LOAD_TIMEOUT=-1
|
- OLLAMA_LOAD_TIMEOUT=-1
|
||||||
- OLLAMA_KEEP_ALIVE=-1
|
- OLLAMA_KEEP_ALIVE=-1
|
||||||
- OLLAMA_DEBUG=1
|
- OLLAMA_DEBUG=1
|
||||||
|
- OLLAMA_HOST=0.0.0.0:11434
|
||||||
command: 'serve'
|
command: 'serve'
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
@@ -26,23 +122,56 @@ services:
|
|||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
local-llm-cpu:
|
# Ollama CPU-only version (use with --profile cpu profile)
|
||||||
|
ollama-cpu:
|
||||||
profiles:
|
profiles:
|
||||||
- local-cpu # This profile requires both 'local' and 'cpu'
|
- cpu
|
||||||
image: ollama/ollama:latest
|
image: ollama/ollama:latest
|
||||||
pull_policy: always
|
pull_policy: always
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.ollama:/root/.ollama
|
- ollama_data:/root/.ollama
|
||||||
ports:
|
ports:
|
||||||
- '11434:11434'
|
- '11434:11434'
|
||||||
environment:
|
environment:
|
||||||
- OLLAMA_LOAD_TIMEOUT=-1
|
- OLLAMA_LOAD_TIMEOUT=-1
|
||||||
- OLLAMA_KEEP_ALIVE=-1
|
- OLLAMA_KEEP_ALIVE=-1
|
||||||
- OLLAMA_DEBUG=1
|
- OLLAMA_DEBUG=1
|
||||||
|
- OLLAMA_HOST=0.0.0.0:11434
|
||||||
command: 'serve'
|
command: 'serve'
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
|
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Helper container to pull models automatically
|
||||||
|
model-setup:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
profiles:
|
||||||
|
- setup
|
||||||
|
volumes:
|
||||||
|
- ollama_data:/root/.ollama
|
||||||
|
environment:
|
||||||
|
- OLLAMA_HOST=ollama:11434
|
||||||
|
depends_on:
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
command: >
|
||||||
|
sh -c "
|
||||||
|
echo 'Waiting for Ollama to be ready...' &&
|
||||||
|
sleep 10 &&
|
||||||
|
echo 'Pulling gemma3:4b model (recommended starter model)...' &&
|
||||||
|
ollama pull gemma3:4b &&
|
||||||
|
echo 'Model setup complete! You can now use gemma3:4b in Sim.' &&
|
||||||
|
echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>'
|
||||||
|
"
|
||||||
|
restart: 'no'
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
ollama_data:
|
||||||
|
|||||||