diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 4d0d8df90..469ce8aa0 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/)
 
 To use local models with Sim:
 
-1. Pull models using our helper script:
+1. Install Ollama and pull models:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Install Ollama (if not already installed)
+curl -fsSL https://ollama.ai/install.sh | sh
+
+# Pull a model (e.g., gemma3:4b)
+ollama pull gemma3:4b
 ```
 
 2. Start Sim with local model support:
diff --git a/README.md b/README.md
index f9855815e..be3b0ec97 100644
--- a/README.md
+++ b/README.md
@@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d
 
 Access the application at [http://localhost:3000/](http://localhost:3000/)
 
-#### Using Local Models
+#### Using Local Models with Ollama
 
-To use local models with Sim:
-
-1. Pull models using our helper script:
+Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Start with GPU support (automatically downloads the gemma3:4b model)
+docker compose -f docker-compose.ollama.yml --profile setup up -d
+
+# For CPU-only systems:
+docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d
 ```
 
-2. Start Sim with local model support:
-
+Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with:
 ```bash
-# With NVIDIA GPU support
-docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
-
-# Without GPU (CPU only)
-docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
-
-# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434)
-docker compose -f docker-compose.prod.yml up -d
+docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b
 ```
 
 ### Option 3: Dev Containers
diff --git a/apps/sim/.env.example b/apps/sim/.env.example
index 5c126fb32..ee2c0f84d 100644
--- a/apps/sim/.env.example
+++ b/apps/sim/.env.example
@@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key  # Use `openssl rand -hex 32` to generate
 # RESEND_API_KEY=  # Uncomment and add your key from https://resend.com to send actual emails
                    # If left commented out, emails will be logged to console instead
 
+# Local AI Models (Optional)
+# OLLAMA_URL=http://localhost:11434  # URL for local Ollama server - uncomment if using local models
+
diff --git a/apps/sim/app/api/providers/ollama/models/route.ts b/apps/sim/app/api/providers/ollama/models/route.ts
new file mode 100644
index 000000000..7c184588b
--- /dev/null
+++ b/apps/sim/app/api/providers/ollama/models/route.ts
@@ -0,0 +1,52 @@
+import { type NextRequest, NextResponse } from 'next/server'
+import { env } from '@/lib/env'
+import { createLogger } from '@/lib/logs/console/logger'
+import type { ModelsObject } from '@/providers/ollama/types'
+
+const logger = createLogger('OllamaModelsAPI')
+const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
+
+export const dynamic = 'force-dynamic'
+
+/**
+ * Returns the model names reported by the Ollama server's `/api/tags` endpoint as `{ models }`; any failure yields `{ models: [] }`.
+ */
+export async function GET(request: NextRequest) {
+  try {
+    logger.info('Fetching Ollama models', {
+      host: OLLAMA_HOST,
+    })
+
+    const response = await fetch(`${OLLAMA_HOST}/api/tags`, {
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    })
+
+    if (!response.ok) {
+      logger.warn('Ollama service is not available', {
+        status: response.status,
+        statusText: response.statusText,
+      })
+      return NextResponse.json({ models: [] })
+    }
+
+    const data = (await response.json()) as ModelsObject
+    const models = data.models.map((model) => model.name) // keep only each model's name
+
+    logger.info('Successfully fetched Ollama models', {
+      count: models.length,
+      models,
+    })
+
+    return NextResponse.json({ models })
+  } catch (error) {
+    logger.error('Failed to fetch Ollama models', {
+      error: error instanceof Error ? error.message : 'Unknown error',
+      host: OLLAMA_HOST,
+    })
+
+    // Anything thrown above (failed fetch, unparsable body) ends here: answer with an empty list, not an error, to avoid breaking the UI
+    return NextResponse.json({ models: [] })
+  }
+}
diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx
index 0a9907e27..cb1100853 100644
--- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx
@@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps<WorkflowBlockProps>) {
       // If there's no condition, the block should be shown
       if (!block.condition) return true
 
+      // If condition is a function, call it to get the actual condition object
+      const actualCondition =
+        typeof block.condition === 'function' ? block.condition() : block.condition
+
       // Get the values of the fields this block depends on from the appropriate state
-      const fieldValue = stateToUse[block.condition.field]?.value
-      const andFieldValue = block.condition.and
-        ? stateToUse[block.condition.and.field]?.value
+      const fieldValue = stateToUse[actualCondition.field]?.value
+      const andFieldValue = actualCondition.and
+        ? stateToUse[actualCondition.and.field]?.value
         : undefined
 
       // Check if the condition value is an array
-      const isValueMatch = Array.isArray(block.condition.value)
+      const isValueMatch = Array.isArray(actualCondition.value)
         ? fieldValue != null &&
-          (block.condition.not
-            ? !block.condition.value.includes(fieldValue as string | number | boolean)
-            : block.condition.value.includes(fieldValue as string | number | boolean))
-        : block.condition.not
-          ? fieldValue !== block.condition.value
-          : fieldValue === block.condition.value
+          (actualCondition.not
+            ? !actualCondition.value.includes(fieldValue as string | number | boolean)
+            : actualCondition.value.includes(fieldValue as string | number | boolean))
+        : actualCondition.not
+          ? fieldValue !== actualCondition.value
+          : fieldValue === actualCondition.value
 
       // Check both conditions if 'and' is present
       const isAndValueMatch =
-        !block.condition.and ||
-        (Array.isArray(block.condition.and.value)
+        !actualCondition.and ||
+        (Array.isArray(actualCondition.and.value)
           ? andFieldValue != null &&
-            (block.condition.and.not
-              ? !block.condition.and.value.includes(andFieldValue as string | number | boolean)
-              : block.condition.and.value.includes(andFieldValue as string | number | boolean))
-          : block.condition.and.not
-            ? andFieldValue !== block.condition.and.value
-            : andFieldValue === block.condition.and.value)
+            (actualCondition.and.not
+              ? !actualCondition.and.value.includes(andFieldValue as string | number | boolean)
+              : actualCondition.and.value.includes(andFieldValue as string | number | boolean))
+          : actualCondition.and.not
+            ? andFieldValue !== actualCondition.and.value
+            : andFieldValue === actualCondition.and.value)
 
       return isValueMatch && isAndValueMatch
     })
diff --git a/apps/sim/blocks/blocks/agent.ts b/apps/sim/blocks/blocks/agent.ts
index f1eb04a96..4e02bf40a 100644
--- a/apps/sim/blocks/blocks/agent.ts
+++ b/apps/sim/blocks/blocks/agent.ts
@@ -12,6 +12,12 @@ import {
   MODELS_WITH_TEMPERATURE_SUPPORT,
   providers,
 } from '@/providers/utils'
+
+// Get current Ollama models dynamically
+const getCurrentOllamaModels = () => {
+  return useOllamaStore.getState().models
+}
+
 import { useOllamaStore } from '@/stores/ollama/store'
 import type { ToolResponse } from '@/tools/types'
 
@@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear langu
       password: true,
       connectionDroppable: false,
       required: true,
-      // Hide API key for all hosted models when running on hosted version
+      // Hide API key for hosted models and Ollama models
       condition: isHosted
         ? {
             field: 'model',
             value: getHostedModels(),
             not: true, // Show for all models EXCEPT those listed
           }
-        : undefined, // Show for all models in non-hosted environments
+        : () => ({
+            field: 'model',
+            value: getCurrentOllamaModels(),
+            not: true, // Show for all models EXCEPT Ollama models
+          }),
     },
     {
       id: 'azureEndpoint',
diff --git a/apps/sim/blocks/types.ts b/apps/sim/blocks/types.ts
index e1818e386..8cd139174 100644
--- a/apps/sim/blocks/types.ts
+++ b/apps/sim/blocks/types.ts
@@ -118,16 +118,27 @@ export interface SubBlockConfig {
   hidden?: boolean
   description?: string
   value?: (params: Record<string, any>) => string
-  condition?: {
-    field: string
-    value: string | number | boolean | Array<string | number | boolean>
-    not?: boolean
-    and?: {
-      field: string
-      value: string | number | boolean | Array<string | number | boolean> | undefined
-      not?: boolean
-    }
-  }
+  condition?:
+    | {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      }
+    | (() => {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      })
   // Props specific to 'code' sub-block type
   language?: 'javascript' | 'json'
   generationType?: GenerationType
diff --git a/apps/sim/executor/resolver/resolver.ts b/apps/sim/executor/resolver/resolver.ts
index 51101c310..13fbdd121 100644
--- a/apps/sim/executor/resolver/resolver.ts
+++ b/apps/sim/executor/resolver/resolver.ts
@@ -58,7 +58,7 @@ export class InputResolver {
 
   /**
    * Evaluates if a sub-block should be active based on its condition
-   * @param condition - The condition to evaluate
+   * @param condition - The condition to evaluate (can be static object or function)
    * @param currentValues - Current values of all inputs
    * @returns True if the sub-block should be active
    */
@@ -70,37 +70,46 @@ export class InputResolver {
           not?: boolean
           and?: { field: string; value: any; not?: boolean }
         }
+      | (() => {
+          field: string
+          value: any
+          not?: boolean
+          and?: { field: string; value: any; not?: boolean }
+        })
       | undefined,
     currentValues: Record<string, any>
   ): boolean {
     if (!condition) return true
 
+    // If condition is a function, call it to get the actual condition object
+    const actualCondition = typeof condition === 'function' ? condition() : condition
+
     // Get the field value
-    const fieldValue = currentValues[condition.field]
+    const fieldValue = currentValues[actualCondition.field]
 
     // Check if the condition value is an array
-    const isValueMatch = Array.isArray(condition.value)
+    const isValueMatch = Array.isArray(actualCondition.value)
       ? fieldValue != null &&
-        (condition.not
-          ? !condition.value.includes(fieldValue)
-          : condition.value.includes(fieldValue))
-      : condition.not
-        ? fieldValue !== condition.value
-        : fieldValue === condition.value
+        (actualCondition.not
+          ? !actualCondition.value.includes(fieldValue)
+          : actualCondition.value.includes(fieldValue))
+      : actualCondition.not
+        ? fieldValue !== actualCondition.value
+        : fieldValue === actualCondition.value
 
     // Check both conditions if 'and' is present
     const isAndValueMatch =
-      !condition.and ||
+      !actualCondition.and ||
       (() => {
-        const andFieldValue = currentValues[condition.and!.field]
-        return Array.isArray(condition.and!.value)
+        const andFieldValue = currentValues[actualCondition.and!.field]
+        return Array.isArray(actualCondition.and!.value)
           ? andFieldValue != null &&
-              (condition.and!.not
-                ? !condition.and!.value.includes(andFieldValue)
-                : condition.and!.value.includes(andFieldValue))
-          : condition.and!.not
-            ? andFieldValue !== condition.and!.value
-            : andFieldValue === condition.and!.value
+              (actualCondition.and!.not
+                ? !actualCondition.and!.value.includes(andFieldValue)
+                : actualCondition.and!.value.includes(andFieldValue))
+          : actualCondition.and!.not
+            ? andFieldValue !== actualCondition.and!.value
+            : andFieldValue === actualCondition.and!.value
       })()
 
     return isValueMatch && isAndValueMatch
diff --git a/apps/sim/providers/ollama/index.ts b/apps/sim/providers/ollama/index.ts
index 7dc2bbb40..3bf99217b 100644
--- a/apps/sim/providers/ollama/index.ts
+++ b/apps/sim/providers/ollama/index.ts
@@ -1,6 +1,7 @@
 import OpenAI from 'openai'
 import { env } from '@/lib/env'
 import { createLogger } from '@/lib/logs/console/logger'
+import type { StreamingExecution } from '@/executor/types'
 import type { ModelsObject } from '@/providers/ollama/types'
 import type {
   ProviderConfig,
@@ -8,12 +9,57 @@ import type {
   ProviderResponse,
   TimeSegment,
 } from '@/providers/types'
+import {
+  prepareToolExecution,
+  prepareToolsWithUsageControl,
+  trackForcedToolUsage,
+} from '@/providers/utils'
 import { useOllamaStore } from '@/stores/ollama/store'
 import { executeTool } from '@/tools'
 
 const logger = createLogger('OllamaProvider')
 const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
 
+/**
+ * Adapts an async iterable of OpenAI-style chat completion chunks into a ReadableStream of
+ * UTF-8 bytes, enqueueing each non-empty `delta.content` as it arrives.
+ * @param ollamaStream - Async iterable of streamed completion chunks
+ * @param onComplete - Called after the last chunk, before the stream closes, with the full text
+ *   and the last `usage` object seen (`null` if no chunk carried one)
+ * @returns The byte stream; errors from iteration or from `onComplete` go to `controller.error`
+ */
+function createReadableStreamFromOllamaStream(
+  ollamaStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  // TextEncoder keeps no state between calls, so one instance serves the whole stream
+  const encoder = new TextEncoder()
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of ollamaStream) {
+          // Usage arrives on a trailing chunk, which may carry no choices at all
+          if (chunk.usage) usageData = chunk.usage
+          const content = chunk.choices?.[0]?.delta?.content || ''
+          if (content) {
+            fullContent += content
+            controller.enqueue(encoder.encode(content))
+          }
+        }
+
+        // Hand the accumulated text and usage to the caller before closing the stream
+        onComplete?.(fullContent, usageData)
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
 export const ollamaProvider: ProviderConfig = {
   id: 'ollama',
   name: 'Ollama',
@@ -46,91 +92,238 @@ export const ollamaProvider: ProviderConfig = {
     }
   },
 
-  executeRequest: async (request: ProviderRequest): Promise<ProviderResponse> => {
+  executeRequest: async (
+    request: ProviderRequest
+  ): Promise<ProviderResponse | StreamingExecution> => {
     logger.info('Preparing Ollama request', {
       model: request.model,
       hasSystemPrompt: !!request.systemPrompt,
-      hasMessages: !!request.context,
+      hasMessages: !!request.messages?.length,
       hasTools: !!request.tools?.length,
       toolCount: request.tools?.length || 0,
       hasResponseFormat: !!request.responseFormat,
+      stream: !!request.stream,
     })
 
-    const startTime = Date.now()
+    // Create Ollama client using OpenAI-compatible API
+    const ollama = new OpenAI({
+      apiKey: 'empty',
+      baseURL: `${OLLAMA_HOST}/v1`,
+    })
+
+    // Start with an empty array for all messages
+    const allMessages = []
+
+    // Add system prompt if present
+    if (request.systemPrompt) {
+      allMessages.push({
+        role: 'system',
+        content: request.systemPrompt,
+      })
+    }
+
+    // Add context if present
+    if (request.context) {
+      allMessages.push({
+        role: 'user',
+        content: request.context,
+      })
+    }
+
+    // Add remaining messages
+    if (request.messages) {
+      allMessages.push(...request.messages)
+    }
+
+    // Transform tools to OpenAI format if provided
+    const tools = request.tools?.length
+      ? request.tools.map((tool) => ({
+          type: 'function',
+          function: {
+            name: tool.id,
+            description: tool.description,
+            parameters: tool.parameters,
+          },
+        }))
+      : undefined
+
+    // Build the request payload
+    const payload: any = {
+      model: request.model,
+      messages: allMessages,
+    }
+
+    // Add optional parameters
+    if (request.temperature !== undefined) payload.temperature = request.temperature
+    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
+
+    // Add response format for structured output if specified
+    if (request.responseFormat) {
+      // Use OpenAI's JSON schema format (Ollama supports this)
+      payload.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
+        },
+      }
+
+      logger.info('Added JSON schema response format to Ollama request')
+    }
+
+    // Handle tools and tool usage control
+    let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
+
+    if (tools?.length) {
+      preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama')
+      const { tools: filteredTools, toolChoice } = preparedTools
+
+      if (filteredTools?.length && toolChoice) {
+        payload.tools = filteredTools
+        // Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto'
+        payload.tool_choice = typeof toolChoice === 'string' ? toolChoice : 'auto'
+
+        logger.info('Ollama request configuration:', {
+          toolCount: filteredTools.length,
+          toolChoice: payload.tool_choice,
+          model: request.model,
+        })
+      }
+    }
+
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
 
     try {
-      // Prepare messages array
-      const ollama = new OpenAI({
-        apiKey: 'empty',
-        baseURL: `${OLLAMA_HOST}/v1`,
-      })
+      // Check if we can stream directly (no tools required)
+      if (request.stream && (!tools || tools.length === 0)) {
+        logger.info('Using streaming response for Ollama request')
 
-      // Start with an empty array for all messages
-      const allMessages = []
-
-      // Add system prompt if present
-      if (request.systemPrompt) {
-        allMessages.push({ role: 'system', content: request.systemPrompt })
-      }
-
-      // Add context if present
-      if (request.context) {
-        allMessages.push({ role: 'user', content: request.context })
-      }
-
-      // Add remaining messages
-      if (request.messages) {
-        allMessages.push(...request.messages)
-      }
-
-      // Build the basic payload
-      const payload: any = {
-        model: request.model,
-        messages: allMessages,
-        stream: false,
-      }
-
-      // Add optional parameters
-      if (request.temperature !== undefined) payload.temperature = request.temperature
-      if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
-
-      // Transform tools to OpenAI format if provided
-      const tools = request.tools?.length
-        ? request.tools.map((tool) => ({
-            type: 'function',
-            function: {
-              name: tool.id,
-              description: tool.description,
-              parameters: tool.parameters,
-            },
-          }))
-        : undefined
-
-      // Handle tools and tool usage control
-      if (tools?.length) {
-        // Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it
-        const filteredTools = tools.filter((tool) => {
-          const toolId = tool.function?.name
-          const toolConfig = request.tools?.find((t) => t.id === toolId)
-          // Only filter out 'none', treat 'force' as 'auto'
-          return toolConfig?.usageControl !== 'none'
+        // Create a streaming request with token usage tracking
+        const streamResponse = await ollama.chat.completions.create({
+          ...payload,
+          stream: true,
+          stream_options: { include_usage: true },
         })
 
-        if (filteredTools?.length) {
-          payload.tools = filteredTools
-          // Always use 'auto' for Ollama, regardless of the tool_choice setting
-          payload.tool_choice = 'auto'
+        // Start collecting token usage from the stream
+        const tokenUsage = {
+          prompt: 0,
+          completion: 0,
+          total: 0,
+        }
 
-          logger.info('Ollama request configuration:', {
-            toolCount: filteredTools.length,
-            toolChoice: 'auto', // Ollama always uses auto
-            model: request.model,
-          })
+        // Create a StreamingExecution response with a callback to update content and tokens
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update the timing information with the actual completion time
+            const streamEndTime = Date.now()
+            const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+            if (streamingResult.execution.output.providerTiming) {
+              streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+              streamingResult.execution.output.providerTiming.duration =
+                streamEndTime - providerStartTime
+
+              // Update the time segment as well
+              if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                  streamEndTime
+                streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                  streamEndTime - providerStartTime
+              }
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokenUsage.prompt,
+                completion: usage.completion_tokens || tokenUsage.completion,
+                total: usage.total_tokens || tokenUsage.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the stream completion callback
+              model: request.model,
+              tokens: tokenUsage,
+              toolCalls: undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                timeSegments: [
+                  {
+                    type: 'model',
+                    name: 'Streaming response',
+                    startTime: providerStartTime,
+                    endTime: Date.now(),
+                    duration: Date.now() - providerStartTime,
+                  },
+                ],
+              },
+            },
+            logs: [], // No block logs for direct streaming
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Make the initial API request
+      const initialCallTime = Date.now()
+
+      // Track the original tool_choice for forced tool tracking
+      const originalToolChoice = payload.tool_choice
+
+      // Track forced tools and their usage
+      const forcedTools = preparedTools?.forcedTools || []
+      let usedForcedTools: string[] = []
+
+      // Helper function to check for forced tool usage in responses
+      const checkForForcedToolUsage = (
+        response: any,
+        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
+      ) => {
+        if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
+          const toolCallsResponse = response.choices[0].message.tool_calls
+          const result = trackForcedToolUsage(
+            toolCallsResponse,
+            toolChoice,
+            logger,
+            'ollama',
+            forcedTools,
+            usedForcedTools
+          )
+          hasUsedForcedTool = result.hasUsedForcedTool
+          usedForcedTools = result.usedForcedTools
         }
       }
 
       let currentResponse = await ollama.chat.completions.create(payload)
-      const firstResponseTime = Date.now() - startTime
+      const firstResponseTime = Date.now() - initialCallTime
 
       let content = currentResponse.choices[0]?.message?.content || ''
 
@@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = {
         content = content.trim()
       }
 
+      // Collect token information
       const tokens = {
         prompt: currentResponse.usage?.prompt_tokens || 0,
         completion: currentResponse.usage?.completion_tokens || 0,
@@ -155,201 +349,307 @@ export const ollamaProvider: ProviderConfig = {
       let modelTime = firstResponseTime
       let toolsTime = 0
 
+      // Track if a forced tool has been used
+      let hasUsedForcedTool = false
+
       // Track each model and tool call segment with timestamps
       const timeSegments: TimeSegment[] = [
         {
           type: 'model',
           name: 'Initial response',
-          startTime: startTime,
-          endTime: startTime + firstResponseTime,
+          startTime: initialCallTime,
+          endTime: initialCallTime + firstResponseTime,
           duration: firstResponseTime,
         },
       ]
 
-      try {
-        while (iterationCount < MAX_ITERATIONS) {
-          // Check for tool calls
-          const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
-          if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
-            break
-          }
+      // Check if a forced tool was used in the first response
+      checkForForcedToolUsage(currentResponse, originalToolChoice)
 
-          // Track time for tool calls in this batch
-          const toolsStartTime = Date.now()
-
-          // Process each tool call
-          for (const toolCall of toolCallsInResponse) {
-            try {
-              const toolName = toolCall.function.name
-              const toolArgs = JSON.parse(toolCall.function.arguments)
-
-              // Get the tool from the tools registry
-              const tool = request.tools?.find((t) => t.id === toolName)
-              if (!tool) continue
-
-              // Execute the tool
-              const toolCallStartTime = Date.now()
-
-              // Only merge actual tool parameters for logging
-              const toolParams = {
-                ...tool.params,
-                ...toolArgs,
-              }
-
-              // Add system parameters for execution
-              const executionParams = {
-                ...toolParams,
-                ...(request.workflowId
-                  ? {
-                      _context: {
-                        workflowId: request.workflowId,
-                        ...(request.chatId ? { chatId: request.chatId } : {}),
-                      },
-                    }
-                  : {}),
-                ...(request.environmentVariables ? { envVars: request.environmentVariables } : {}),
-              }
-
-              const result = await executeTool(toolName, executionParams, true)
-              const toolCallEndTime = Date.now()
-              const toolCallDuration = toolCallEndTime - toolCallStartTime
-
-              // Add to time segments for both success and failure
-              timeSegments.push({
-                type: 'tool',
-                name: toolName,
-                startTime: toolCallStartTime,
-                endTime: toolCallEndTime,
-                duration: toolCallDuration,
-              })
-
-              // Prepare result content for the LLM
-              let resultContent: any
-              if (result.success) {
-                toolResults.push(result.output)
-                resultContent = result.output
-              } else {
-                // Include error information so LLM can respond appropriately
-                resultContent = {
-                  error: true,
-                  message: result.error || 'Tool execution failed',
-                  tool: toolName,
-                }
-              }
-
-              toolCalls.push({
-                name: toolName,
-                arguments: toolParams,
-                startTime: new Date(toolCallStartTime).toISOString(),
-                endTime: new Date(toolCallEndTime).toISOString(),
-                duration: toolCallDuration,
-                result: resultContent,
-                success: result.success,
-              })
-
-              // Add the tool call and result to messages (both success and failure)
-              currentMessages.push({
-                role: 'assistant',
-                content: null,
-                tool_calls: [
-                  {
-                    id: toolCall.id,
-                    type: 'function',
-                    function: {
-                      name: toolName,
-                      arguments: toolCall.function.arguments,
-                    },
-                  },
-                ],
-              })
-
-              currentMessages.push({
-                role: 'tool',
-                tool_call_id: toolCall.id,
-                content: JSON.stringify(resultContent),
-              })
-            } catch (error) {
-              logger.error('Error processing tool call:', { error })
-            }
-          }
-
-          // Calculate tool call time for this iteration
-          const thisToolsTime = Date.now() - toolsStartTime
-          toolsTime += thisToolsTime
-
-          // Make the next request with updated messages
-          const nextPayload = {
-            ...payload,
-            messages: currentMessages,
-          }
-
-          // Time the next model call
-          const nextModelStartTime = Date.now()
-
-          // Make the next request
-          currentResponse = await ollama.chat.completions.create(nextPayload)
-
-          const nextModelEndTime = Date.now()
-          const thisModelTime = nextModelEndTime - nextModelStartTime
-
-          // Add to time segments
-          timeSegments.push({
-            type: 'model',
-            name: `Model response (iteration ${iterationCount + 1})`,
-            startTime: nextModelStartTime,
-            endTime: nextModelEndTime,
-            duration: thisModelTime,
-          })
-
-          // Add to model time
-          modelTime += thisModelTime
-
-          // Update content if we have a text response
-          if (currentResponse.choices[0]?.message?.content) {
-            content = currentResponse.choices[0].message.content
-            // Clean up the response content
-            content = content.replace(/```json\n?|\n?```/g, '')
-            content = content.trim()
-          }
-
-          // Update token counts
-          if (currentResponse.usage) {
-            tokens.prompt += currentResponse.usage.prompt_tokens || 0
-            tokens.completion += currentResponse.usage.completion_tokens || 0
-            tokens.total += currentResponse.usage.total_tokens || 0
-          }
-
-          iterationCount++
+      while (iterationCount < MAX_ITERATIONS) {
+        // Check for tool calls
+        const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
+        if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
+          break
         }
-      } catch (error) {
-        logger.error('Error in Ollama request:', { error })
+
+        logger.info(
+          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+        )
+
+        // Track time for tool calls in this batch
+        const toolsStartTime = Date.now()
+
+        // Process each tool call
+        for (const toolCall of toolCallsInResponse) {
+          try {
+            const toolName = toolCall.function.name
+            const toolArgs = JSON.parse(toolCall.function.arguments)
+
+            // Get the tool from the tools registry
+            const tool = request.tools?.find((t) => t.id === toolName)
+            if (!tool) continue
+
+            // Execute the tool
+            const toolCallStartTime = Date.now()
+
+            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
+            const result = await executeTool(toolName, executionParams, true)
+            const toolCallEndTime = Date.now()
+            const toolCallDuration = toolCallEndTime - toolCallStartTime
+
+            // Add to time segments for both success and failure
+            timeSegments.push({
+              type: 'tool',
+              name: toolName,
+              startTime: toolCallStartTime,
+              endTime: toolCallEndTime,
+              duration: toolCallDuration,
+            })
+
+            // Prepare result content for the LLM
+            let resultContent: any
+            if (result.success) {
+              toolResults.push(result.output)
+              resultContent = result.output
+            } else {
+              // Include error information so LLM can respond appropriately
+              resultContent = {
+                error: true,
+                message: result.error || 'Tool execution failed',
+                tool: toolName,
+              }
+            }
+
+            toolCalls.push({
+              name: toolName,
+              arguments: toolParams,
+              startTime: new Date(toolCallStartTime).toISOString(),
+              endTime: new Date(toolCallEndTime).toISOString(),
+              duration: toolCallDuration,
+              result: resultContent,
+              success: result.success,
+            })
+
+            // Add the tool call and result to messages (both success and failure)
+            currentMessages.push({
+              role: 'assistant',
+              content: null,
+              tool_calls: [
+                {
+                  id: toolCall.id,
+                  type: 'function',
+                  function: {
+                    name: toolName,
+                    arguments: toolCall.function.arguments,
+                  },
+                },
+              ],
+            })
+
+            currentMessages.push({
+              role: 'tool',
+              tool_call_id: toolCall.id,
+              content: JSON.stringify(resultContent),
+            })
+          } catch (error) {
+            logger.error('Error processing tool call:', {
+              error,
+              toolName: toolCall?.function?.name,
+            })
+          }
+        }
+
+        // Calculate tool call time for this iteration
+        const thisToolsTime = Date.now() - toolsStartTime
+        toolsTime += thisToolsTime
+
+        // Make the next request with updated messages
+        const nextPayload = {
+          ...payload,
+          messages: currentMessages,
+        }
+
+        // Update tool_choice based on which forced tools have been used
+        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
+          // If we have remaining forced tools, get the next one to force
+          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
+
+          if (remainingTools.length > 0) {
+            // Ollama doesn't support forced tool selection, so we keep using 'auto'
+            nextPayload.tool_choice = 'auto'
+            logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`)
+          } else {
+            // All forced tools have been used, continue with auto
+            nextPayload.tool_choice = 'auto'
+            logger.info('All forced tools have been used, continuing with auto tool_choice')
+          }
+        }
+
+        // Time the next model call
+        const nextModelStartTime = Date.now()
+
+        // Make the next request
+        currentResponse = await ollama.chat.completions.create(nextPayload)
+
+        // Check if any forced tools were used in this response
+        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
+
+        const nextModelEndTime = Date.now()
+        const thisModelTime = nextModelEndTime - nextModelStartTime
+
+        // Add to time segments
+        timeSegments.push({
+          type: 'model',
+          name: `Model response (iteration ${iterationCount + 1})`,
+          startTime: nextModelStartTime,
+          endTime: nextModelEndTime,
+          duration: thisModelTime,
+        })
+
+        // Add to model time
+        modelTime += thisModelTime
+
+        // Update content if we have a text response
+        if (currentResponse.choices[0]?.message?.content) {
+          content = currentResponse.choices[0].message.content
+          // Clean up the response content
+          content = content.replace(/```json\n?|\n?```/g, '')
+          content = content.trim()
+        }
+
+        // Update token counts
+        if (currentResponse.usage) {
+          tokens.prompt += currentResponse.usage.prompt_tokens || 0
+          tokens.completion += currentResponse.usage.completion_tokens || 0
+          tokens.total += currentResponse.usage.total_tokens || 0
+        }
+
+        iterationCount++
       }
 
-      const endTime = Date.now()
+      // After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response
+      if (request.stream && iterationCount > 0) {
+        logger.info('Using streaming for final response after tool calls')
+
+        const streamingPayload = {
+          ...payload,
+          messages: currentMessages,
+          tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
+          stream: true,
+          stream_options: { include_usage: true },
+        }
+
+        const streamResponse = await ollama.chat.completions.create(streamingPayload)
+
+        // Create the StreamingExecution object with all collected data
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokens.prompt,
+                completion: usage.completion_tokens || tokens.completion,
+                total: usage.total_tokens || tokens.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the callback
+              model: request.model,
+              tokens: {
+                prompt: tokens.prompt,
+                completion: tokens.completion,
+                total: tokens.total,
+              },
+              toolCalls:
+                toolCalls.length > 0
+                  ? {
+                      list: toolCalls,
+                      count: toolCalls.length,
+                    }
+                  : undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                modelTime: modelTime,
+                toolsTime: toolsTime,
+                firstResponseTime: firstResponseTime,
+                iterations: iterationCount + 1,
+                timeSegments: timeSegments,
+              },
+            },
+            logs: [], // No block logs at provider level
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Calculate overall timing
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
 
       return {
-        content: content,
+        content,
         model: request.model,
         tokens,
         toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
         toolResults: toolResults.length > 0 ? toolResults : undefined,
         timing: {
-          startTime: new Date(startTime).toISOString(),
-          endTime: new Date(endTime).toISOString(),
-          duration: endTime - startTime,
+          startTime: providerStartTimeISO,
+          endTime: providerEndTimeISO,
+          duration: totalDuration,
           modelTime: modelTime,
           toolsTime: toolsTime,
           firstResponseTime: firstResponseTime,
           iterations: iterationCount + 1,
-          timeSegments,
+          timeSegments: timeSegments,
         },
       }
     } catch (error) {
-      logger.error('Error in Ollama request', {
-        error: error instanceof Error ? error.message : 'Unknown error',
-        model: request.model,
+      // Include timing information even for errors
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      logger.error('Error in Ollama request:', {
+        error,
+        duration: totalDuration,
       })
-      throw error
+
+      // Create a new error with timing information
+      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+      // @ts-ignore - Adding timing property to the error
+      enhancedError.timing = {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+      }
+
+      throw enhancedError
     }
   },
 }
diff --git a/apps/sim/providers/utils.ts b/apps/sim/providers/utils.ts
index 6e7f759c9..6ab2650f0 100644
--- a/apps/sim/providers/utils.ts
+++ b/apps/sim/providers/utils.ts
@@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai'
 import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types'
 import { xAIProvider } from '@/providers/xai'
 import { useCustomToolsStore } from '@/stores/custom-tools/store'
+import { useOllamaStore } from '@/stores/ollama/store'
 
 const logger = createLogger('ProviderUtils')
 
@@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: str
   // If user provided a key, use it as a fallback
   const hasUserKey = !!userProvidedKey
 
+  // Ollama models don't require API keys - they run locally
+  const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model)
+  if (isOllamaModel) {
+    return 'empty' // Ollama uses 'empty' as a placeholder API key
+  }
+
   // Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform
   const isOpenAIModel = provider === 'openai'
   const isClaudeModel = provider === 'anthropic'
diff --git a/apps/sim/scripts/ollama_docker.sh b/apps/sim/scripts/ollama_docker.sh
deleted file mode 100755
index d8c993085..000000000
--- a/apps/sim/scripts/ollama_docker.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-set -e
-
-# Check that at least one argument is provided. If not, display the usage help.
-if [ "$#" -eq 0 ]; then
-  echo "Usage: $(basename "$0") <ollama command> [args...]"
-  echo "Example: $(basename "$0") ps      # This will run 'ollama ps' inside the container"
-  exit 1
-fi
-
-# Start a detached container from the ollama/ollama image,
-# mounting the host's ~/.ollama directory directly into the container.
-# Here we mount it to /root/.ollama, assuming that's where the image expects it.
-CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama
-)
-
-# Define a cleanup function to stop the container regardless of how the script exits.
-cleanup() {
-  docker stop "$CONTAINER_ID" >/dev/null
-}
-trap cleanup EXIT
-
-# Execute the command provided by the user within the running container.
-# The command runs as: "ollama <user-arguments>"
-docker exec -it "$CONTAINER_ID" ollama "$@"
diff --git a/apps/sim/stores/ollama/store.ts b/apps/sim/stores/ollama/store.ts
index 672a91212..4d52d1516 100644
--- a/apps/sim/stores/ollama/store.ts
+++ b/apps/sim/stores/ollama/store.ts
@@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types'
 
 const logger = createLogger('OllamaStore')
 
-export const useOllamaStore = create<OllamaStore>((set) => ({
+/** Fetch Ollama model names from the app API; resolves to [] on any failure (never rejects). */
+const fetchOllamaModels = async (): Promise<string[]> => {
+  try {
+    const response = await fetch('/api/providers/ollama/models')
+    if (!response.ok) {
+      const { status, statusText } = response
+      logger.warn('Failed to fetch Ollama models from API', { status, statusText })
+      return []
+    }
+    // Untyped JSON: accept `models` only if it is an array, keeping just its string entries
+    const data = await response.json()
+    const models: unknown = data?.models
+    return Array.isArray(models) ? models.filter((m): m is string => typeof m === 'string') : []
+  } catch (error) {
+    logger.error('Error fetching Ollama models', {
+      error: error instanceof Error ? error.message : 'Unknown error',
+    })
+    return []
+  }
+}
+
+export const useOllamaStore = create<OllamaStore>((set, get) => ({
   models: [],
+  isLoading: false, // true only while fetchModels is awaiting the API
   setModels: (models) => {
     set({ models })
     // Update the providers when models change
     updateOllamaProviderModels(models)
   },
+
+  // Fetch models via the API; returns early on the server or while another fetch is in progress
+  fetchModels: async () => {
+    if (typeof window === 'undefined') {
+      logger.info('Skipping client-side model fetch on server')
+      return
+    }
+
+    if (get().isLoading) {
+      logger.info('Model fetch already in progress')
+      return
+    }
+
+    logger.info('Fetching Ollama models from API')
+    set({ isLoading: true })
+
+    try {
+      const models = await fetchOllamaModels()
+      logger.info('Successfully fetched Ollama models', {
+        count: models.length,
+        models,
+      })
+      get().setModels(models) // setModels also calls updateOllamaProviderModels
+    } catch (error) {
+      logger.error('Failed to fetch Ollama models', {
+        error: error instanceof Error ? error.message : 'Unknown error',
+      })
+    } finally {
+      set({ isLoading: false }) // always clear the flag so later calls are not blocked by the guard
+    }
+  },
 }))
+
+// Module side effect: in the browser, schedule one model fetch when this file is first evaluated
+if (typeof window !== 'undefined') {
+  // Delay to avoid hydration issues (NOTE(review): fixed 1s timer, not tied to hydration finishing)
+  setTimeout(() => {
+    useOllamaStore.getState().fetchModels() // promise is not awaited; fetchModels catches and logs its own errors
+  }, 1000)
+}
diff --git a/apps/sim/stores/ollama/types.ts b/apps/sim/stores/ollama/types.ts
index 7c89f4ff9..77b0fa26c 100644
--- a/apps/sim/stores/ollama/types.ts
+++ b/apps/sim/stores/ollama/types.ts
@@ -1,4 +1,6 @@
 export interface OllamaStore {
   models: string[]
+  isLoading: boolean // true while a fetchModels() call is in flight
   setModels: (models: string[]) => void
+  fetchModels: () => Promise<void> // loads the model list from the API and stores it via setModels
 }
diff --git a/docker-compose.ollama.yml b/docker-compose.ollama.yml
index ca2244789..e5b75cac0 100644
--- a/docker-compose.ollama.yml
+++ b/docker-compose.ollama.yml
@@ -1,11 +1,106 @@
+name: sim-with-ollama
+
 services:
-  local-llm-gpu:
-    profiles:
-      - local-gpu # This profile requires both 'local' and 'gpu'
+  # Main Sim Studio Application. NOTE(review): Compose interpolation does not run $(...) command substitution, so the BETTER_AUTH_SECRET / ENCRYPTION_KEY defaults below are presumably used as literal text - confirm, and set real secrets in the environment.
+  simstudio:
+    build:
+      context: .
+      dockerfile: docker/app.Dockerfile
+    ports:
+      - '3000:3000'
+    deploy:
+      resources:
+        limits:
+          memory: 8G
+    environment:
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
+      - BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
+      - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
+      - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
+      - ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)}
+      - OLLAMA_URL=http://ollama:11434
+      - NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002}
+    depends_on:
+      db:
+        condition: service_healthy
+      migrations:
+        condition: service_completed_successfully
+      realtime:
+        condition: service_healthy
+      ollama:
+        condition: service_healthy
+    healthcheck:
+      test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000']
+      interval: 90s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+    restart: unless-stopped
+
+  # Realtime Socket Server (published on port 3002). NOTE(review): the BETTER_AUTH_SECRET default contains $(...), which Compose interpolation does not execute - confirm, and set a real secret in the environment.
+  realtime:
+    build:
+      context: .
+      dockerfile: docker/realtime.Dockerfile
+    environment:
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
+      - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
+      - BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000}
+      - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
+    depends_on:
+      db:
+        condition: service_healthy
+    restart: unless-stopped
+    ports:
+      - '3002:3002'
+    deploy:
+      resources:
+        limits:
+          memory: 8G
+    healthcheck:
+      test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health']
+      interval: 90s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+
+  # Database Migrations: one-shot job that runs 'bun run db:migrate' once db is healthy and is never restarted
+  migrations:
+    build:
+      context: .
+      dockerfile: docker/db.Dockerfile
+    environment:
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
+    depends_on:
+      db:
+        condition: service_healthy
+    command: ['bun', 'run', 'db:migrate']
+    restart: 'no'
+
+  # PostgreSQL with the pgvector extension; data persists in the postgres_data volume
+  db:
+    image: pgvector/pgvector:pg17
+    restart: always
+    ports:
+      - '5432:5432'
+    environment:
+      - POSTGRES_USER=${POSTGRES_USER:-postgres}
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
+      - POSTGRES_DB=${POSTGRES_DB:-simstudio}
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    healthcheck: # probe with the configured user/database rather than a hard-coded 'postgres'
+      test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-simstudio}']
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+  # Ollama with GPU support (default)
+  ollama:
     image: ollama/ollama:latest
     pull_policy: always
     volumes:
-      - ${HOME}/.ollama:/root/.ollama
+      - ollama_data:/root/.ollama
     ports:
       - '11434:11434'
     environment:
@@ -13,6 +108,7 @@ services:
       - OLLAMA_LOAD_TIMEOUT=-1
       - OLLAMA_KEEP_ALIVE=-1
       - OLLAMA_DEBUG=1
+      - OLLAMA_HOST=0.0.0.0:11434
     command: 'serve'
     deploy:
       resources:
@@ -26,23 +122,56 @@ services:
       interval: 10s
       timeout: 5s
       retries: 5
+      start_period: 30s
+    restart: unless-stopped
 
-  local-llm-cpu:
+  # Ollama CPU-only version (enable with --profile cpu). NOTE(review): simstudio still targets http://ollama:11434 and both Ollama services publish host port 11434 - confirm the cpu profile works alongside the default ollama service.
+  ollama-cpu:
     profiles:
-      - local-cpu # This profile requires both 'local' and 'cpu'
+      - cpu
     image: ollama/ollama:latest
     pull_policy: always
     volumes:
-      - ${HOME}/.ollama:/root/.ollama
+      - ollama_data:/root/.ollama
     ports:
       - '11434:11434'
     environment:
       - OLLAMA_LOAD_TIMEOUT=-1
       - OLLAMA_KEEP_ALIVE=-1
       - OLLAMA_DEBUG=1
+      - OLLAMA_HOST=0.0.0.0:11434
     command: 'serve'
     healthcheck:
       test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
       interval: 10s
       timeout: 5s
       retries: 5
+      start_period: 30s
+    restart: unless-stopped
+
+  # One-shot helper (profile 'setup'): pulls gemma3:4b with OLLAMA_HOST pointed at the ollama service. The image's entrypoint is the ollama binary, so it is overridden here to run a shell script.
+  model-setup:
+    image: ollama/ollama:latest
+    profiles:
+      - setup
+    volumes:
+      - ollama_data:/root/.ollama
+    environment:
+      - OLLAMA_HOST=ollama:11434
+    depends_on:
+      ollama:
+        condition: service_healthy
+    entrypoint: ['/bin/sh', '-c']
+    command:
+      - |
+        echo 'Waiting for Ollama to be ready...' &&
+        sleep 10 &&
+        echo 'Pulling gemma3:4b model (recommended starter model)...' &&
+        ollama pull gemma3:4b &&
+        echo 'Model setup complete! You can now use gemma3:4b in Sim.' &&
+        echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>'
+    restart: 'no'
+
+volumes:
+  postgres_data: # Postgres data directory, mounted by db
+  ollama_data: # Ollama model store, mounted by ollama, ollama-cpu and model-setup