feat(ollama): add streaming & tool call support for Ollama, update docs (#884)

This commit is contained in:
Waleed Latif
2025-08-05 15:04:50 -07:00
committed by GitHub
parent be65bf795f
commit 746b87743a
14 changed files with 893 additions and 332 deletions

View File

@@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/)
To use local models with Sim:
1. Pull models using our helper script:
1. Install Ollama and pull models:
```bash
./apps/sim/scripts/ollama_docker.sh pull <model_name>
# Install Ollama (if not already installed)
curl -fsSL https://ollama.ai/install.sh | sh
# Pull a model (e.g., gemma3:4b)
ollama pull gemma3:4b
```
2. Start Sim with local model support:

View File

@@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d
Access the application at [http://localhost:3000/](http://localhost:3000/)
#### Using Local Models
#### Using Local Models with Ollama
To use local models with Sim:
1. Pull models using our helper script:
Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required:
```bash
./apps/sim/scripts/ollama_docker.sh pull <model_name>
# Start with GPU support (automatically downloads gemma3:4b model)
docker compose -f docker-compose.ollama.yml --profile setup up -d
# For CPU-only systems:
docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d
```
2. Start Sim with local model support:
Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with:
```bash
# With NVIDIA GPU support
docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
# Without GPU (CPU only)
docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
# If hosting on a server, update OLLAMA_URL in docker-compose.prod.yml to the server's public IP (e.g. http://1.1.1.1:11434), then start again:
docker compose -f docker-compose.prod.yml up -d
docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b
```
### Option 3: Dev Containers

View File

@@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 32` to generate
# RESEND_API_KEY= # Uncomment and add your key from https://resend.com to send actual emails
# If left commented out, emails will be logged to console instead
# Local AI Models (Optional)
# OLLAMA_URL=http://localhost:11434 # URL for local Ollama server - uncomment if using local models

View File

@@ -0,0 +1,52 @@
import { type NextRequest, NextResponse } from 'next/server'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { ModelsObject } from '@/providers/ollama/types'
const logger = createLogger('OllamaModelsAPI')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
export const dynamic = 'force-dynamic'
/**
* Get available Ollama models
*/
export async function GET(request: NextRequest) {
try {
logger.info('Fetching Ollama models', {
host: OLLAMA_HOST,
})
const response = await fetch(`${OLLAMA_HOST}/api/tags`, {
headers: {
'Content-Type': 'application/json',
},
})
if (!response.ok) {
logger.warn('Ollama service is not available', {
status: response.status,
statusText: response.statusText,
})
return NextResponse.json({ models: [] })
}
const data = (await response.json()) as ModelsObject
const models = data.models.map((model) => model.name)
logger.info('Successfully fetched Ollama models', {
count: models.length,
models,
})
return NextResponse.json({ models })
} catch (error) {
logger.error('Failed to fetch Ollama models', {
error: error instanceof Error ? error.message : 'Unknown error',
host: OLLAMA_HOST,
})
// Return empty array instead of error to avoid breaking the UI
return NextResponse.json({ models: [] })
}
}
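
For reference, a minimal client-side sketch of consuming this route (the path matches the fetch in the Ollama store later in this commit; the wrapper function name is hypothetical). Because the route degrades to an empty list on failure, callers can treat a non-OK response and an empty array the same way:

```ts
// Hypothetical helper: fetch the local model names exposed by the route above.
async function loadOllamaModels(): Promise<string[]> {
  const res = await fetch('/api/providers/ollama/models')
  if (!res.ok) return [] // the route already falls back to { models: [] }
  const data = (await res.json()) as { models: string[] }
  return data.models
}
```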

View File

@@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps<WorkflowBlockProps>) {
// If there's no condition, the block should be shown
if (!block.condition) return true
// If condition is a function, call it to get the actual condition object
const actualCondition =
typeof block.condition === 'function' ? block.condition() : block.condition
// Get the values of the fields this block depends on from the appropriate state
const fieldValue = stateToUse[block.condition.field]?.value
const andFieldValue = block.condition.and
? stateToUse[block.condition.and.field]?.value
const fieldValue = stateToUse[actualCondition.field]?.value
const andFieldValue = actualCondition.and
? stateToUse[actualCondition.and.field]?.value
: undefined
// Check if the condition value is an array
const isValueMatch = Array.isArray(block.condition.value)
const isValueMatch = Array.isArray(actualCondition.value)
? fieldValue != null &&
(block.condition.not
? !block.condition.value.includes(fieldValue as string | number | boolean)
: block.condition.value.includes(fieldValue as string | number | boolean))
: block.condition.not
? fieldValue !== block.condition.value
: fieldValue === block.condition.value
(actualCondition.not
? !actualCondition.value.includes(fieldValue as string | number | boolean)
: actualCondition.value.includes(fieldValue as string | number | boolean))
: actualCondition.not
? fieldValue !== actualCondition.value
: fieldValue === actualCondition.value
// Check both conditions if 'and' is present
const isAndValueMatch =
!block.condition.and ||
(Array.isArray(block.condition.and.value)
!actualCondition.and ||
(Array.isArray(actualCondition.and.value)
? andFieldValue != null &&
(block.condition.and.not
? !block.condition.and.value.includes(andFieldValue as string | number | boolean)
: block.condition.and.value.includes(andFieldValue as string | number | boolean))
: block.condition.and.not
? andFieldValue !== block.condition.and.value
: andFieldValue === block.condition.and.value)
(actualCondition.and.not
? !actualCondition.and.value.includes(andFieldValue as string | number | boolean)
: actualCondition.and.value.includes(andFieldValue as string | number | boolean))
: actualCondition.and.not
? andFieldValue !== actualCondition.and.value
: andFieldValue === actualCondition.and.value)
return isValueMatch && isAndValueMatch
})
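
To make the evaluation semantics concrete, here is a small self-contained sketch (hypothetical field values, not taken from the codebase): the condition function is resolved first, array values act as a membership test, and `not` inverts the result.

```ts
type BlockCondition = {
  field: string
  value: string | number | boolean | Array<string | number | boolean>
  not?: boolean
}

// Hypothetical state: the user selected a local Ollama model.
const stateToUse: Record<string, { value: unknown }> = { model: { value: 'gemma3:4b' } }

// Function form, as the agent block's API key field now uses.
const condition: BlockCondition | (() => BlockCondition) = () => ({
  field: 'model',
  value: ['gemma3:4b', 'llama3.1:8b'], // stand-in for the live Ollama model list
  not: true, // show the sub-block for every model EXCEPT these
})

// Resolve the function form, then apply the same array/not branches as above.
const actual = typeof condition === 'function' ? condition() : condition
const fieldValue = stateToUse[actual.field]?.value
const isValueMatch = Array.isArray(actual.value)
  ? fieldValue != null &&
    (actual.not
      ? !actual.value.includes(fieldValue as string)
      : actual.value.includes(fieldValue as string))
  : actual.not
    ? fieldValue !== actual.value
    : fieldValue === actual.value

console.log(isValueMatch) // false: the sub-block stays hidden for 'gemma3:4b'
```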

View File

@@ -12,6 +12,12 @@ import {
MODELS_WITH_TEMPERATURE_SUPPORT,
providers,
} from '@/providers/utils'
// Get current Ollama models dynamically
const getCurrentOllamaModels = () => {
return useOllamaStore.getState().models
}
import { useOllamaStore } from '@/stores/ollama/store'
import type { ToolResponse } from '@/tools/types'
@@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear langu
password: true,
connectionDroppable: false,
required: true,
// Hide API key for all hosted models when running on hosted version
// Hide API key for hosted models and Ollama models
condition: isHosted
? {
field: 'model',
value: getHostedModels(),
not: true, // Show for all models EXCEPT those listed
}
: undefined, // Show for all models in non-hosted environments
: () => ({
field: 'model',
value: getCurrentOllamaModels(),
not: true, // Show for all models EXCEPT Ollama models
}),
},
{
id: 'azureEndpoint',

View File

@@ -118,16 +118,27 @@ export interface SubBlockConfig {
hidden?: boolean
description?: string
value?: (params: Record<string, any>) => string
condition?: {
field: string
value: string | number | boolean | Array<string | number | boolean>
not?: boolean
and?: {
field: string
value: string | number | boolean | Array<string | number | boolean> | undefined
not?: boolean
}
}
condition?:
| {
field: string
value: string | number | boolean | Array<string | number | boolean>
not?: boolean
and?: {
field: string
value: string | number | boolean | Array<string | number | boolean> | undefined
not?: boolean
}
}
| (() => {
field: string
value: string | number | boolean | Array<string | number | boolean>
not?: boolean
and?: {
field: string
value: string | number | boolean | Array<string | number | boolean> | undefined
not?: boolean
}
})
// Props specific to 'code' sub-block type
language?: 'javascript' | 'json'
generationType?: GenerationType
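
As an illustration of the two arms of this union, a hedged sketch of how a block definition might declare each form (field names and values are examples only; a local type alias stands in for the condition shape so the snippet is self-contained):

```ts
type ConditionShape = {
  field: string
  value: string | number | boolean | Array<string | number | boolean>
  not?: boolean
}

// Static form: evaluated once from the declared object.
const staticCondition: ConditionShape = {
  field: 'provider',
  value: ['openai', 'anthropic'], // show only when one of these is selected
}

// Function form: re-evaluated on every render, so it can read live store
// state such as the dynamically fetched Ollama model list.
const dynamicCondition: () => ConditionShape = () => ({
  field: 'model',
  value: ['gemma3:4b'], // in the agent block this comes from getCurrentOllamaModels()
  not: true, // hide the sub-block (e.g. an API key field) for local models
})
```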

View File

@@ -58,7 +58,7 @@ export class InputResolver {
/**
* Evaluates if a sub-block should be active based on its condition
* @param condition - The condition to evaluate
* @param condition - The condition to evaluate (can be static object or function)
* @param currentValues - Current values of all inputs
* @returns True if the sub-block should be active
*/
@@ -70,37 +70,46 @@ export class InputResolver {
not?: boolean
and?: { field: string; value: any; not?: boolean }
}
| (() => {
field: string
value: any
not?: boolean
and?: { field: string; value: any; not?: boolean }
})
| undefined,
currentValues: Record<string, any>
): boolean {
if (!condition) return true
// If condition is a function, call it to get the actual condition object
const actualCondition = typeof condition === 'function' ? condition() : condition
// Get the field value
const fieldValue = currentValues[condition.field]
const fieldValue = currentValues[actualCondition.field]
// Check if the condition value is an array
const isValueMatch = Array.isArray(condition.value)
const isValueMatch = Array.isArray(actualCondition.value)
? fieldValue != null &&
(condition.not
? !condition.value.includes(fieldValue)
: condition.value.includes(fieldValue))
: condition.not
? fieldValue !== condition.value
: fieldValue === condition.value
(actualCondition.not
? !actualCondition.value.includes(fieldValue)
: actualCondition.value.includes(fieldValue))
: actualCondition.not
? fieldValue !== actualCondition.value
: fieldValue === actualCondition.value
// Check both conditions if 'and' is present
const isAndValueMatch =
!condition.and ||
!actualCondition.and ||
(() => {
const andFieldValue = currentValues[condition.and!.field]
return Array.isArray(condition.and!.value)
const andFieldValue = currentValues[actualCondition.and!.field]
return Array.isArray(actualCondition.and!.value)
? andFieldValue != null &&
(condition.and!.not
? !condition.and!.value.includes(andFieldValue)
: condition.and!.value.includes(andFieldValue))
: condition.and!.not
? andFieldValue !== condition.and!.value
: andFieldValue === condition.and!.value
(actualCondition.and!.not
? !actualCondition.and!.value.includes(andFieldValue)
: actualCondition.and!.value.includes(andFieldValue))
: actualCondition.and!.not
? andFieldValue !== actualCondition.and!.value
: andFieldValue === actualCondition.and!.value
})()
return isValueMatch && isAndValueMatch

View File

@@ -1,6 +1,7 @@
import OpenAI from 'openai'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { StreamingExecution } from '@/executor/types'
import type { ModelsObject } from '@/providers/ollama/types'
import type {
ProviderConfig,
@@ -8,12 +9,57 @@ import type {
ProviderResponse,
TimeSegment,
} from '@/providers/types'
import {
prepareToolExecution,
prepareToolsWithUsageControl,
trackForcedToolUsage,
} from '@/providers/utils'
import { useOllamaStore } from '@/stores/ollama/store'
import { executeTool } from '@/tools'
const logger = createLogger('OllamaProvider')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
/**
* Helper function to convert an Ollama stream to a standard ReadableStream
* and collect completion metrics
*/
function createReadableStreamFromOllamaStream(
ollamaStream: any,
onComplete?: (content: string, usage?: any) => void
): ReadableStream {
let fullContent = ''
let usageData: any = null
return new ReadableStream({
async start(controller) {
try {
for await (const chunk of ollamaStream) {
// Check for usage data in the final chunk
if (chunk.usage) {
usageData = chunk.usage
}
const content = chunk.choices[0]?.delta?.content || ''
if (content) {
fullContent += content
controller.enqueue(new TextEncoder().encode(content))
}
}
// Once stream is complete, call the completion callback with the final content and usage
if (onComplete) {
onComplete(fullContent, usageData)
}
controller.close()
} catch (error) {
controller.error(error)
}
},
})
}
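
A short sketch of how a caller might drain the resulting ReadableStream (illustrative only; in the provider below the stream is handed to the rest of the execution pipeline rather than read like this):

```ts
// Illustrative consumer: read the stream produced by
// createReadableStreamFromOllamaStream and return the decoded text.
async function drainStream(stream: ReadableStream<Uint8Array>): Promise<string> {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true })
  }
  return text
}
```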
export const ollamaProvider: ProviderConfig = {
id: 'ollama',
name: 'Ollama',
@@ -46,91 +92,238 @@ export const ollamaProvider: ProviderConfig = {
}
},
executeRequest: async (request: ProviderRequest): Promise<ProviderResponse> => {
executeRequest: async (
request: ProviderRequest
): Promise<ProviderResponse | StreamingExecution> => {
logger.info('Preparing Ollama request', {
model: request.model,
hasSystemPrompt: !!request.systemPrompt,
hasMessages: !!request.context,
hasMessages: !!request.messages?.length,
hasTools: !!request.tools?.length,
toolCount: request.tools?.length || 0,
hasResponseFormat: !!request.responseFormat,
stream: !!request.stream,
})
const startTime = Date.now()
// Create Ollama client using OpenAI-compatible API
const ollama = new OpenAI({
apiKey: 'empty',
baseURL: `${OLLAMA_HOST}/v1`,
})
// Start with an empty array for all messages
const allMessages = []
// Add system prompt if present
if (request.systemPrompt) {
allMessages.push({
role: 'system',
content: request.systemPrompt,
})
}
// Add context if present
if (request.context) {
allMessages.push({
role: 'user',
content: request.context,
})
}
// Add remaining messages
if (request.messages) {
allMessages.push(...request.messages)
}
// Transform tools to OpenAI format if provided
const tools = request.tools?.length
? request.tools.map((tool) => ({
type: 'function',
function: {
name: tool.id,
description: tool.description,
parameters: tool.parameters,
},
}))
: undefined
// Build the request payload
const payload: any = {
model: request.model,
messages: allMessages,
}
// Add optional parameters
if (request.temperature !== undefined) payload.temperature = request.temperature
if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
// Add response format for structured output if specified
if (request.responseFormat) {
// Use OpenAI's JSON schema format (Ollama supports this)
payload.response_format = {
type: 'json_schema',
json_schema: {
name: request.responseFormat.name || 'response_schema',
schema: request.responseFormat.schema || request.responseFormat,
strict: request.responseFormat.strict !== false,
},
}
logger.info('Added JSON schema response format to Ollama request')
}
// Handle tools and tool usage control
let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
if (tools?.length) {
preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama')
const { tools: filteredTools, toolChoice } = preparedTools
if (filteredTools?.length && toolChoice) {
payload.tools = filteredTools
// Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto'
payload.tool_choice = typeof toolChoice === 'string' ? toolChoice : 'auto'
logger.info('Ollama request configuration:', {
toolCount: filteredTools.length,
toolChoice: payload.tool_choice,
model: request.model,
})
}
}
// Start execution timer for the entire provider execution
const providerStartTime = Date.now()
const providerStartTimeISO = new Date(providerStartTime).toISOString()
try {
// Prepare messages array
const ollama = new OpenAI({
apiKey: 'empty',
baseURL: `${OLLAMA_HOST}/v1`,
})
// Check if we can stream directly (no tools required)
if (request.stream && (!tools || tools.length === 0)) {
logger.info('Using streaming response for Ollama request')
// Start with an empty array for all messages
const allMessages = []
// Add system prompt if present
if (request.systemPrompt) {
allMessages.push({ role: 'system', content: request.systemPrompt })
}
// Add context if present
if (request.context) {
allMessages.push({ role: 'user', content: request.context })
}
// Add remaining messages
if (request.messages) {
allMessages.push(...request.messages)
}
// Build the basic payload
const payload: any = {
model: request.model,
messages: allMessages,
stream: false,
}
// Add optional parameters
if (request.temperature !== undefined) payload.temperature = request.temperature
if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
// Transform tools to OpenAI format if provided
const tools = request.tools?.length
? request.tools.map((tool) => ({
type: 'function',
function: {
name: tool.id,
description: tool.description,
parameters: tool.parameters,
},
}))
: undefined
// Handle tools and tool usage control
if (tools?.length) {
// Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it
const filteredTools = tools.filter((tool) => {
const toolId = tool.function?.name
const toolConfig = request.tools?.find((t) => t.id === toolId)
// Only filter out 'none', treat 'force' as 'auto'
return toolConfig?.usageControl !== 'none'
// Create a streaming request with token usage tracking
const streamResponse = await ollama.chat.completions.create({
...payload,
stream: true,
stream_options: { include_usage: true },
})
if (filteredTools?.length) {
payload.tools = filteredTools
// Always use 'auto' for Ollama, regardless of the tool_choice setting
payload.tool_choice = 'auto'
// Start collecting token usage from the stream
const tokenUsage = {
prompt: 0,
completion: 0,
total: 0,
}
logger.info('Ollama request configuration:', {
toolCount: filteredTools.length,
toolChoice: 'auto', // Ollama always uses auto
model: request.model,
})
// Create a StreamingExecution response with a callback to update content and tokens
const streamingResult = {
stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
// Update the execution data with the final content and token usage
streamingResult.execution.output.content = content
// Clean up the response content
if (content) {
streamingResult.execution.output.content = content
.replace(/```json\n?|\n?```/g, '')
.trim()
}
// Update the timing information with the actual completion time
const streamEndTime = Date.now()
const streamEndTimeISO = new Date(streamEndTime).toISOString()
if (streamingResult.execution.output.providerTiming) {
streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
streamingResult.execution.output.providerTiming.duration =
streamEndTime - providerStartTime
// Update the time segment as well
if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
streamEndTime
streamingResult.execution.output.providerTiming.timeSegments[0].duration =
streamEndTime - providerStartTime
}
}
// Update token usage if available from the stream
if (usage) {
const newTokens = {
prompt: usage.prompt_tokens || tokenUsage.prompt,
completion: usage.completion_tokens || tokenUsage.completion,
total: usage.total_tokens || tokenUsage.total,
}
streamingResult.execution.output.tokens = newTokens
}
}),
execution: {
success: true,
output: {
content: '', // Will be filled by the stream completion callback
model: request.model,
tokens: tokenUsage,
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
timeSegments: [
{
type: 'model',
name: 'Streaming response',
startTime: providerStartTime,
endTime: Date.now(),
duration: Date.now() - providerStartTime,
},
],
},
},
logs: [], // No block logs for direct streaming
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
},
},
} as StreamingExecution
// Return the streaming execution object
return streamingResult as StreamingExecution
}
// Make the initial API request
const initialCallTime = Date.now()
// Track the original tool_choice for forced tool tracking
const originalToolChoice = payload.tool_choice
// Track forced tools and their usage
const forcedTools = preparedTools?.forcedTools || []
let usedForcedTools: string[] = []
// Helper function to check for forced tool usage in responses
const checkForForcedToolUsage = (
response: any,
toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
) => {
if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
const toolCallsResponse = response.choices[0].message.tool_calls
const result = trackForcedToolUsage(
toolCallsResponse,
toolChoice,
logger,
'ollama',
forcedTools,
usedForcedTools
)
hasUsedForcedTool = result.hasUsedForcedTool
usedForcedTools = result.usedForcedTools
}
}
let currentResponse = await ollama.chat.completions.create(payload)
const firstResponseTime = Date.now() - startTime
const firstResponseTime = Date.now() - initialCallTime
let content = currentResponse.choices[0]?.message?.content || ''
@@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = {
content = content.trim()
}
// Collect token information
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
@@ -155,201 +349,307 @@ export const ollamaProvider: ProviderConfig = {
let modelTime = firstResponseTime
let toolsTime = 0
// Track if a forced tool has been used
let hasUsedForcedTool = false
// Track each model and tool call segment with timestamps
const timeSegments: TimeSegment[] = [
{
type: 'model',
name: 'Initial response',
startTime: startTime,
endTime: startTime + firstResponseTime,
startTime: initialCallTime,
endTime: initialCallTime + firstResponseTime,
duration: firstResponseTime,
},
]
try {
while (iterationCount < MAX_ITERATIONS) {
// Check for tool calls
const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
break
}
// Check if a forced tool was used in the first response
checkForForcedToolUsage(currentResponse, originalToolChoice)
// Track time for tool calls in this batch
const toolsStartTime = Date.now()
// Process each tool call
for (const toolCall of toolCallsInResponse) {
try {
const toolName = toolCall.function.name
const toolArgs = JSON.parse(toolCall.function.arguments)
// Get the tool from the tools registry
const tool = request.tools?.find((t) => t.id === toolName)
if (!tool) continue
// Execute the tool
const toolCallStartTime = Date.now()
// Only merge actual tool parameters for logging
const toolParams = {
...tool.params,
...toolArgs,
}
// Add system parameters for execution
const executionParams = {
...toolParams,
...(request.workflowId
? {
_context: {
workflowId: request.workflowId,
...(request.chatId ? { chatId: request.chatId } : {}),
},
}
: {}),
...(request.environmentVariables ? { envVars: request.environmentVariables } : {}),
}
const result = await executeTool(toolName, executionParams, true)
const toolCallEndTime = Date.now()
const toolCallDuration = toolCallEndTime - toolCallStartTime
// Add to time segments for both success and failure
timeSegments.push({
type: 'tool',
name: toolName,
startTime: toolCallStartTime,
endTime: toolCallEndTime,
duration: toolCallDuration,
})
// Prepare result content for the LLM
let resultContent: any
if (result.success) {
toolResults.push(result.output)
resultContent = result.output
} else {
// Include error information so LLM can respond appropriately
resultContent = {
error: true,
message: result.error || 'Tool execution failed',
tool: toolName,
}
}
toolCalls.push({
name: toolName,
arguments: toolParams,
startTime: new Date(toolCallStartTime).toISOString(),
endTime: new Date(toolCallEndTime).toISOString(),
duration: toolCallDuration,
result: resultContent,
success: result.success,
})
// Add the tool call and result to messages (both success and failure)
currentMessages.push({
role: 'assistant',
content: null,
tool_calls: [
{
id: toolCall.id,
type: 'function',
function: {
name: toolName,
arguments: toolCall.function.arguments,
},
},
],
})
currentMessages.push({
role: 'tool',
tool_call_id: toolCall.id,
content: JSON.stringify(resultContent),
})
} catch (error) {
logger.error('Error processing tool call:', { error })
}
}
// Calculate tool call time for this iteration
const thisToolsTime = Date.now() - toolsStartTime
toolsTime += thisToolsTime
// Make the next request with updated messages
const nextPayload = {
...payload,
messages: currentMessages,
}
// Time the next model call
const nextModelStartTime = Date.now()
// Make the next request
currentResponse = await ollama.chat.completions.create(nextPayload)
const nextModelEndTime = Date.now()
const thisModelTime = nextModelEndTime - nextModelStartTime
// Add to time segments
timeSegments.push({
type: 'model',
name: `Model response (iteration ${iterationCount + 1})`,
startTime: nextModelStartTime,
endTime: nextModelEndTime,
duration: thisModelTime,
})
// Add to model time
modelTime += thisModelTime
// Update content if we have a text response
if (currentResponse.choices[0]?.message?.content) {
content = currentResponse.choices[0].message.content
// Clean up the response content
content = content.replace(/```json\n?|\n?```/g, '')
content = content.trim()
}
// Update token counts
if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}
iterationCount++
while (iterationCount < MAX_ITERATIONS) {
// Check for tool calls
const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
break
}
} catch (error) {
logger.error('Error in Ollama request:', { error })
logger.info(
`Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
)
// Track time for tool calls in this batch
const toolsStartTime = Date.now()
// Process each tool call
for (const toolCall of toolCallsInResponse) {
try {
const toolName = toolCall.function.name
const toolArgs = JSON.parse(toolCall.function.arguments)
// Get the tool from the tools registry
const tool = request.tools?.find((t) => t.id === toolName)
if (!tool) continue
// Execute the tool
const toolCallStartTime = Date.now()
const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
const result = await executeTool(toolName, executionParams, true)
const toolCallEndTime = Date.now()
const toolCallDuration = toolCallEndTime - toolCallStartTime
// Add to time segments for both success and failure
timeSegments.push({
type: 'tool',
name: toolName,
startTime: toolCallStartTime,
endTime: toolCallEndTime,
duration: toolCallDuration,
})
// Prepare result content for the LLM
let resultContent: any
if (result.success) {
toolResults.push(result.output)
resultContent = result.output
} else {
// Include error information so LLM can respond appropriately
resultContent = {
error: true,
message: result.error || 'Tool execution failed',
tool: toolName,
}
}
toolCalls.push({
name: toolName,
arguments: toolParams,
startTime: new Date(toolCallStartTime).toISOString(),
endTime: new Date(toolCallEndTime).toISOString(),
duration: toolCallDuration,
result: resultContent,
success: result.success,
})
// Add the tool call and result to messages (both success and failure)
currentMessages.push({
role: 'assistant',
content: null,
tool_calls: [
{
id: toolCall.id,
type: 'function',
function: {
name: toolName,
arguments: toolCall.function.arguments,
},
},
],
})
currentMessages.push({
role: 'tool',
tool_call_id: toolCall.id,
content: JSON.stringify(resultContent),
})
} catch (error) {
logger.error('Error processing tool call:', {
error,
toolName: toolCall?.function?.name,
})
}
}
// Calculate tool call time for this iteration
const thisToolsTime = Date.now() - toolsStartTime
toolsTime += thisToolsTime
// Make the next request with updated messages
const nextPayload = {
...payload,
messages: currentMessages,
}
// Update tool_choice based on which forced tools have been used
if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
// If we have remaining forced tools, get the next one to force
const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
if (remainingTools.length > 0) {
// Ollama doesn't support forced tool selection, so we keep using 'auto'
nextPayload.tool_choice = 'auto'
logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`)
} else {
// All forced tools have been used, continue with auto
nextPayload.tool_choice = 'auto'
logger.info('All forced tools have been used, continuing with auto tool_choice')
}
}
// Time the next model call
const nextModelStartTime = Date.now()
// Make the next request
currentResponse = await ollama.chat.completions.create(nextPayload)
// Check if any forced tools were used in this response
checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
const nextModelEndTime = Date.now()
const thisModelTime = nextModelEndTime - nextModelStartTime
// Add to time segments
timeSegments.push({
type: 'model',
name: `Model response (iteration ${iterationCount + 1})`,
startTime: nextModelStartTime,
endTime: nextModelEndTime,
duration: thisModelTime,
})
// Add to model time
modelTime += thisModelTime
// Update content if we have a text response
if (currentResponse.choices[0]?.message?.content) {
content = currentResponse.choices[0].message.content
// Clean up the response content
content = content.replace(/```json\n?|\n?```/g, '')
content = content.trim()
}
// Update token counts
if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}
iterationCount++
}
const endTime = Date.now()
// After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response
if (request.stream && iterationCount > 0) {
logger.info('Using streaming for final response after tool calls')
const streamingPayload = {
...payload,
messages: currentMessages,
tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
stream: true,
stream_options: { include_usage: true },
}
const streamResponse = await ollama.chat.completions.create(streamingPayload)
// Create the StreamingExecution object with all collected data
const streamingResult = {
stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
// Update the execution data with the final content and token usage
streamingResult.execution.output.content = content
// Clean up the response content
if (content) {
streamingResult.execution.output.content = content
.replace(/```json\n?|\n?```/g, '')
.trim()
}
// Update token usage if available from the stream
if (usage) {
const newTokens = {
prompt: usage.prompt_tokens || tokens.prompt,
completion: usage.completion_tokens || tokens.completion,
total: usage.total_tokens || tokens.total,
}
streamingResult.execution.output.tokens = newTokens
}
}),
execution: {
success: true,
output: {
content: '', // Will be filled by the callback
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
total: tokens.total,
},
toolCalls:
toolCalls.length > 0
? {
list: toolCalls,
count: toolCalls.length,
}
: undefined,
providerTiming: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
modelTime: modelTime,
toolsTime: toolsTime,
firstResponseTime: firstResponseTime,
iterations: iterationCount + 1,
timeSegments: timeSegments,
},
},
logs: [], // No block logs at provider level
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
},
},
} as StreamingExecution
// Return the streaming execution object
return streamingResult as StreamingExecution
}
// Calculate overall timing
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
return {
content: content,
content,
model: request.model,
tokens,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
toolResults: toolResults.length > 0 ? toolResults : undefined,
timing: {
startTime: new Date(startTime).toISOString(),
endTime: new Date(endTime).toISOString(),
duration: endTime - startTime,
startTime: providerStartTimeISO,
endTime: providerEndTimeISO,
duration: totalDuration,
modelTime: modelTime,
toolsTime: toolsTime,
firstResponseTime: firstResponseTime,
iterations: iterationCount + 1,
timeSegments,
timeSegments: timeSegments,
},
}
} catch (error) {
logger.error('Error in Ollama request', {
error: error instanceof Error ? error.message : 'Unknown error',
model: request.model,
// Include timing information even for errors
const providerEndTime = Date.now()
const providerEndTimeISO = new Date(providerEndTime).toISOString()
const totalDuration = providerEndTime - providerStartTime
logger.error('Error in Ollama request:', {
error,
duration: totalDuration,
})
throw error
// Create a new error with timing information
const enhancedError = new Error(error instanceof Error ? error.message : String(error))
// @ts-ignore - Adding timing property to the error
enhancedError.timing = {
startTime: providerStartTimeISO,
endTime: providerEndTimeISO,
duration: totalDuration,
}
throw enhancedError
}
},
}

View File

@@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai'
import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types'
import { xAIProvider } from '@/providers/xai'
import { useCustomToolsStore } from '@/stores/custom-tools/store'
import { useOllamaStore } from '@/stores/ollama/store'
const logger = createLogger('ProviderUtils')
@@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: str
// If user provided a key, use it as a fallback
const hasUserKey = !!userProvidedKey
// Ollama models don't require API keys - they run locally
const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model)
if (isOllamaModel) {
return 'empty' // Ollama uses 'empty' as a placeholder API key
}
// Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform
const isOpenAIModel = provider === 'openai'
const isClaudeModel = provider === 'anthropic'
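
The new early return means local models bypass both server key rotation and any user-supplied key. A standalone sketch of just that short-circuit, with the store lookup replaced by a plain parameter (names here are illustrative, not the real function signature):

```ts
// Hypothetical reduction of the Ollama check in getApiKey.
function resolveOllamaKey(
  provider: string,
  model: string,
  localModels: string[]
): string | undefined {
  const isOllamaModel = provider === 'ollama' || localModels.includes(model)
  if (isOllamaModel) return 'empty' // placeholder key for the OpenAI-compatible client
  return undefined // fall through to the normal key-selection logic
}

// resolveOllamaKey('ollama', 'gemma3:4b', []) === 'empty'
// resolveOllamaKey('openai', 'gpt-4o', ['gemma3:4b']) === undefined
```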

View File

@@ -1,25 +0,0 @@
#!/bin/bash
set -e
# Check that at least one argument is provided. If not, display the usage help.
if [ "$#" -eq 0 ]; then
echo "Usage: $(basename "$0") <ollama command> [args...]"
echo "Example: $(basename "$0") ps # This will run 'ollama ps' inside the container"
exit 1
fi
# Start a detached container from the ollama/ollama image,
# mounting the host's ~/.ollama directory directly into the container.
# Here we mount it to /root/.ollama, assuming that's where the image expects it.
CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama)
# Define a cleanup function to stop the container regardless of how the script exits.
cleanup() {
docker stop "$CONTAINER_ID" >/dev/null
}
trap cleanup EXIT
# Execute the command provided by the user within the running container.
# The command runs as: "ollama <user-arguments>"
docker exec -it "$CONTAINER_ID" ollama "$@"

View File

@@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types'
const logger = createLogger('OllamaStore')
export const useOllamaStore = create<OllamaStore>((set) => ({
// Fetch models from the server API when on client side
const fetchOllamaModels = async (): Promise<string[]> => {
try {
const response = await fetch('/api/providers/ollama/models')
if (!response.ok) {
logger.warn('Failed to fetch Ollama models from API', {
status: response.status,
statusText: response.statusText,
})
return []
}
const data = await response.json()
return data.models || []
} catch (error) {
logger.error('Error fetching Ollama models', {
error: error instanceof Error ? error.message : 'Unknown error',
})
return []
}
}
export const useOllamaStore = create<OllamaStore>((set, get) => ({
models: [],
isLoading: false,
setModels: (models) => {
set({ models })
// Update the providers when models change
updateOllamaProviderModels(models)
},
// Fetch models from API (client-side only)
fetchModels: async () => {
if (typeof window === 'undefined') {
logger.info('Skipping client-side model fetch on server')
return
}
if (get().isLoading) {
logger.info('Model fetch already in progress')
return
}
logger.info('Fetching Ollama models from API')
set({ isLoading: true })
try {
const models = await fetchOllamaModels()
logger.info('Successfully fetched Ollama models', {
count: models.length,
models,
})
get().setModels(models)
} catch (error) {
logger.error('Failed to fetch Ollama models', {
error: error instanceof Error ? error.message : 'Unknown error',
})
} finally {
set({ isLoading: false })
}
},
}))
// Auto-fetch models when the store is first accessed on the client
if (typeof window !== 'undefined') {
// Delay to avoid hydration issues
setTimeout(() => {
useOllamaStore.getState().fetchModels()
}, 1000)
}
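
For context, a minimal sketch of reading the store from a client component with standard Zustand selectors (the component itself is hypothetical and not part of this commit):

```tsx
// Hypothetical client component listing the locally available Ollama models.
import { useOllamaStore } from '@/stores/ollama/store'

export function OllamaModelList() {
  const models = useOllamaStore((state) => state.models)
  const isLoading = useOllamaStore((state) => state.isLoading)

  if (isLoading) return <p>Loading local models…</p>
  return (
    <ul>
      {models.map((name) => (
        <li key={name}>{name}</li>
      ))}
    </ul>
  )
}
```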

View File

@@ -1,4 +1,6 @@
export interface OllamaStore {
models: string[]
isLoading: boolean
setModels: (models: string[]) => void
fetchModels: () => Promise<void>
}

View File

@@ -1,11 +1,106 @@
name: sim-with-ollama
services:
local-llm-gpu:
profiles:
- local-gpu # This profile requires both 'local' and 'gpu'
# Main Sim Studio Application
simstudio:
build:
context: .
dockerfile: docker/app.Dockerfile
ports:
- '3000:3000'
deploy:
resources:
limits:
memory: 8G
environment:
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
- BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)}
- OLLAMA_URL=http://ollama:11434
- NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002}
depends_on:
db:
condition: service_healthy
migrations:
condition: service_completed_successfully
realtime:
condition: service_healthy
ollama:
condition: service_healthy
healthcheck:
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000']
interval: 90s
timeout: 5s
retries: 3
start_period: 10s
restart: unless-stopped
# Realtime Socket Server
realtime:
build:
context: .
dockerfile: docker/realtime.Dockerfile
environment:
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
- BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000}
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
depends_on:
db:
condition: service_healthy
restart: unless-stopped
ports:
- '3002:3002'
deploy:
resources:
limits:
memory: 8G
healthcheck:
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health']
interval: 90s
timeout: 5s
retries: 3
start_period: 10s
# Database Migrations
migrations:
build:
context: .
dockerfile: docker/db.Dockerfile
environment:
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
depends_on:
db:
condition: service_healthy
command: ['bun', 'run', 'db:migrate']
restart: 'no'
# PostgreSQL Database with Vector Extension
db:
image: pgvector/pgvector:pg17
restart: always
ports:
- '5432:5432'
environment:
- POSTGRES_USER=${POSTGRES_USER:-postgres}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
- POSTGRES_DB=${POSTGRES_DB:-simstudio}
volumes:
- postgres_data:/var/lib/postgresql/data
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U postgres']
interval: 5s
timeout: 5s
retries: 5
# Ollama with GPU support (default)
ollama:
image: ollama/ollama:latest
pull_policy: always
volumes:
- ${HOME}/.ollama:/root/.ollama
- ollama_data:/root/.ollama
ports:
- '11434:11434'
environment:
@@ -13,6 +108,7 @@ services:
- OLLAMA_LOAD_TIMEOUT=-1
- OLLAMA_KEEP_ALIVE=-1
- OLLAMA_DEBUG=1
- OLLAMA_HOST=0.0.0.0:11434
command: 'serve'
deploy:
resources:
@@ -26,23 +122,56 @@ services:
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
restart: unless-stopped
local-llm-cpu:
# Ollama CPU-only version (use with --profile cpu profile)
ollama-cpu:
profiles:
- local-cpu # This profile requires both 'local' and 'cpu'
- cpu
image: ollama/ollama:latest
pull_policy: always
volumes:
- ${HOME}/.ollama:/root/.ollama
- ollama_data:/root/.ollama
ports:
- '11434:11434'
environment:
- OLLAMA_LOAD_TIMEOUT=-1
- OLLAMA_KEEP_ALIVE=-1
- OLLAMA_DEBUG=1
- OLLAMA_HOST=0.0.0.0:11434
command: 'serve'
healthcheck:
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
restart: unless-stopped
# Helper container to pull models automatically
model-setup:
image: ollama/ollama:latest
profiles:
- setup
volumes:
- ollama_data:/root/.ollama
environment:
- OLLAMA_HOST=ollama:11434
depends_on:
ollama:
condition: service_healthy
command: >
sh -c "
echo 'Waiting for Ollama to be ready...' &&
sleep 10 &&
echo 'Pulling gemma3:4b model (recommended starter model)...' &&
ollama pull gemma3:4b &&
echo 'Model setup complete! You can now use gemma3:4b in Sim.' &&
echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>'
"
restart: 'no'
volumes:
postgres_data:
ollama_data: