diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 4d0d8df90..469ce8aa0 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/) To use local models with Sim: -1. Pull models using our helper script: +1. Install Ollama and pull models: ```bash -./apps/sim/scripts/ollama_docker.sh pull +# Install Ollama (if not already installed) +curl -fsSL https://ollama.ai/install.sh | sh + +# Pull a model (e.g., gemma3:4b) +ollama pull gemma3:4b ``` 2. Start Sim with local model support: diff --git a/README.md b/README.md index f9855815e..be3b0ec97 100644 --- a/README.md +++ b/README.md @@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d Access the application at [http://localhost:3000/](http://localhost:3000/) -#### Using Local Models +#### Using Local Models with Ollama -To use local models with Sim: - -1. Pull models using our helper script: +Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required: ```bash -./apps/sim/scripts/ollama_docker.sh pull +# Start with GPU support (automatically downloads gemma3:4b model) +docker compose -f docker-compose.ollama.yml --profile setup up -d + +# For CPU-only systems: +docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d ``` -2. Start Sim with local model support: - +Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with: ```bash -# With NVIDIA GPU support -docker compose --profile local-gpu -f docker-compose.ollama.yml up -d - -# Without GPU (CPU only) -docker compose --profile local-cpu -f docker-compose.ollama.yml up -d - -# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. 
http://1.1.1.1:11434) -docker compose -f docker-compose.prod.yml up -d +docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b ``` ### Option 3: Dev Containers diff --git a/apps/sim/.env.example b/apps/sim/.env.example index 5c126fb32..ee2c0f84d 100644 --- a/apps/sim/.env.example +++ b/apps/sim/.env.example @@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 32` to generate # RESEND_API_KEY= # Uncomment and add your key from https://resend.com to send actual emails # If left commented out, emails will be logged to console instead +# Local AI Models (Optional) +# OLLAMA_URL=http://localhost:11434 # URL for local Ollama server - uncomment if using local models + diff --git a/apps/sim/app/api/providers/ollama/models/route.ts b/apps/sim/app/api/providers/ollama/models/route.ts new file mode 100644 index 000000000..7c184588b --- /dev/null +++ b/apps/sim/app/api/providers/ollama/models/route.ts @@ -0,0 +1,52 @@ +import { type NextRequest, NextResponse } from 'next/server' +import { env } from '@/lib/env' +import { createLogger } from '@/lib/logs/console/logger' +import type { ModelsObject } from '@/providers/ollama/types' + +const logger = createLogger('OllamaModelsAPI') +const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434' + +export const dynamic = 'force-dynamic' + +/** + * Get available Ollama models + */ +export async function GET(request: NextRequest) { + try { + logger.info('Fetching Ollama models', { + host: OLLAMA_HOST, + }) + + const response = await fetch(`${OLLAMA_HOST}/api/tags`, { + headers: { + 'Content-Type': 'application/json', + }, + }) + + if (!response.ok) { + logger.warn('Ollama service is not available', { + status: response.status, + statusText: response.statusText, + }) + return NextResponse.json({ models: [] }) + } + + const data = (await response.json()) as ModelsObject + const models = data.models.map((model) => model.name) + + logger.info('Successfully fetched Ollama models', { + 
count: models.length, + models, + }) + + return NextResponse.json({ models }) + } catch (error) { + logger.error('Failed to fetch Ollama models', { + error: error instanceof Error ? error.message : 'Unknown error', + host: OLLAMA_HOST, + }) + + // Return empty array instead of error to avoid breaking the UI + return NextResponse.json({ models: [] }) + } +} diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx index 0a9907e27..cb1100853 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/workflow-block.tsx @@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps) { // If there's no condition, the block should be shown if (!block.condition) return true + // If condition is a function, call it to get the actual condition object + const actualCondition = + typeof block.condition === 'function' ? block.condition() : block.condition + // Get the values of the fields this block depends on from the appropriate state - const fieldValue = stateToUse[block.condition.field]?.value - const andFieldValue = block.condition.and - ? stateToUse[block.condition.and.field]?.value + const fieldValue = stateToUse[actualCondition.field]?.value + const andFieldValue = actualCondition.and + ? stateToUse[actualCondition.and.field]?.value : undefined // Check if the condition value is an array - const isValueMatch = Array.isArray(block.condition.value) + const isValueMatch = Array.isArray(actualCondition.value) ? fieldValue != null && - (block.condition.not - ? !block.condition.value.includes(fieldValue as string | number | boolean) - : block.condition.value.includes(fieldValue as string | number | boolean)) - : block.condition.not - ? 
fieldValue !== block.condition.value - : fieldValue === block.condition.value + (actualCondition.not + ? !actualCondition.value.includes(fieldValue as string | number | boolean) + : actualCondition.value.includes(fieldValue as string | number | boolean)) + : actualCondition.not + ? fieldValue !== actualCondition.value + : fieldValue === actualCondition.value // Check both conditions if 'and' is present const isAndValueMatch = - !block.condition.and || - (Array.isArray(block.condition.and.value) + !actualCondition.and || + (Array.isArray(actualCondition.and.value) ? andFieldValue != null && - (block.condition.and.not - ? !block.condition.and.value.includes(andFieldValue as string | number | boolean) - : block.condition.and.value.includes(andFieldValue as string | number | boolean)) - : block.condition.and.not - ? andFieldValue !== block.condition.and.value - : andFieldValue === block.condition.and.value) + (actualCondition.and.not + ? !actualCondition.and.value.includes(andFieldValue as string | number | boolean) + : actualCondition.and.value.includes(andFieldValue as string | number | boolean)) + : actualCondition.and.not + ? 
andFieldValue !== actualCondition.and.value + : andFieldValue === actualCondition.and.value) return isValueMatch && isAndValueMatch }) diff --git a/apps/sim/blocks/blocks/agent.ts b/apps/sim/blocks/blocks/agent.ts index f1eb04a96..4e02bf40a 100644 --- a/apps/sim/blocks/blocks/agent.ts +++ b/apps/sim/blocks/blocks/agent.ts @@ -12,6 +12,12 @@ import { MODELS_WITH_TEMPERATURE_SUPPORT, providers, } from '@/providers/utils' + +// Get current Ollama models dynamically +const getCurrentOllamaModels = () => { + return useOllamaStore.getState().models +} + import { useOllamaStore } from '@/stores/ollama/store' import type { ToolResponse } from '@/tools/types' @@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear langu password: true, connectionDroppable: false, required: true, - // Hide API key for all hosted models when running on hosted version + // Hide API key for hosted models and Ollama models condition: isHosted ? { field: 'model', value: getHostedModels(), not: true, // Show for all models EXCEPT those listed } - : undefined, // Show for all models in non-hosted environments + : () => ({ + field: 'model', + value: getCurrentOllamaModels(), + not: true, // Show for all models EXCEPT Ollama models + }), }, { id: 'azureEndpoint', diff --git a/apps/sim/blocks/types.ts b/apps/sim/blocks/types.ts index e1818e386..8cd139174 100644 --- a/apps/sim/blocks/types.ts +++ b/apps/sim/blocks/types.ts @@ -118,16 +118,27 @@ export interface SubBlockConfig { hidden?: boolean description?: string value?: (params: Record) => string - condition?: { - field: string - value: string | number | boolean | Array - not?: boolean - and?: { - field: string - value: string | number | boolean | Array | undefined - not?: boolean - } - } + condition?: + | { + field: string + value: string | number | boolean | Array + not?: boolean + and?: { + field: string + value: string | number | boolean | Array | undefined + not?: boolean + } + } + | (() => { + field: 
string + value: string | number | boolean | Array + not?: boolean + and?: { + field: string + value: string | number | boolean | Array | undefined + not?: boolean + } + }) // Props specific to 'code' sub-block type language?: 'javascript' | 'json' generationType?: GenerationType diff --git a/apps/sim/executor/resolver/resolver.ts b/apps/sim/executor/resolver/resolver.ts index 51101c310..13fbdd121 100644 --- a/apps/sim/executor/resolver/resolver.ts +++ b/apps/sim/executor/resolver/resolver.ts @@ -58,7 +58,7 @@ export class InputResolver { /** * Evaluates if a sub-block should be active based on its condition - * @param condition - The condition to evaluate + * @param condition - The condition to evaluate (can be static object or function) * @param currentValues - Current values of all inputs * @returns True if the sub-block should be active */ @@ -70,37 +70,46 @@ export class InputResolver { not?: boolean and?: { field: string; value: any; not?: boolean } } + | (() => { + field: string + value: any + not?: boolean + and?: { field: string; value: any; not?: boolean } + }) | undefined, currentValues: Record ): boolean { if (!condition) return true + // If condition is a function, call it to get the actual condition object + const actualCondition = typeof condition === 'function' ? condition() : condition + // Get the field value - const fieldValue = currentValues[condition.field] + const fieldValue = currentValues[actualCondition.field] // Check if the condition value is an array - const isValueMatch = Array.isArray(condition.value) + const isValueMatch = Array.isArray(actualCondition.value) ? fieldValue != null && - (condition.not - ? !condition.value.includes(fieldValue) - : condition.value.includes(fieldValue)) - : condition.not - ? fieldValue !== condition.value - : fieldValue === condition.value + (actualCondition.not + ? !actualCondition.value.includes(fieldValue) + : actualCondition.value.includes(fieldValue)) + : actualCondition.not + ? 
fieldValue !== actualCondition.value + : fieldValue === actualCondition.value // Check both conditions if 'and' is present const isAndValueMatch = - !condition.and || + !actualCondition.and || (() => { - const andFieldValue = currentValues[condition.and!.field] - return Array.isArray(condition.and!.value) + const andFieldValue = currentValues[actualCondition.and!.field] + return Array.isArray(actualCondition.and!.value) ? andFieldValue != null && - (condition.and!.not - ? !condition.and!.value.includes(andFieldValue) - : condition.and!.value.includes(andFieldValue)) - : condition.and!.not - ? andFieldValue !== condition.and!.value - : andFieldValue === condition.and!.value + (actualCondition.and!.not + ? !actualCondition.and!.value.includes(andFieldValue) + : actualCondition.and!.value.includes(andFieldValue)) + : actualCondition.and!.not + ? andFieldValue !== actualCondition.and!.value + : andFieldValue === actualCondition.and!.value })() return isValueMatch && isAndValueMatch diff --git a/apps/sim/providers/ollama/index.ts b/apps/sim/providers/ollama/index.ts index 7dc2bbb40..3bf99217b 100644 --- a/apps/sim/providers/ollama/index.ts +++ b/apps/sim/providers/ollama/index.ts @@ -1,6 +1,7 @@ import OpenAI from 'openai' import { env } from '@/lib/env' import { createLogger } from '@/lib/logs/console/logger' +import type { StreamingExecution } from '@/executor/types' import type { ModelsObject } from '@/providers/ollama/types' import type { ProviderConfig, @@ -8,12 +9,57 @@ import type { ProviderResponse, TimeSegment, } from '@/providers/types' +import { + prepareToolExecution, + prepareToolsWithUsageControl, + trackForcedToolUsage, +} from '@/providers/utils' import { useOllamaStore } from '@/stores/ollama/store' import { executeTool } from '@/tools' const logger = createLogger('OllamaProvider') const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434' +/** + * Helper function to convert an Ollama stream to a standard ReadableStream + * and collect completion 
metrics + */ +function createReadableStreamFromOllamaStream( + ollamaStream: any, + onComplete?: (content: string, usage?: any) => void +): ReadableStream { + let fullContent = '' + let usageData: any = null + + return new ReadableStream({ + async start(controller) { + try { + for await (const chunk of ollamaStream) { + // Check for usage data in the final chunk + if (chunk.usage) { + usageData = chunk.usage + } + + const content = chunk.choices[0]?.delta?.content || '' + if (content) { + fullContent += content + controller.enqueue(new TextEncoder().encode(content)) + } + } + + // Once stream is complete, call the completion callback with the final content and usage + if (onComplete) { + onComplete(fullContent, usageData) + } + + controller.close() + } catch (error) { + controller.error(error) + } + }, + }) +} + export const ollamaProvider: ProviderConfig = { id: 'ollama', name: 'Ollama', @@ -46,91 +92,238 @@ export const ollamaProvider: ProviderConfig = { } }, - executeRequest: async (request: ProviderRequest): Promise => { + executeRequest: async ( + request: ProviderRequest + ): Promise => { logger.info('Preparing Ollama request', { model: request.model, hasSystemPrompt: !!request.systemPrompt, - hasMessages: !!request.context, + hasMessages: !!request.messages?.length, hasTools: !!request.tools?.length, toolCount: request.tools?.length || 0, hasResponseFormat: !!request.responseFormat, + stream: !!request.stream, }) - const startTime = Date.now() + // Create Ollama client using OpenAI-compatible API + const ollama = new OpenAI({ + apiKey: 'empty', + baseURL: `${OLLAMA_HOST}/v1`, + }) + + // Start with an empty array for all messages + const allMessages = [] + + // Add system prompt if present + if (request.systemPrompt) { + allMessages.push({ + role: 'system', + content: request.systemPrompt, + }) + } + + // Add context if present + if (request.context) { + allMessages.push({ + role: 'user', + content: request.context, + }) + } + + // Add remaining messages + 
if (request.messages) { + allMessages.push(...request.messages) + } + + // Transform tools to OpenAI format if provided + const tools = request.tools?.length + ? request.tools.map((tool) => ({ + type: 'function', + function: { + name: tool.id, + description: tool.description, + parameters: tool.parameters, + }, + })) + : undefined + + // Build the request payload + const payload: any = { + model: request.model, + messages: allMessages, + } + + // Add optional parameters + if (request.temperature !== undefined) payload.temperature = request.temperature + if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens + + // Add response format for structured output if specified + if (request.responseFormat) { + // Use OpenAI's JSON schema format (Ollama supports this) + payload.response_format = { + type: 'json_schema', + json_schema: { + name: request.responseFormat.name || 'response_schema', + schema: request.responseFormat.schema || request.responseFormat, + strict: request.responseFormat.strict !== false, + }, + } + + logger.info('Added JSON schema response format to Ollama request') + } + + // Handle tools and tool usage control + let preparedTools: ReturnType | null = null + + if (tools?.length) { + preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama') + const { tools: filteredTools, toolChoice } = preparedTools + + if (filteredTools?.length && toolChoice) { + payload.tools = filteredTools + // Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto' + payload.tool_choice = typeof toolChoice === 'string' ? 
toolChoice : 'auto' + + logger.info('Ollama request configuration:', { + toolCount: filteredTools.length, + toolChoice: payload.tool_choice, + model: request.model, + }) + } + } + + // Start execution timer for the entire provider execution + const providerStartTime = Date.now() + const providerStartTimeISO = new Date(providerStartTime).toISOString() try { - // Prepare messages array - const ollama = new OpenAI({ - apiKey: 'empty', - baseURL: `${OLLAMA_HOST}/v1`, - }) + // Check if we can stream directly (no tools required) + if (request.stream && (!tools || tools.length === 0)) { + logger.info('Using streaming response for Ollama request') - // Start with an empty array for all messages - const allMessages = [] - - // Add system prompt if present - if (request.systemPrompt) { - allMessages.push({ role: 'system', content: request.systemPrompt }) - } - - // Add context if present - if (request.context) { - allMessages.push({ role: 'user', content: request.context }) - } - - // Add remaining messages - if (request.messages) { - allMessages.push(...request.messages) - } - - // Build the basic payload - const payload: any = { - model: request.model, - messages: allMessages, - stream: false, - } - - // Add optional parameters - if (request.temperature !== undefined) payload.temperature = request.temperature - if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens - - // Transform tools to OpenAI format if provided - const tools = request.tools?.length - ? 
request.tools.map((tool) => ({ - type: 'function', - function: { - name: tool.id, - description: tool.description, - parameters: tool.parameters, - }, - })) - : undefined - - // Handle tools and tool usage control - if (tools?.length) { - // Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it - const filteredTools = tools.filter((tool) => { - const toolId = tool.function?.name - const toolConfig = request.tools?.find((t) => t.id === toolId) - // Only filter out 'none', treat 'force' as 'auto' - return toolConfig?.usageControl !== 'none' + // Create a streaming request with token usage tracking + const streamResponse = await ollama.chat.completions.create({ + ...payload, + stream: true, + stream_options: { include_usage: true }, }) - if (filteredTools?.length) { - payload.tools = filteredTools - // Always use 'auto' for Ollama, regardless of the tool_choice setting - payload.tool_choice = 'auto' + // Start collecting token usage from the stream + const tokenUsage = { + prompt: 0, + completion: 0, + total: 0, + } - logger.info('Ollama request configuration:', { - toolCount: filteredTools.length, - toolChoice: 'auto', // Ollama always uses auto - model: request.model, - }) + // Create a StreamingExecution response with a callback to update content and tokens + const streamingResult = { + stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => { + // Update the execution data with the final content and token usage + streamingResult.execution.output.content = content + + // Clean up the response content + if (content) { + streamingResult.execution.output.content = content + .replace(/```json\n?|\n?```/g, '') + .trim() + } + + // Update the timing information with the actual completion time + const streamEndTime = Date.now() + const streamEndTimeISO = new Date(streamEndTime).toISOString() + + if (streamingResult.execution.output.providerTiming) { + 
streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO + streamingResult.execution.output.providerTiming.duration = + streamEndTime - providerStartTime + + // Update the time segment as well + if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) { + streamingResult.execution.output.providerTiming.timeSegments[0].endTime = + streamEndTime + streamingResult.execution.output.providerTiming.timeSegments[0].duration = + streamEndTime - providerStartTime + } + } + + // Update token usage if available from the stream + if (usage) { + const newTokens = { + prompt: usage.prompt_tokens || tokenUsage.prompt, + completion: usage.completion_tokens || tokenUsage.completion, + total: usage.total_tokens || tokenUsage.total, + } + + streamingResult.execution.output.tokens = newTokens + } + }), + execution: { + success: true, + output: { + content: '', // Will be filled by the stream completion callback + model: request.model, + tokens: tokenUsage, + toolCalls: undefined, + providerTiming: { + startTime: providerStartTimeISO, + endTime: new Date().toISOString(), + duration: Date.now() - providerStartTime, + timeSegments: [ + { + type: 'model', + name: 'Streaming response', + startTime: providerStartTime, + endTime: Date.now(), + duration: Date.now() - providerStartTime, + }, + ], + }, + }, + logs: [], // No block logs for direct streaming + metadata: { + startTime: providerStartTimeISO, + endTime: new Date().toISOString(), + duration: Date.now() - providerStartTime, + }, + }, + } as StreamingExecution + + // Return the streaming execution object + return streamingResult as StreamingExecution + } + + // Make the initial API request + const initialCallTime = Date.now() + + // Track the original tool_choice for forced tool tracking + const originalToolChoice = payload.tool_choice + + // Track forced tools and their usage + const forcedTools = preparedTools?.forcedTools || [] + let usedForcedTools: string[] = [] + + // Helper function to check for 
forced tool usage in responses + const checkForForcedToolUsage = ( + response: any, + toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any } + ) => { + if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) { + const toolCallsResponse = response.choices[0].message.tool_calls + const result = trackForcedToolUsage( + toolCallsResponse, + toolChoice, + logger, + 'ollama', + forcedTools, + usedForcedTools + ) + hasUsedForcedTool = result.hasUsedForcedTool + usedForcedTools = result.usedForcedTools } } let currentResponse = await ollama.chat.completions.create(payload) - const firstResponseTime = Date.now() - startTime + const firstResponseTime = Date.now() - initialCallTime let content = currentResponse.choices[0]?.message?.content || '' @@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = { content = content.trim() } + // Collect token information const tokens = { prompt: currentResponse.usage?.prompt_tokens || 0, completion: currentResponse.usage?.completion_tokens || 0, @@ -155,201 +349,307 @@ export const ollamaProvider: ProviderConfig = { let modelTime = firstResponseTime let toolsTime = 0 + // Track if a forced tool has been used + let hasUsedForcedTool = false + // Track each model and tool call segment with timestamps const timeSegments: TimeSegment[] = [ { type: 'model', name: 'Initial response', - startTime: startTime, - endTime: startTime + firstResponseTime, + startTime: initialCallTime, + endTime: initialCallTime + firstResponseTime, duration: firstResponseTime, }, ] - try { - while (iterationCount < MAX_ITERATIONS) { - // Check for tool calls - const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls - if (!toolCallsInResponse || toolCallsInResponse.length === 0) { - break - } + // Check if a forced tool was used in the first response + checkForForcedToolUsage(currentResponse, originalToolChoice) - // Track time for tool calls in this batch - const 
toolsStartTime = Date.now() - - // Process each tool call - for (const toolCall of toolCallsInResponse) { - try { - const toolName = toolCall.function.name - const toolArgs = JSON.parse(toolCall.function.arguments) - - // Get the tool from the tools registry - const tool = request.tools?.find((t) => t.id === toolName) - if (!tool) continue - - // Execute the tool - const toolCallStartTime = Date.now() - - // Only merge actual tool parameters for logging - const toolParams = { - ...tool.params, - ...toolArgs, - } - - // Add system parameters for execution - const executionParams = { - ...toolParams, - ...(request.workflowId - ? { - _context: { - workflowId: request.workflowId, - ...(request.chatId ? { chatId: request.chatId } : {}), - }, - } - : {}), - ...(request.environmentVariables ? { envVars: request.environmentVariables } : {}), - } - - const result = await executeTool(toolName, executionParams, true) - const toolCallEndTime = Date.now() - const toolCallDuration = toolCallEndTime - toolCallStartTime - - // Add to time segments for both success and failure - timeSegments.push({ - type: 'tool', - name: toolName, - startTime: toolCallStartTime, - endTime: toolCallEndTime, - duration: toolCallDuration, - }) - - // Prepare result content for the LLM - let resultContent: any - if (result.success) { - toolResults.push(result.output) - resultContent = result.output - } else { - // Include error information so LLM can respond appropriately - resultContent = { - error: true, - message: result.error || 'Tool execution failed', - tool: toolName, - } - } - - toolCalls.push({ - name: toolName, - arguments: toolParams, - startTime: new Date(toolCallStartTime).toISOString(), - endTime: new Date(toolCallEndTime).toISOString(), - duration: toolCallDuration, - result: resultContent, - success: result.success, - }) - - // Add the tool call and result to messages (both success and failure) - currentMessages.push({ - role: 'assistant', - content: null, - tool_calls: [ - { - id: 
toolCall.id, - type: 'function', - function: { - name: toolName, - arguments: toolCall.function.arguments, - }, - }, - ], - }) - - currentMessages.push({ - role: 'tool', - tool_call_id: toolCall.id, - content: JSON.stringify(resultContent), - }) - } catch (error) { - logger.error('Error processing tool call:', { error }) - } - } - - // Calculate tool call time for this iteration - const thisToolsTime = Date.now() - toolsStartTime - toolsTime += thisToolsTime - - // Make the next request with updated messages - const nextPayload = { - ...payload, - messages: currentMessages, - } - - // Time the next model call - const nextModelStartTime = Date.now() - - // Make the next request - currentResponse = await ollama.chat.completions.create(nextPayload) - - const nextModelEndTime = Date.now() - const thisModelTime = nextModelEndTime - nextModelStartTime - - // Add to time segments - timeSegments.push({ - type: 'model', - name: `Model response (iteration ${iterationCount + 1})`, - startTime: nextModelStartTime, - endTime: nextModelEndTime, - duration: thisModelTime, - }) - - // Add to model time - modelTime += thisModelTime - - // Update content if we have a text response - if (currentResponse.choices[0]?.message?.content) { - content = currentResponse.choices[0].message.content - // Clean up the response content - content = content.replace(/```json\n?|\n?```/g, '') - content = content.trim() - } - - // Update token counts - if (currentResponse.usage) { - tokens.prompt += currentResponse.usage.prompt_tokens || 0 - tokens.completion += currentResponse.usage.completion_tokens || 0 - tokens.total += currentResponse.usage.total_tokens || 0 - } - - iterationCount++ + while (iterationCount < MAX_ITERATIONS) { + // Check for tool calls + const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls + if (!toolCallsInResponse || toolCallsInResponse.length === 0) { + break } - } catch (error) { - logger.error('Error in Ollama request:', { error }) + + logger.info( + 
`Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})` + ) + + // Track time for tool calls in this batch + const toolsStartTime = Date.now() + + // Process each tool call + for (const toolCall of toolCallsInResponse) { + try { + const toolName = toolCall.function.name + const toolArgs = JSON.parse(toolCall.function.arguments) + + // Get the tool from the tools registry + const tool = request.tools?.find((t) => t.id === toolName) + if (!tool) continue + + // Execute the tool + const toolCallStartTime = Date.now() + + const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request) + const result = await executeTool(toolName, executionParams, true) + const toolCallEndTime = Date.now() + const toolCallDuration = toolCallEndTime - toolCallStartTime + + // Add to time segments for both success and failure + timeSegments.push({ + type: 'tool', + name: toolName, + startTime: toolCallStartTime, + endTime: toolCallEndTime, + duration: toolCallDuration, + }) + + // Prepare result content for the LLM + let resultContent: any + if (result.success) { + toolResults.push(result.output) + resultContent = result.output + } else { + // Include error information so LLM can respond appropriately + resultContent = { + error: true, + message: result.error || 'Tool execution failed', + tool: toolName, + } + } + + toolCalls.push({ + name: toolName, + arguments: toolParams, + startTime: new Date(toolCallStartTime).toISOString(), + endTime: new Date(toolCallEndTime).toISOString(), + duration: toolCallDuration, + result: resultContent, + success: result.success, + }) + + // Add the tool call and result to messages (both success and failure) + currentMessages.push({ + role: 'assistant', + content: null, + tool_calls: [ + { + id: toolCall.id, + type: 'function', + function: { + name: toolName, + arguments: toolCall.function.arguments, + }, + }, + ], + }) + + currentMessages.push({ + role: 'tool', + tool_call_id: 
toolCall.id, + content: JSON.stringify(resultContent), + }) + } catch (error) { + logger.error('Error processing tool call:', { + error, + toolName: toolCall?.function?.name, + }) + } + } + + // Calculate tool call time for this iteration + const thisToolsTime = Date.now() - toolsStartTime + toolsTime += thisToolsTime + + // Make the next request with updated messages + const nextPayload = { + ...payload, + messages: currentMessages, + } + + // Update tool_choice based on which forced tools have been used + if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) { + // If we have remaining forced tools, get the next one to force + const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool)) + + if (remainingTools.length > 0) { + // Ollama doesn't support forced tool selection, so we keep using 'auto' + nextPayload.tool_choice = 'auto' + logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`) + } else { + // All forced tools have been used, continue with auto + nextPayload.tool_choice = 'auto' + logger.info('All forced tools have been used, continuing with auto tool_choice') + } + } + + // Time the next model call + const nextModelStartTime = Date.now() + + // Make the next request + currentResponse = await ollama.chat.completions.create(nextPayload) + + // Check if any forced tools were used in this response + checkForForcedToolUsage(currentResponse, nextPayload.tool_choice) + + const nextModelEndTime = Date.now() + const thisModelTime = nextModelEndTime - nextModelStartTime + + // Add to time segments + timeSegments.push({ + type: 'model', + name: `Model response (iteration ${iterationCount + 1})`, + startTime: nextModelStartTime, + endTime: nextModelEndTime, + duration: thisModelTime, + }) + + // Add to model time + modelTime += thisModelTime + + // Update content if we have a text response + if (currentResponse.choices[0]?.message?.content) { + content = 
currentResponse.choices[0].message.content + // Clean up the response content + content = content.replace(/```json\n?|\n?```/g, '') + content = content.trim() + } + + // Update token counts + if (currentResponse.usage) { + tokens.prompt += currentResponse.usage.prompt_tokens || 0 + tokens.completion += currentResponse.usage.completion_tokens || 0 + tokens.total += currentResponse.usage.total_tokens || 0 + } + + iterationCount++ } - const endTime = Date.now() + // After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response + if (request.stream && iterationCount > 0) { + logger.info('Using streaming for final response after tool calls') + + const streamingPayload = { + ...payload, + messages: currentMessages, + tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls + stream: true, + stream_options: { include_usage: true }, + } + + const streamResponse = await ollama.chat.completions.create(streamingPayload) + + // Create the StreamingExecution object with all collected data + const streamingResult = { + stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => { + // Update the execution data with the final content and token usage + streamingResult.execution.output.content = content + + // Clean up the response content + if (content) { + streamingResult.execution.output.content = content + .replace(/```json\n?|\n?```/g, '') + .trim() + } + + // Update token usage if available from the stream + if (usage) { + const newTokens = { + prompt: usage.prompt_tokens || tokens.prompt, + completion: usage.completion_tokens || tokens.completion, + total: usage.total_tokens || tokens.total, + } + + streamingResult.execution.output.tokens = newTokens + } + }), + execution: { + success: true, + output: { + content: '', // Will be filled by the callback + model: request.model, + tokens: { + prompt: tokens.prompt, + completion: tokens.completion, + total: tokens.total, 
+ }, + toolCalls: + toolCalls.length > 0 + ? { + list: toolCalls, + count: toolCalls.length, + } + : undefined, + providerTiming: { + startTime: providerStartTimeISO, + endTime: new Date().toISOString(), + duration: Date.now() - providerStartTime, + modelTime: modelTime, + toolsTime: toolsTime, + firstResponseTime: firstResponseTime, + iterations: iterationCount + 1, + timeSegments: timeSegments, + }, + }, + logs: [], // No block logs at provider level + metadata: { + startTime: providerStartTimeISO, + endTime: new Date().toISOString(), + duration: Date.now() - providerStartTime, + }, + }, + } as StreamingExecution + + // Return the streaming execution object + return streamingResult as StreamingExecution + } + + // Calculate overall timing + const providerEndTime = Date.now() + const providerEndTimeISO = new Date(providerEndTime).toISOString() + const totalDuration = providerEndTime - providerStartTime return { - content: content, + content, model: request.model, tokens, toolCalls: toolCalls.length > 0 ? toolCalls : undefined, toolResults: toolResults.length > 0 ? toolResults : undefined, timing: { - startTime: new Date(startTime).toISOString(), - endTime: new Date(endTime).toISOString(), - duration: endTime - startTime, + startTime: providerStartTimeISO, + endTime: providerEndTimeISO, + duration: totalDuration, modelTime: modelTime, toolsTime: toolsTime, firstResponseTime: firstResponseTime, iterations: iterationCount + 1, - timeSegments, + timeSegments: timeSegments, }, } } catch (error) { - logger.error('Error in Ollama request', { - error: error instanceof Error ? 
error.message : 'Unknown error', - model: request.model, + // Include timing information even for errors + const providerEndTime = Date.now() + const providerEndTimeISO = new Date(providerEndTime).toISOString() + const totalDuration = providerEndTime - providerStartTime + + logger.error('Error in Ollama request:', { + error, + duration: totalDuration, }) - throw error + + // Create a new error with timing information + const enhancedError = new Error(error instanceof Error ? error.message : String(error)) + // @ts-ignore - Adding timing property to the error + enhancedError.timing = { + startTime: providerStartTimeISO, + endTime: providerEndTimeISO, + duration: totalDuration, + } + + throw enhancedError } }, } diff --git a/apps/sim/providers/utils.ts b/apps/sim/providers/utils.ts index 6e7f759c9..6ab2650f0 100644 --- a/apps/sim/providers/utils.ts +++ b/apps/sim/providers/utils.ts @@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai' import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types' import { xAIProvider } from '@/providers/xai' import { useCustomToolsStore } from '@/stores/custom-tools/store' +import { useOllamaStore } from '@/stores/ollama/store' const logger = createLogger('ProviderUtils') @@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: str // If user provided a key, use it as a fallback const hasUserKey = !!userProvidedKey + // Ollama models don't require API keys - they run locally + const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model) + if (isOllamaModel) { + return 'empty' // Ollama uses 'empty' as a placeholder API key + } + // Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform const isOpenAIModel = provider === 'openai' const isClaudeModel = provider === 'anthropic' diff --git a/apps/sim/scripts/ollama_docker.sh b/apps/sim/scripts/ollama_docker.sh deleted file mode 
100755 index d8c993085..000000000 --- a/apps/sim/scripts/ollama_docker.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e - -# Check that at least one argument is provided. If not, display the usage help. -if [ "$#" -eq 0 ]; then - echo "Usage: $(basename "$0") [args...]" - echo "Example: $(basename "$0") ps # This will run 'ollama ps' inside the container" - exit 1 -fi - -# Start a detached container from the ollama/ollama image, -# mounting the host's ~/.ollama directory directly into the container. -# Here we mount it to /root/.ollama, assuming that's where the image expects it. -CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama -) - -# Define a cleanup function to stop the container regardless of how the script exits. -cleanup() { - docker stop "$CONTAINER_ID" >/dev/null -} -trap cleanup EXIT - -# Execute the command provided by the user within the running container. -# The command runs as: "ollama " -docker exec -it "$CONTAINER_ID" ollama "$@" diff --git a/apps/sim/stores/ollama/store.ts b/apps/sim/stores/ollama/store.ts index 672a91212..4d52d1516 100644 --- a/apps/sim/stores/ollama/store.ts +++ b/apps/sim/stores/ollama/store.ts @@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types' const logger = createLogger('OllamaStore') -export const useOllamaStore = create((set) => ({ +// Fetch the Ollama model names from the app's /api/providers/ollama/models route; resolves to [] (never rejects) on a non-OK response or a thrown error +const fetchOllamaModels = async (): Promise<string[]> => { + try { + const response = await fetch('/api/providers/ollama/models') + if (!response.ok) { + logger.warn('Failed to fetch Ollama models from API', { + status: response.status, + statusText: response.statusText, + }) + return [] + } + const data = await response.json() + return data.models || [] + } catch (error) { + logger.error('Error fetching Ollama models', { + error: error instanceof Error ?
error.message : 'Unknown error', + }) + return [] + } +} + +export const useOllamaStore = create<OllamaStore>((set, get) => ({ models: [], + isLoading: false, setModels: (models) => { set({ models }) // Update the providers when models change updateOllamaProviderModels(models) }, + + // Fetch models from the API (browser only); a no-op on the server or while a fetch is already in flight, and isLoading is always reset in finally + fetchModels: async () => { + if (typeof window === 'undefined') { + logger.info('Skipping client-side model fetch on server') + return + } + + if (get().isLoading) { + logger.info('Model fetch already in progress') + return + } + + logger.info('Fetching Ollama models from API') + set({ isLoading: true }) + + try { + const models = await fetchOllamaModels() + logger.info('Successfully fetched Ollama models', { + count: models.length, + models, + }) + get().setModels(models) + } catch (error) { + logger.error('Failed to fetch Ollama models', { + error: error instanceof Error ? error.message : 'Unknown error', + }) + } finally { + set({ isLoading: false }) + } + }, })) + +// Auto-fetch models once when this module is first evaluated in the browser +if (typeof window !== 'undefined') { + // Delay to avoid hydration issues; fetchModels handles its own errors, so the promise is intentionally not awaited + setTimeout(() => { + void useOllamaStore.getState().fetchModels() + }, 1000) +} diff --git a/apps/sim/stores/ollama/types.ts b/apps/sim/stores/ollama/types.ts index 7c89f4ff9..77b0fa26c 100644 --- a/apps/sim/stores/ollama/types.ts +++ b/apps/sim/stores/ollama/types.ts @@ -1,4 +1,6 @@ export interface OllamaStore { models: string[] + isLoading: boolean setModels: (models: string[]) => void + fetchModels: () => Promise<void> } diff --git a/docker-compose.ollama.yml b/docker-compose.ollama.yml index ca2244789..e5b75cac0 100644 --- a/docker-compose.ollama.yml +++ b/docker-compose.ollama.yml @@ -1,11 +1,106 @@ +name: sim-with-ollama + services: - local-llm-gpu: - profiles: - - local-gpu # This profile requires both 'local' and 'gpu' + # Main Sim Studio Application + simstudio: + build: + context: .
+ dockerfile: docker/app.Dockerfile + ports: + - '3000:3000' + deploy: + resources: + limits: + memory: 8G + environment: + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000} + - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000} + # NOTE(review): Compose variable interpolation does not appear to run shell commands, so the openssl defaults below are presumably used as literal text rather than random values - confirm, and set BETTER_AUTH_SECRET / ENCRYPTION_KEY explicitly for real deployments + - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)} + - ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)} + - OLLAMA_URL=http://ollama:11434 + - NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002} + depends_on: + db: + condition: service_healthy + migrations: + condition: service_completed_successfully + realtime: + condition: service_healthy + ollama: + condition: service_healthy + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + restart: unless-stopped + + # Realtime Socket Server + realtime: + build: + context: . + dockerfile: docker/realtime.Dockerfile + environment: + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000} + - BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000} + - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)} + depends_on: + db: + condition: service_healthy + restart: unless-stopped + ports: + - '3002:3002' + deploy: + resources: + limits: + memory: 8G + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + + # Database Migrations + migrations: + build: + context: .
+ dockerfile: docker/db.Dockerfile + environment: + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + depends_on: + db: + condition: service_healthy + command: ['bun', 'run', 'db:migrate'] + restart: 'no' + + # PostgreSQL Database with Vector Extension (healthcheck probes with the same configurable role/database the app connects with) + db: + image: pgvector/pgvector:pg17 + restart: always + ports: + - '5432:5432' + environment: + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} + - POSTGRES_DB=${POSTGRES_DB:-simstudio} + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-simstudio}'] + interval: 5s + timeout: 5s + retries: 5 + + # Ollama with GPU support (default) + ollama: image: ollama/ollama:latest pull_policy: always volumes: - - ${HOME}/.ollama:/root/.ollama + - ollama_data:/root/.ollama ports: - '11434:11434' environment: @@ -13,6 +108,7 @@ services: - OLLAMA_LOAD_TIMEOUT=-1 - OLLAMA_KEEP_ALIVE=-1 - OLLAMA_DEBUG=1 + - OLLAMA_HOST=0.0.0.0:11434 command: 'serve' deploy: resources: @@ -26,23 +122,56 @@ services: interval: 10s timeout: 5s retries: 5 + start_period: 30s + restart: unless-stopped - local-llm-cpu: + # Ollama CPU-only version (enable with --profile cpu). NOTE(review): the default ollama service above has no profile and publishes the same host port 11434 - confirm both can start together + ollama-cpu: profiles: - - local-cpu # This profile requires both 'local' and 'cpu' + - cpu image: ollama/ollama:latest pull_policy: always volumes: - - ${HOME}/.ollama:/root/.ollama + - ollama_data:/root/.ollama ports: - '11434:11434' environment: - OLLAMA_LOAD_TIMEOUT=-1 - OLLAMA_KEEP_ALIVE=-1 - OLLAMA_DEBUG=1 + - OLLAMA_HOST=0.0.0.0:11434 command: 'serve' healthcheck: test: ['CMD', 'curl', '-f', 'http://localhost:11434/'] interval: 10s timeout: 5s retries: 5 + start_period: 30s + restart: unless-stopped + + # Helper container to pull models automatically + model-setup: + image: ollama/ollama:latest + profiles: + - setup + volumes: + - ollama_data:/root/.ollama + environment: + - OLLAMA_HOST=ollama:11434 +
depends_on: + ollama: + condition: service_healthy + command: > + sh -c " + echo 'Waiting for Ollama to be ready...' && + sleep 10 && + echo 'Pulling gemma3:4b model (recommended starter model)...' && + ollama pull gemma3:4b && + echo 'Model setup complete! You can now use gemma3:4b in Sim.' && + echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>' + " + restart: 'no' + +volumes: + postgres_data: + ollama_data: