Mirror of https://github.com/simstudioai/sim.git (synced 2026-01-07 22:24:06 -05:00)

feat(ollama): added streaming & tool call support for ollama, updated docs (#884)

.github/CONTRIBUTING.md (vendored): 8 lines changed
@@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/)
 
 To use local models with Sim:
 
-1. Pull models using our helper script:
+1. Install Ollama and pull models:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Install Ollama (if not already installed)
+curl -fsSL https://ollama.ai/install.sh | sh
+
+# Pull a model (e.g., gemma3:4b)
+ollama pull gemma3:4b
 ```
 
 2. Start Sim with local model support:
README.md: 24 lines changed

@@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d
 
 Access the application at [http://localhost:3000/](http://localhost:3000/)
 
-#### Using Local Models
+#### Using Local Models with Ollama
 
-To use local models with Sim:
-
-1. Pull models using our helper script:
+Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Start with GPU support (automatically downloads gemma3:4b model)
+docker compose -f docker-compose.ollama.yml --profile setup up -d
+
+# For CPU-only systems:
+docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d
 ```
 
-2. Start Sim with local model support:
-
+Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with:
 ```bash
-# With NVIDIA GPU support
-docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
-
-# Without GPU (CPU only)
-docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
-
-# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434)
-docker compose -f docker-compose.prod.yml up -d
+docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b
 ```
 
 ### Option 3: Dev Containers
@@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 32` to generate
 # RESEND_API_KEY= # Uncomment and add your key from https://resend.com to send actual emails
 # If left commented out, emails will be logged to console instead
 
+# Local AI Models (Optional)
+# OLLAMA_URL=http://localhost:11434 # URL for local Ollama server - uncomment if using local models
+
apps/sim/app/api/providers/ollama/models/route.ts (new file): 52 lines

@@ -0,0 +1,52 @@
import { type NextRequest, NextResponse } from 'next/server'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { ModelsObject } from '@/providers/ollama/types'

const logger = createLogger('OllamaModelsAPI')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'

export const dynamic = 'force-dynamic'

/**
 * Get available Ollama models
 */
export async function GET(request: NextRequest) {
  try {
    logger.info('Fetching Ollama models', {
      host: OLLAMA_HOST,
    })

    const response = await fetch(`${OLLAMA_HOST}/api/tags`, {
      headers: {
        'Content-Type': 'application/json',
      },
    })

    if (!response.ok) {
      logger.warn('Ollama service is not available', {
        status: response.status,
        statusText: response.statusText,
      })
      return NextResponse.json({ models: [] })
    }

    const data = (await response.json()) as ModelsObject
    const models = data.models.map((model) => model.name)

    logger.info('Successfully fetched Ollama models', {
      count: models.length,
      models,
    })

    return NextResponse.json({ models })
  } catch (error) {
    logger.error('Failed to fetch Ollama models', {
      error: error instanceof Error ? error.message : 'Unknown error',
      host: OLLAMA_HOST,
    })

    // Return empty array instead of error to avoid breaking the UI
    return NextResponse.json({ models: [] })
  }
}
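For context, a minimal hedged sketch of how a client could call this new route. The endpoint path and the `{ models: string[] }` response shape come from the code above; the wrapper function itself is illustrative and not part of the commit.

```typescript
// Illustrative client-side helper (not part of this commit): query the new
// /api/providers/ollama/models route and fall back to an empty list on failure.
export async function listOllamaModels(): Promise<string[]> {
  try {
    const res = await fetch('/api/providers/ollama/models')
    if (!res.ok) return []
    const data = (await res.json()) as { models: string[] }
    return data.models
  } catch {
    // The route itself already degrades to { models: [] }, so callers can
    // treat any failure as "no local models available".
    return []
  }
}
```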
@@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps<WorkflowBlockProps>) {
       // If there's no condition, the block should be shown
       if (!block.condition) return true
 
+      // If condition is a function, call it to get the actual condition object
+      const actualCondition =
+        typeof block.condition === 'function' ? block.condition() : block.condition
+
       // Get the values of the fields this block depends on from the appropriate state
-      const fieldValue = stateToUse[block.condition.field]?.value
-      const andFieldValue = block.condition.and
-        ? stateToUse[block.condition.and.field]?.value
+      const fieldValue = stateToUse[actualCondition.field]?.value
+      const andFieldValue = actualCondition.and
+        ? stateToUse[actualCondition.and.field]?.value
         : undefined
 
      // Check if the condition value is an array
-      const isValueMatch = Array.isArray(block.condition.value)
+      const isValueMatch = Array.isArray(actualCondition.value)
        ? fieldValue != null &&
-          (block.condition.not
-            ? !block.condition.value.includes(fieldValue as string | number | boolean)
-            : block.condition.value.includes(fieldValue as string | number | boolean))
-        : block.condition.not
-          ? fieldValue !== block.condition.value
-          : fieldValue === block.condition.value
+          (actualCondition.not
+            ? !actualCondition.value.includes(fieldValue as string | number | boolean)
+            : actualCondition.value.includes(fieldValue as string | number | boolean))
+        : actualCondition.not
+          ? fieldValue !== actualCondition.value
+          : fieldValue === actualCondition.value
 
      // Check both conditions if 'and' is present
      const isAndValueMatch =
-        !block.condition.and ||
-        (Array.isArray(block.condition.and.value)
+        !actualCondition.and ||
+        (Array.isArray(actualCondition.and.value)
          ? andFieldValue != null &&
-            (block.condition.and.not
-              ? !block.condition.and.value.includes(andFieldValue as string | number | boolean)
-              : block.condition.and.value.includes(andFieldValue as string | number | boolean))
-            : block.condition.and.not
-              ? andFieldValue !== block.condition.and.value
-              : andFieldValue === block.condition.and.value)
+            (actualCondition.and.not
+              ? !actualCondition.and.value.includes(andFieldValue as string | number | boolean)
+              : actualCondition.and.value.includes(andFieldValue as string | number | boolean))
+            : actualCondition.and.not
+              ? andFieldValue !== actualCondition.and.value
+              : andFieldValue === actualCondition.and.value)
 
      return isValueMatch && isAndValueMatch
    })
@@ -12,6 +12,12 @@ import {
   MODELS_WITH_TEMPERATURE_SUPPORT,
   providers,
 } from '@/providers/utils'
+
+// Get current Ollama models dynamically
+const getCurrentOllamaModels = () => {
+  return useOllamaStore.getState().models
+}
+
 import { useOllamaStore } from '@/stores/ollama/store'
 import type { ToolResponse } from '@/tools/types'

@@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear langu
         password: true,
         connectionDroppable: false,
         required: true,
-        // Hide API key for all hosted models when running on hosted version
+        // Hide API key for hosted models and Ollama models
         condition: isHosted
           ? {
               field: 'model',
               value: getHostedModels(),
               not: true, // Show for all models EXCEPT those listed
             }
-          : undefined, // Show for all models in non-hosted environments
+          : () => ({
+              field: 'model',
+              value: getCurrentOllamaModels(),
+              not: true, // Show for all models EXCEPT Ollama models
+            }),
       },
       {
         id: 'azureEndpoint',
@@ -118,16 +118,27 @@ export interface SubBlockConfig {
   hidden?: boolean
   description?: string
   value?: (params: Record<string, any>) => string
-  condition?: {
-    field: string
-    value: string | number | boolean | Array<string | number | boolean>
-    not?: boolean
-    and?: {
-      field: string
-      value: string | number | boolean | Array<string | number | boolean> | undefined
-      not?: boolean
-    }
-  }
+  condition?:
+    | {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      }
+    | (() => {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      })
   // Props specific to 'code' sub-block type
   language?: 'javascript' | 'json'
   generationType?: GenerationType
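To make the new union concrete, here is a hedged sketch of the two condition forms. The `SubBlockConfig` type is the one extended above and `useOllamaStore` comes from this commit; the import path for the type and the example values are assumptions.

```typescript
// Illustrative only; the SubBlockConfig import path is assumed from the repo layout.
import type { SubBlockConfig } from '@/blocks/types' // assumed path
import { useOllamaStore } from '@/stores/ollama/store'

// Static form: the value list is fixed when the block is defined.
const staticCondition: SubBlockConfig['condition'] = {
  field: 'model',
  value: ['gpt-4o'], // example value, not taken from this commit
  not: true,
}

// Function form: re-evaluated on every check, so it can reflect the Ollama
// models discovered at runtime (this mirrors the agent block change above).
const dynamicCondition: SubBlockConfig['condition'] = () => ({
  field: 'model',
  value: useOllamaStore.getState().models,
  not: true,
})
```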
@@ -58,7 +58,7 @@ export class InputResolver {
 
   /**
    * Evaluates if a sub-block should be active based on its condition
-   * @param condition - The condition to evaluate
+   * @param condition - The condition to evaluate (can be static object or function)
    * @param currentValues - Current values of all inputs
    * @returns True if the sub-block should be active
    */
@@ -70,37 +70,46 @@ export class InputResolver {
           not?: boolean
           and?: { field: string; value: any; not?: boolean }
         }
+      | (() => {
+          field: string
+          value: any
+          not?: boolean
+          and?: { field: string; value: any; not?: boolean }
+        })
       | undefined,
     currentValues: Record<string, any>
   ): boolean {
     if (!condition) return true
 
+    // If condition is a function, call it to get the actual condition object
+    const actualCondition = typeof condition === 'function' ? condition() : condition
+
     // Get the field value
-    const fieldValue = currentValues[condition.field]
+    const fieldValue = currentValues[actualCondition.field]
 
     // Check if the condition value is an array
-    const isValueMatch = Array.isArray(condition.value)
+    const isValueMatch = Array.isArray(actualCondition.value)
       ? fieldValue != null &&
-        (condition.not
-          ? !condition.value.includes(fieldValue)
-          : condition.value.includes(fieldValue))
-      : condition.not
-        ? fieldValue !== condition.value
-        : fieldValue === condition.value
+        (actualCondition.not
+          ? !actualCondition.value.includes(fieldValue)
+          : actualCondition.value.includes(fieldValue))
+      : actualCondition.not
+        ? fieldValue !== actualCondition.value
+        : fieldValue === actualCondition.value
 
     // Check both conditions if 'and' is present
     const isAndValueMatch =
-      !condition.and ||
+      !actualCondition.and ||
      (() => {
-        const andFieldValue = currentValues[condition.and!.field]
-        return Array.isArray(condition.and!.value)
+        const andFieldValue = currentValues[actualCondition.and!.field]
+        return Array.isArray(actualCondition.and!.value)
          ? andFieldValue != null &&
-            (condition.and!.not
-              ? !condition.and!.value.includes(andFieldValue)
-              : condition.and!.value.includes(andFieldValue))
-            : condition.and!.not
-              ? andFieldValue !== condition.and!.value
-              : andFieldValue === condition.and!.value
+            (actualCondition.and!.not
+              ? !actualCondition.and!.value.includes(andFieldValue)
+              : actualCondition.and!.value.includes(andFieldValue))
+            : actualCondition.and!.not
+              ? andFieldValue !== actualCondition.and!.value
+              : andFieldValue === actualCondition.and!.value
      })()
 
     return isValueMatch && isAndValueMatch
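In plain terms, the resolver now normalizes the condition before reading its fields. A reduced, hedged sketch of that normalization step (standalone, with simplified types invented for illustration) looks like this:

```typescript
// Simplified illustration of the normalization added above; not the actual class method.
type Condition = { field: string; value: any; not?: boolean }

function resolveCondition(
  condition: Condition | (() => Condition) | undefined
): Condition | undefined {
  // Function-valued conditions are invoked lazily so they can read dynamic
  // state (for example the current Ollama model list) at evaluation time.
  return typeof condition === 'function' ? condition() : condition
}

// Both forms normalize to the same shape before field lookups happen.
const a = resolveCondition({ field: 'model', value: 'gemma3:4b' })
const b = resolveCondition(() => ({ field: 'model', value: 'gemma3:4b' }))
```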
@@ -1,6 +1,7 @@
import OpenAI from 'openai'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { StreamingExecution } from '@/executor/types'
import type { ModelsObject } from '@/providers/ollama/types'
import type {
  ProviderConfig,
@@ -8,12 +9,57 @@ import type {
  ProviderResponse,
  TimeSegment,
} from '@/providers/types'
import {
  prepareToolExecution,
  prepareToolsWithUsageControl,
  trackForcedToolUsage,
} from '@/providers/utils'
import { useOllamaStore } from '@/stores/ollama/store'
import { executeTool } from '@/tools'

const logger = createLogger('OllamaProvider')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'

/**
 * Helper function to convert an Ollama stream to a standard ReadableStream
 * and collect completion metrics
 */
function createReadableStreamFromOllamaStream(
  ollamaStream: any,
  onComplete?: (content: string, usage?: any) => void
): ReadableStream {
  let fullContent = ''
  let usageData: any = null

  return new ReadableStream({
    async start(controller) {
      try {
        for await (const chunk of ollamaStream) {
          // Check for usage data in the final chunk
          if (chunk.usage) {
            usageData = chunk.usage
          }

          const content = chunk.choices[0]?.delta?.content || ''
          if (content) {
            fullContent += content
            controller.enqueue(new TextEncoder().encode(content))
          }
        }

        // Once stream is complete, call the completion callback with the final content and usage
        if (onComplete) {
          onComplete(fullContent, usageData)
        }

        controller.close()
      } catch (error) {
        controller.error(error)
      }
    },
  })
}

export const ollamaProvider: ProviderConfig = {
  id: 'ollama',
  name: 'Ollama',
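As a rough usage sketch (the reader loop below is illustrative and not part of the commit), the ReadableStream produced by createReadableStreamFromOllamaStream can be drained like any web stream, since the helper enqueues UTF-8 encoded text chunks:

```typescript
// Illustrative only: collect the text emitted by the stream helper above.
async function readAll(stream: ReadableStream): Promise<string> {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true })
  }
  return text
}
```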
@@ -46,91 +92,238 @@ export const ollamaProvider: ProviderConfig = {
|
||||
}
|
||||
},
|
||||
|
||||
executeRequest: async (request: ProviderRequest): Promise<ProviderResponse> => {
|
||||
executeRequest: async (
|
||||
request: ProviderRequest
|
||||
): Promise<ProviderResponse | StreamingExecution> => {
|
||||
logger.info('Preparing Ollama request', {
|
||||
model: request.model,
|
||||
hasSystemPrompt: !!request.systemPrompt,
|
||||
hasMessages: !!request.context,
|
||||
hasMessages: !!request.messages?.length,
|
||||
hasTools: !!request.tools?.length,
|
||||
toolCount: request.tools?.length || 0,
|
||||
hasResponseFormat: !!request.responseFormat,
|
||||
stream: !!request.stream,
|
||||
})
|
||||
|
||||
const startTime = Date.now()
|
||||
// Create Ollama client using OpenAI-compatible API
|
||||
const ollama = new OpenAI({
|
||||
apiKey: 'empty',
|
||||
baseURL: `${OLLAMA_HOST}/v1`,
|
||||
})
|
||||
|
||||
// Start with an empty array for all messages
|
||||
const allMessages = []
|
||||
|
||||
// Add system prompt if present
|
||||
if (request.systemPrompt) {
|
||||
allMessages.push({
|
||||
role: 'system',
|
||||
content: request.systemPrompt,
|
||||
})
|
||||
}
|
||||
|
||||
// Add context if present
|
||||
if (request.context) {
|
||||
allMessages.push({
|
||||
role: 'user',
|
||||
content: request.context,
|
||||
})
|
||||
}
|
||||
|
||||
// Add remaining messages
|
||||
if (request.messages) {
|
||||
allMessages.push(...request.messages)
|
||||
}
|
||||
|
||||
// Transform tools to OpenAI format if provided
|
||||
const tools = request.tools?.length
|
||||
? request.tools.map((tool) => ({
|
||||
type: 'function',
|
||||
function: {
|
||||
name: tool.id,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
},
|
||||
}))
|
||||
: undefined
|
||||
|
||||
// Build the request payload
|
||||
const payload: any = {
|
||||
model: request.model,
|
||||
messages: allMessages,
|
||||
}
|
||||
|
||||
// Add optional parameters
|
||||
if (request.temperature !== undefined) payload.temperature = request.temperature
|
||||
if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
|
||||
|
||||
// Add response format for structured output if specified
|
||||
if (request.responseFormat) {
|
||||
// Use OpenAI's JSON schema format (Ollama supports this)
|
||||
payload.response_format = {
|
||||
type: 'json_schema',
|
||||
json_schema: {
|
||||
name: request.responseFormat.name || 'response_schema',
|
||||
schema: request.responseFormat.schema || request.responseFormat,
|
||||
strict: request.responseFormat.strict !== false,
|
||||
},
|
||||
}
|
||||
|
||||
logger.info('Added JSON schema response format to Ollama request')
|
||||
}
|
||||
|
||||
// Handle tools and tool usage control
|
||||
let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
|
||||
|
||||
if (tools?.length) {
|
||||
preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama')
|
||||
const { tools: filteredTools, toolChoice } = preparedTools
|
||||
|
||||
if (filteredTools?.length && toolChoice) {
|
||||
payload.tools = filteredTools
|
||||
// Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto'
|
||||
payload.tool_choice = typeof toolChoice === 'string' ? toolChoice : 'auto'
|
||||
|
||||
logger.info('Ollama request configuration:', {
|
||||
toolCount: filteredTools.length,
|
||||
toolChoice: payload.tool_choice,
|
||||
model: request.model,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Start execution timer for the entire provider execution
|
||||
const providerStartTime = Date.now()
|
||||
const providerStartTimeISO = new Date(providerStartTime).toISOString()
|
||||
|
||||
try {
|
||||
// Prepare messages array
|
||||
const ollama = new OpenAI({
|
||||
apiKey: 'empty',
|
||||
baseURL: `${OLLAMA_HOST}/v1`,
|
||||
})
|
||||
// Check if we can stream directly (no tools required)
|
||||
if (request.stream && (!tools || tools.length === 0)) {
|
||||
logger.info('Using streaming response for Ollama request')
|
||||
|
||||
// Start with an empty array for all messages
|
||||
const allMessages = []
|
||||
|
||||
// Add system prompt if present
|
||||
if (request.systemPrompt) {
|
||||
allMessages.push({ role: 'system', content: request.systemPrompt })
|
||||
}
|
||||
|
||||
// Add context if present
|
||||
if (request.context) {
|
||||
allMessages.push({ role: 'user', content: request.context })
|
||||
}
|
||||
|
||||
// Add remaining messages
|
||||
if (request.messages) {
|
||||
allMessages.push(...request.messages)
|
||||
}
|
||||
|
||||
// Build the basic payload
|
||||
const payload: any = {
|
||||
model: request.model,
|
||||
messages: allMessages,
|
||||
stream: false,
|
||||
}
|
||||
|
||||
// Add optional parameters
|
||||
if (request.temperature !== undefined) payload.temperature = request.temperature
|
||||
if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
|
||||
|
||||
// Transform tools to OpenAI format if provided
|
||||
const tools = request.tools?.length
|
||||
? request.tools.map((tool) => ({
|
||||
type: 'function',
|
||||
function: {
|
||||
name: tool.id,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
},
|
||||
}))
|
||||
: undefined
|
||||
|
||||
// Handle tools and tool usage control
|
||||
if (tools?.length) {
|
||||
// Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it
|
||||
const filteredTools = tools.filter((tool) => {
|
||||
const toolId = tool.function?.name
|
||||
const toolConfig = request.tools?.find((t) => t.id === toolId)
|
||||
// Only filter out 'none', treat 'force' as 'auto'
|
||||
return toolConfig?.usageControl !== 'none'
|
||||
// Create a streaming request with token usage tracking
|
||||
const streamResponse = await ollama.chat.completions.create({
|
||||
...payload,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
})
|
||||
|
||||
if (filteredTools?.length) {
|
||||
payload.tools = filteredTools
|
||||
// Always use 'auto' for Ollama, regardless of the tool_choice setting
|
||||
payload.tool_choice = 'auto'
|
||||
// Start collecting token usage from the stream
|
||||
const tokenUsage = {
|
||||
prompt: 0,
|
||||
completion: 0,
|
||||
total: 0,
|
||||
}
|
||||
|
||||
logger.info('Ollama request configuration:', {
|
||||
toolCount: filteredTools.length,
|
||||
toolChoice: 'auto', // Ollama always uses auto
|
||||
model: request.model,
|
||||
})
|
||||
// Create a StreamingExecution response with a callback to update content and tokens
|
||||
const streamingResult = {
|
||||
stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
|
||||
// Update the execution data with the final content and token usage
|
||||
streamingResult.execution.output.content = content
|
||||
|
||||
// Clean up the response content
|
||||
if (content) {
|
||||
streamingResult.execution.output.content = content
|
||||
.replace(/```json\n?|\n?```/g, '')
|
||||
.trim()
|
||||
}
|
||||
|
||||
// Update the timing information with the actual completion time
|
||||
const streamEndTime = Date.now()
|
||||
const streamEndTimeISO = new Date(streamEndTime).toISOString()
|
||||
|
||||
if (streamingResult.execution.output.providerTiming) {
|
||||
streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
|
||||
streamingResult.execution.output.providerTiming.duration =
|
||||
streamEndTime - providerStartTime
|
||||
|
||||
// Update the time segment as well
|
||||
if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
|
||||
streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
|
||||
streamEndTime
|
||||
streamingResult.execution.output.providerTiming.timeSegments[0].duration =
|
||||
streamEndTime - providerStartTime
|
||||
}
|
||||
}
|
||||
|
||||
// Update token usage if available from the stream
|
||||
if (usage) {
|
||||
const newTokens = {
|
||||
prompt: usage.prompt_tokens || tokenUsage.prompt,
|
||||
completion: usage.completion_tokens || tokenUsage.completion,
|
||||
total: usage.total_tokens || tokenUsage.total,
|
||||
}
|
||||
|
||||
streamingResult.execution.output.tokens = newTokens
|
||||
}
|
||||
}),
|
||||
execution: {
|
||||
success: true,
|
||||
output: {
|
||||
content: '', // Will be filled by the stream completion callback
|
||||
model: request.model,
|
||||
tokens: tokenUsage,
|
||||
toolCalls: undefined,
|
||||
providerTiming: {
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: new Date().toISOString(),
|
||||
duration: Date.now() - providerStartTime,
|
||||
timeSegments: [
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Streaming response',
|
||||
startTime: providerStartTime,
|
||||
endTime: Date.now(),
|
||||
duration: Date.now() - providerStartTime,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
logs: [], // No block logs for direct streaming
|
||||
metadata: {
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: new Date().toISOString(),
|
||||
duration: Date.now() - providerStartTime,
|
||||
},
|
||||
},
|
||||
} as StreamingExecution
|
||||
|
||||
// Return the streaming execution object
|
||||
return streamingResult as StreamingExecution
|
||||
}
|
||||
|
||||
// Make the initial API request
|
||||
const initialCallTime = Date.now()
|
||||
|
||||
// Track the original tool_choice for forced tool tracking
|
||||
const originalToolChoice = payload.tool_choice
|
||||
|
||||
// Track forced tools and their usage
|
||||
const forcedTools = preparedTools?.forcedTools || []
|
||||
let usedForcedTools: string[] = []
|
||||
|
||||
// Helper function to check for forced tool usage in responses
|
||||
const checkForForcedToolUsage = (
|
||||
response: any,
|
||||
toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
|
||||
) => {
|
||||
if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
|
||||
const toolCallsResponse = response.choices[0].message.tool_calls
|
||||
const result = trackForcedToolUsage(
|
||||
toolCallsResponse,
|
||||
toolChoice,
|
||||
logger,
|
||||
'ollama',
|
||||
forcedTools,
|
||||
usedForcedTools
|
||||
)
|
||||
hasUsedForcedTool = result.hasUsedForcedTool
|
||||
usedForcedTools = result.usedForcedTools
|
||||
}
|
||||
}
|
||||
|
||||
let currentResponse = await ollama.chat.completions.create(payload)
|
||||
const firstResponseTime = Date.now() - startTime
|
||||
const firstResponseTime = Date.now() - initialCallTime
|
||||
|
||||
let content = currentResponse.choices[0]?.message?.content || ''
|
||||
|
||||
@@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = {
|
||||
content = content.trim()
|
||||
}
|
||||
|
||||
// Collect token information
|
||||
const tokens = {
|
||||
prompt: currentResponse.usage?.prompt_tokens || 0,
|
||||
completion: currentResponse.usage?.completion_tokens || 0,
|
||||
@@ -155,201 +349,307 @@ export const ollamaProvider: ProviderConfig = {
|
||||
let modelTime = firstResponseTime
|
||||
let toolsTime = 0
|
||||
|
||||
// Track if a forced tool has been used
|
||||
let hasUsedForcedTool = false
|
||||
|
||||
// Track each model and tool call segment with timestamps
|
||||
const timeSegments: TimeSegment[] = [
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Initial response',
|
||||
startTime: startTime,
|
||||
endTime: startTime + firstResponseTime,
|
||||
startTime: initialCallTime,
|
||||
endTime: initialCallTime + firstResponseTime,
|
||||
duration: firstResponseTime,
|
||||
},
|
||||
]
|
||||
|
||||
try {
|
||||
while (iterationCount < MAX_ITERATIONS) {
|
||||
// Check for tool calls
|
||||
const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
|
||||
if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
|
||||
break
|
||||
}
|
||||
// Check if a forced tool was used in the first response
|
||||
checkForForcedToolUsage(currentResponse, originalToolChoice)
|
||||
|
||||
// Track time for tool calls in this batch
|
||||
const toolsStartTime = Date.now()
|
||||
|
||||
// Process each tool call
|
||||
for (const toolCall of toolCallsInResponse) {
|
||||
try {
|
||||
const toolName = toolCall.function.name
|
||||
const toolArgs = JSON.parse(toolCall.function.arguments)
|
||||
|
||||
// Get the tool from the tools registry
|
||||
const tool = request.tools?.find((t) => t.id === toolName)
|
||||
if (!tool) continue
|
||||
|
||||
// Execute the tool
|
||||
const toolCallStartTime = Date.now()
|
||||
|
||||
// Only merge actual tool parameters for logging
|
||||
const toolParams = {
|
||||
...tool.params,
|
||||
...toolArgs,
|
||||
}
|
||||
|
||||
// Add system parameters for execution
|
||||
const executionParams = {
|
||||
...toolParams,
|
||||
...(request.workflowId
|
||||
? {
|
||||
_context: {
|
||||
workflowId: request.workflowId,
|
||||
...(request.chatId ? { chatId: request.chatId } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
...(request.environmentVariables ? { envVars: request.environmentVariables } : {}),
|
||||
}
|
||||
|
||||
const result = await executeTool(toolName, executionParams, true)
|
||||
const toolCallEndTime = Date.now()
|
||||
const toolCallDuration = toolCallEndTime - toolCallStartTime
|
||||
|
||||
// Add to time segments for both success and failure
|
||||
timeSegments.push({
|
||||
type: 'tool',
|
||||
name: toolName,
|
||||
startTime: toolCallStartTime,
|
||||
endTime: toolCallEndTime,
|
||||
duration: toolCallDuration,
|
||||
})
|
||||
|
||||
// Prepare result content for the LLM
|
||||
let resultContent: any
|
||||
if (result.success) {
|
||||
toolResults.push(result.output)
|
||||
resultContent = result.output
|
||||
} else {
|
||||
// Include error information so LLM can respond appropriately
|
||||
resultContent = {
|
||||
error: true,
|
||||
message: result.error || 'Tool execution failed',
|
||||
tool: toolName,
|
||||
}
|
||||
}
|
||||
|
||||
toolCalls.push({
|
||||
name: toolName,
|
||||
arguments: toolParams,
|
||||
startTime: new Date(toolCallStartTime).toISOString(),
|
||||
endTime: new Date(toolCallEndTime).toISOString(),
|
||||
duration: toolCallDuration,
|
||||
result: resultContent,
|
||||
success: result.success,
|
||||
})
|
||||
|
||||
// Add the tool call and result to messages (both success and failure)
|
||||
currentMessages.push({
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: [
|
||||
{
|
||||
id: toolCall.id,
|
||||
type: 'function',
|
||||
function: {
|
||||
name: toolName,
|
||||
arguments: toolCall.function.arguments,
|
||||
},
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
currentMessages.push({
|
||||
role: 'tool',
|
||||
tool_call_id: toolCall.id,
|
||||
content: JSON.stringify(resultContent),
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('Error processing tool call:', { error })
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate tool call time for this iteration
|
||||
const thisToolsTime = Date.now() - toolsStartTime
|
||||
toolsTime += thisToolsTime
|
||||
|
||||
// Make the next request with updated messages
|
||||
const nextPayload = {
|
||||
...payload,
|
||||
messages: currentMessages,
|
||||
}
|
||||
|
||||
// Time the next model call
|
||||
const nextModelStartTime = Date.now()
|
||||
|
||||
// Make the next request
|
||||
currentResponse = await ollama.chat.completions.create(nextPayload)
|
||||
|
||||
const nextModelEndTime = Date.now()
|
||||
const thisModelTime = nextModelEndTime - nextModelStartTime
|
||||
|
||||
// Add to time segments
|
||||
timeSegments.push({
|
||||
type: 'model',
|
||||
name: `Model response (iteration ${iterationCount + 1})`,
|
||||
startTime: nextModelStartTime,
|
||||
endTime: nextModelEndTime,
|
||||
duration: thisModelTime,
|
||||
})
|
||||
|
||||
// Add to model time
|
||||
modelTime += thisModelTime
|
||||
|
||||
// Update content if we have a text response
|
||||
if (currentResponse.choices[0]?.message?.content) {
|
||||
content = currentResponse.choices[0].message.content
|
||||
// Clean up the response content
|
||||
content = content.replace(/```json\n?|\n?```/g, '')
|
||||
content = content.trim()
|
||||
}
|
||||
|
||||
// Update token counts
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.completion += currentResponse.usage.completion_tokens || 0
|
||||
tokens.total += currentResponse.usage.total_tokens || 0
|
||||
}
|
||||
|
||||
iterationCount++
|
||||
while (iterationCount < MAX_ITERATIONS) {
|
||||
// Check for tool calls
|
||||
const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
|
||||
if (!toolCallsInResponse || toolCallsInResponse.length === 0) {
|
||||
break
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Error in Ollama request:', { error })
|
||||
|
||||
logger.info(
|
||||
`Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
|
||||
)
|
||||
|
||||
// Track time for tool calls in this batch
|
||||
const toolsStartTime = Date.now()
|
||||
|
||||
// Process each tool call
|
||||
for (const toolCall of toolCallsInResponse) {
|
||||
try {
|
||||
const toolName = toolCall.function.name
|
||||
const toolArgs = JSON.parse(toolCall.function.arguments)
|
||||
|
||||
// Get the tool from the tools registry
|
||||
const tool = request.tools?.find((t) => t.id === toolName)
|
||||
if (!tool) continue
|
||||
|
||||
// Execute the tool
|
||||
const toolCallStartTime = Date.now()
|
||||
|
||||
const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
|
||||
const result = await executeTool(toolName, executionParams, true)
|
||||
const toolCallEndTime = Date.now()
|
||||
const toolCallDuration = toolCallEndTime - toolCallStartTime
|
||||
|
||||
// Add to time segments for both success and failure
|
||||
timeSegments.push({
|
||||
type: 'tool',
|
||||
name: toolName,
|
||||
startTime: toolCallStartTime,
|
||||
endTime: toolCallEndTime,
|
||||
duration: toolCallDuration,
|
||||
})
|
||||
|
||||
// Prepare result content for the LLM
|
||||
let resultContent: any
|
||||
if (result.success) {
|
||||
toolResults.push(result.output)
|
||||
resultContent = result.output
|
||||
} else {
|
||||
// Include error information so LLM can respond appropriately
|
||||
resultContent = {
|
||||
error: true,
|
||||
message: result.error || 'Tool execution failed',
|
||||
tool: toolName,
|
||||
}
|
||||
}
|
||||
|
||||
toolCalls.push({
|
||||
name: toolName,
|
||||
arguments: toolParams,
|
||||
startTime: new Date(toolCallStartTime).toISOString(),
|
||||
endTime: new Date(toolCallEndTime).toISOString(),
|
||||
duration: toolCallDuration,
|
||||
result: resultContent,
|
||||
success: result.success,
|
||||
})
|
||||
|
||||
// Add the tool call and result to messages (both success and failure)
|
||||
currentMessages.push({
|
||||
role: 'assistant',
|
||||
content: null,
|
||||
tool_calls: [
|
||||
{
|
||||
id: toolCall.id,
|
||||
type: 'function',
|
||||
function: {
|
||||
name: toolName,
|
||||
arguments: toolCall.function.arguments,
|
||||
},
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
currentMessages.push({
|
||||
role: 'tool',
|
||||
tool_call_id: toolCall.id,
|
||||
content: JSON.stringify(resultContent),
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('Error processing tool call:', {
|
||||
error,
|
||||
toolName: toolCall?.function?.name,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate tool call time for this iteration
|
||||
const thisToolsTime = Date.now() - toolsStartTime
|
||||
toolsTime += thisToolsTime
|
||||
|
||||
// Make the next request with updated messages
|
||||
const nextPayload = {
|
||||
...payload,
|
||||
messages: currentMessages,
|
||||
}
|
||||
|
||||
// Update tool_choice based on which forced tools have been used
|
||||
if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
|
||||
// If we have remaining forced tools, get the next one to force
|
||||
const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
|
||||
|
||||
if (remainingTools.length > 0) {
|
||||
// Ollama doesn't support forced tool selection, so we keep using 'auto'
|
||||
nextPayload.tool_choice = 'auto'
|
||||
logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`)
|
||||
} else {
|
||||
// All forced tools have been used, continue with auto
|
||||
nextPayload.tool_choice = 'auto'
|
||||
logger.info('All forced tools have been used, continuing with auto tool_choice')
|
||||
}
|
||||
}
|
||||
|
||||
// Time the next model call
|
||||
const nextModelStartTime = Date.now()
|
||||
|
||||
// Make the next request
|
||||
currentResponse = await ollama.chat.completions.create(nextPayload)
|
||||
|
||||
// Check if any forced tools were used in this response
|
||||
checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
|
||||
|
||||
const nextModelEndTime = Date.now()
|
||||
const thisModelTime = nextModelEndTime - nextModelStartTime
|
||||
|
||||
// Add to time segments
|
||||
timeSegments.push({
|
||||
type: 'model',
|
||||
name: `Model response (iteration ${iterationCount + 1})`,
|
||||
startTime: nextModelStartTime,
|
||||
endTime: nextModelEndTime,
|
||||
duration: thisModelTime,
|
||||
})
|
||||
|
||||
// Add to model time
|
||||
modelTime += thisModelTime
|
||||
|
||||
// Update content if we have a text response
|
||||
if (currentResponse.choices[0]?.message?.content) {
|
||||
content = currentResponse.choices[0].message.content
|
||||
// Clean up the response content
|
||||
content = content.replace(/```json\n?|\n?```/g, '')
|
||||
content = content.trim()
|
||||
}
|
||||
|
||||
// Update token counts
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.completion += currentResponse.usage.completion_tokens || 0
|
||||
tokens.total += currentResponse.usage.total_tokens || 0
|
||||
}
|
||||
|
||||
iterationCount++
|
||||
}
|
||||
|
||||
const endTime = Date.now()
|
||||
// After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response
|
||||
if (request.stream && iterationCount > 0) {
|
||||
logger.info('Using streaming for final response after tool calls')
|
||||
|
||||
const streamingPayload = {
|
||||
...payload,
|
||||
messages: currentMessages,
|
||||
tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
}
|
||||
|
||||
const streamResponse = await ollama.chat.completions.create(streamingPayload)
|
||||
|
||||
// Create the StreamingExecution object with all collected data
|
||||
const streamingResult = {
|
||||
stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
|
||||
// Update the execution data with the final content and token usage
|
||||
streamingResult.execution.output.content = content
|
||||
|
||||
// Clean up the response content
|
||||
if (content) {
|
||||
streamingResult.execution.output.content = content
|
||||
.replace(/```json\n?|\n?```/g, '')
|
||||
.trim()
|
||||
}
|
||||
|
||||
// Update token usage if available from the stream
|
||||
if (usage) {
|
||||
const newTokens = {
|
||||
prompt: usage.prompt_tokens || tokens.prompt,
|
||||
completion: usage.completion_tokens || tokens.completion,
|
||||
total: usage.total_tokens || tokens.total,
|
||||
}
|
||||
|
||||
streamingResult.execution.output.tokens = newTokens
|
||||
}
|
||||
}),
|
||||
execution: {
|
||||
success: true,
|
||||
output: {
|
||||
content: '', // Will be filled by the callback
|
||||
model: request.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt,
|
||||
completion: tokens.completion,
|
||||
total: tokens.total,
|
||||
},
|
||||
toolCalls:
|
||||
toolCalls.length > 0
|
||||
? {
|
||||
list: toolCalls,
|
||||
count: toolCalls.length,
|
||||
}
|
||||
: undefined,
|
||||
providerTiming: {
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: new Date().toISOString(),
|
||||
duration: Date.now() - providerStartTime,
|
||||
modelTime: modelTime,
|
||||
toolsTime: toolsTime,
|
||||
firstResponseTime: firstResponseTime,
|
||||
iterations: iterationCount + 1,
|
||||
timeSegments: timeSegments,
|
||||
},
|
||||
},
|
||||
logs: [], // No block logs at provider level
|
||||
metadata: {
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: new Date().toISOString(),
|
||||
duration: Date.now() - providerStartTime,
|
||||
},
|
||||
},
|
||||
} as StreamingExecution
|
||||
|
||||
// Return the streaming execution object
|
||||
return streamingResult as StreamingExecution
|
||||
}
|
||||
|
||||
// Calculate overall timing
|
||||
const providerEndTime = Date.now()
|
||||
const providerEndTimeISO = new Date(providerEndTime).toISOString()
|
||||
const totalDuration = providerEndTime - providerStartTime
|
||||
|
||||
return {
|
||||
content: content,
|
||||
content,
|
||||
model: request.model,
|
||||
tokens,
|
||||
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
||||
toolResults: toolResults.length > 0 ? toolResults : undefined,
|
||||
timing: {
|
||||
startTime: new Date(startTime).toISOString(),
|
||||
endTime: new Date(endTime).toISOString(),
|
||||
duration: endTime - startTime,
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: providerEndTimeISO,
|
||||
duration: totalDuration,
|
||||
modelTime: modelTime,
|
||||
toolsTime: toolsTime,
|
||||
firstResponseTime: firstResponseTime,
|
||||
iterations: iterationCount + 1,
|
||||
timeSegments,
|
||||
timeSegments: timeSegments,
|
||||
},
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error('Error in Ollama request', {
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
model: request.model,
|
||||
// Include timing information even for errors
|
||||
const providerEndTime = Date.now()
|
||||
const providerEndTimeISO = new Date(providerEndTime).toISOString()
|
||||
const totalDuration = providerEndTime - providerStartTime
|
||||
|
||||
logger.error('Error in Ollama request:', {
|
||||
error,
|
||||
duration: totalDuration,
|
||||
})
|
||||
throw error
|
||||
|
||||
// Create a new error with timing information
|
||||
const enhancedError = new Error(error instanceof Error ? error.message : String(error))
|
||||
// @ts-ignore - Adding timing property to the error
|
||||
enhancedError.timing = {
|
||||
startTime: providerStartTimeISO,
|
||||
endTime: providerEndTimeISO,
|
||||
duration: totalDuration,
|
||||
}
|
||||
|
||||
throw enhancedError
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai'
 import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types'
 import { xAIProvider } from '@/providers/xai'
 import { useCustomToolsStore } from '@/stores/custom-tools/store'
+import { useOllamaStore } from '@/stores/ollama/store'
 
 const logger = createLogger('ProviderUtils')

@@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: str
   // If user provided a key, use it as a fallback
   const hasUserKey = !!userProvidedKey
 
+  // Ollama models don't require API keys - they run locally
+  const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model)
+  if (isOllamaModel) {
+    return 'empty' // Ollama uses 'empty' as a placeholder API key
+  }
+
   // Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform
   const isOpenAIModel = provider === 'openai'
   const isClaudeModel = provider === 'anthropic'
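A small hedged illustration of the behavior added above (the call itself is hypothetical; the 'empty' placeholder and the signature of getApiKey come from the diff):

```typescript
// Illustrative only: for a model served by the local Ollama instance,
// no real key is needed, so the placeholder is returned.
import { getApiKey } from '@/providers/utils'

const key = getApiKey('ollama', 'gemma3:4b')
// key === 'empty', the placeholder the OpenAI-compatible Ollama client accepts.
```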
apps/sim/scripts/ollama_docker.sh: deleted

@@ -1,25 +0,0 @@
-#!/bin/bash
-set -e
-
-# Check that at least one argument is provided. If not, display the usage help.
-if [ "$#" -eq 0 ]; then
-  echo "Usage: $(basename "$0") <ollama command> [args...]"
-  echo "Example: $(basename "$0") ps  # This will run 'ollama ps' inside the container"
-  exit 1
-fi
-
-# Start a detached container from the ollama/ollama image,
-# mounting the host's ~/.ollama directory directly into the container.
-# Here we mount it to /root/.ollama, assuming that's where the image expects it.
-CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama
-)
-
-# Define a cleanup function to stop the container regardless of how the script exits.
-cleanup() {
-  docker stop "$CONTAINER_ID" >/dev/null
-}
-trap cleanup EXIT
-
-# Execute the command provided by the user within the running container.
-# The command runs as: "ollama <user-arguments>"
-docker exec -it "$CONTAINER_ID" ollama "$@"
@@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types'
 
 const logger = createLogger('OllamaStore')
 
-export const useOllamaStore = create<OllamaStore>((set) => ({
+// Fetch models from the server API when on client side
+const fetchOllamaModels = async (): Promise<string[]> => {
+  try {
+    const response = await fetch('/api/providers/ollama/models')
+    if (!response.ok) {
+      logger.warn('Failed to fetch Ollama models from API', {
+        status: response.status,
+        statusText: response.statusText,
+      })
+      return []
+    }
+    const data = await response.json()
+    return data.models || []
+  } catch (error) {
+    logger.error('Error fetching Ollama models', {
+      error: error instanceof Error ? error.message : 'Unknown error',
+    })
+    return []
+  }
+}
+
+export const useOllamaStore = create<OllamaStore>((set, get) => ({
   models: [],
+  isLoading: false,
   setModels: (models) => {
     set({ models })
     // Update the providers when models change
     updateOllamaProviderModels(models)
   },
+
+  // Fetch models from API (client-side only)
+  fetchModels: async () => {
+    if (typeof window === 'undefined') {
+      logger.info('Skipping client-side model fetch on server')
+      return
+    }
+
+    if (get().isLoading) {
+      logger.info('Model fetch already in progress')
+      return
+    }
+
+    logger.info('Fetching Ollama models from API')
+    set({ isLoading: true })
+
+    try {
+      const models = await fetchOllamaModels()
+      logger.info('Successfully fetched Ollama models', {
+        count: models.length,
+        models,
+      })
+      get().setModels(models)
+    } catch (error) {
+      logger.error('Failed to fetch Ollama models', {
+        error: error instanceof Error ? error.message : 'Unknown error',
+      })
+    } finally {
+      set({ isLoading: false })
+    }
+  },
 }))
+
+// Auto-fetch models when the store is first accessed on the client
+if (typeof window !== 'undefined') {
+  // Delay to avoid hydration issues
+  setTimeout(() => {
+    useOllamaStore.getState().fetchModels()
+  }, 1000)
+}
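For orientation, a hedged sketch of how a component might read the fetched models. Only useOllamaStore and its fields come from this commit; the component and its markup are invented for illustration.

```tsx
// Illustrative component, not part of this commit.
import { useOllamaStore } from '@/stores/ollama/store'

export function OllamaModelPicker() {
  // Zustand selectors: subscribe only to the slices this component needs.
  const models = useOllamaStore((state) => state.models)
  const isLoading = useOllamaStore((state) => state.isLoading)

  if (isLoading) return <p>Loading local models...</p>
  if (models.length === 0) return <p>No Ollama models detected.</p>

  return (
    <ul>
      {models.map((name) => (
        <li key={name}>{name}</li>
      ))}
    </ul>
  )
}
```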
@@ -1,4 +1,6 @@
 export interface OllamaStore {
   models: string[]
+  isLoading: boolean
   setModels: (models: string[]) => void
+  fetchModels: () => Promise<void>
 }
@@ -1,11 +1,106 @@
|
||||
name: sim-with-ollama
|
||||
|
||||
services:
|
||||
local-llm-gpu:
|
||||
profiles:
|
||||
- local-gpu # This profile requires both 'local' and 'gpu'
|
||||
# Main Sim Studio Application
|
||||
simstudio:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/app.Dockerfile
|
||||
ports:
|
||||
- '3000:3000'
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||
- BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)}
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002}
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
migrations:
|
||||
condition: service_completed_successfully
|
||||
realtime:
|
||||
condition: service_healthy
|
||||
ollama:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000']
|
||||
interval: 90s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
restart: unless-stopped
|
||||
|
||||
# Realtime Socket Server
|
||||
realtime:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/realtime.Dockerfile
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||
- BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000}
|
||||
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- '3002:3002'
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 8G
|
||||
healthcheck:
|
||||
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health']
|
||||
interval: 90s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
# Database Migrations
|
||||
migrations:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: docker/db.Dockerfile
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||
depends_on:
|
||||
db:
|
||||
condition: service_healthy
|
||||
command: ['bun', 'run', 'db:migrate']
|
||||
restart: 'no'
|
||||
|
||||
# PostgreSQL Database with Vector Extension
|
||||
db:
|
||||
image: pgvector/pgvector:pg17
|
||||
restart: always
|
||||
ports:
|
||||
- '5432:5432'
|
||||
environment:
|
||||
- POSTGRES_USER=${POSTGRES_USER:-postgres}
|
||||
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
|
||||
- POSTGRES_DB=${POSTGRES_DB:-simstudio}
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ['CMD-SHELL', 'pg_isready -U postgres']
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# Ollama with GPU support (default)
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
pull_policy: always
|
||||
volumes:
|
||||
- ${HOME}/.ollama:/root/.ollama
|
||||
- ollama_data:/root/.ollama
|
||||
ports:
|
||||
- '11434:11434'
|
||||
environment:
|
||||
@@ -13,6 +108,7 @@ services:
|
||||
- OLLAMA_LOAD_TIMEOUT=-1
|
||||
- OLLAMA_KEEP_ALIVE=-1
|
||||
- OLLAMA_DEBUG=1
|
||||
- OLLAMA_HOST=0.0.0.0:11434
|
||||
command: 'serve'
|
||||
deploy:
|
||||
resources:
|
||||
@@ -26,23 +122,56 @@ services:
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
|
||||
local-llm-cpu:
|
||||
# Ollama CPU-only version (use with --profile cpu profile)
|
||||
ollama-cpu:
|
||||
profiles:
|
||||
- local-cpu # This profile requires both 'local' and 'cpu'
|
||||
- cpu
|
||||
image: ollama/ollama:latest
|
||||
pull_policy: always
|
||||
volumes:
|
||||
- ${HOME}/.ollama:/root/.ollama
|
||||
- ollama_data:/root/.ollama
|
||||
ports:
|
||||
- '11434:11434'
|
||||
environment:
|
||||
- OLLAMA_LOAD_TIMEOUT=-1
|
||||
- OLLAMA_KEEP_ALIVE=-1
|
||||
- OLLAMA_DEBUG=1
|
||||
- OLLAMA_HOST=0.0.0.0:11434
|
||||
command: 'serve'
|
||||
healthcheck:
|
||||
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
restart: unless-stopped
|
||||
|
||||
# Helper container to pull models automatically
|
||||
model-setup:
|
||||
image: ollama/ollama:latest
|
||||
profiles:
|
||||
- setup
|
||||
volumes:
|
||||
- ollama_data:/root/.ollama
|
||||
environment:
|
||||
- OLLAMA_HOST=ollama:11434
|
||||
depends_on:
|
||||
ollama:
|
||||
condition: service_healthy
|
||||
command: >
|
||||
sh -c "
|
||||
echo 'Waiting for Ollama to be ready...' &&
|
||||
sleep 10 &&
|
||||
echo 'Pulling gemma3:4b model (recommended starter model)...' &&
|
||||
ollama pull gemma3:4b &&
|
||||
echo 'Model setup complete! You can now use gemma3:4b in Sim.' &&
|
||||
echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>'
|
||||
"
|
||||
restart: 'no'
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
ollama_data:
|
||||
|
||||