feat(ollama): added streaming & tool call support for ollama, updated docs (#884)
`.github/CONTRIBUTING.md`:

@@ -164,10 +164,14 @@ Access the application at [http://localhost:3000/](http://localhost:3000/)
 
 To use local models with Sim:
 
-1. Pull models using our helper script:
+1. Install Ollama and pull models:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Install Ollama (if not already installed)
+curl -fsSL https://ollama.ai/install.sh | sh
+
+# Pull a model (e.g., gemma3:4b)
+ollama pull gemma3:4b
 ```
 
 2. Start Sim with local model support:
`README.md`:

@@ -59,27 +59,21 @@ docker compose -f docker-compose.prod.yml up -d
 
 Access the application at [http://localhost:3000/](http://localhost:3000/)
 
-#### Using Local Models
+#### Using Local Models with Ollama
 
-To use local models with Sim:
-
-1. Pull models using our helper script:
+Run Sim with local AI models using [Ollama](https://ollama.ai) - no external APIs required:
 
 ```bash
-./apps/sim/scripts/ollama_docker.sh pull <model_name>
+# Start with GPU support (automatically downloads gemma3:4b model)
+docker compose -f docker-compose.ollama.yml --profile setup up -d
+
+# For CPU-only systems:
+docker compose -f docker-compose.ollama.yml --profile cpu --profile setup up -d
 ```
 
-2. Start Sim with local model support:
+Wait for the model to download, then visit [http://localhost:3000](http://localhost:3000). Add more models with:
 
 ```bash
-# With NVIDIA GPU support
-docker compose --profile local-gpu -f docker-compose.ollama.yml up -d
-
-# Without GPU (CPU only)
-docker compose --profile local-cpu -f docker-compose.ollama.yml up -d
-
-# If hosting on a server, update the environment variables in the docker-compose.prod.yml file to include the server's public IP then start again (OLLAMA_URL to i.e. http://1.1.1.1:11434)
-docker compose -f docker-compose.prod.yml up -d
+docker compose -f docker-compose.ollama.yml exec ollama ollama pull llama3.1:8b
 ```
 
 ### Option 3: Dev Containers
Example environment file:

@@ -15,3 +15,6 @@ ENCRYPTION_KEY=your_encryption_key # Use `openssl rand -hex 32` to generate
 # RESEND_API_KEY= # Uncomment and add your key from https://resend.com to send actual emails
 # If left commented out, emails will be logged to console instead
 
+# Local AI Models (Optional)
+# OLLAMA_URL=http://localhost:11434 # URL for local Ollama server - uncomment if using local models
+
`apps/sim/app/api/providers/ollama/models/route.ts` (new file, 52 lines):

```ts
import { type NextRequest, NextResponse } from 'next/server'
import { env } from '@/lib/env'
import { createLogger } from '@/lib/logs/console/logger'
import type { ModelsObject } from '@/providers/ollama/types'

const logger = createLogger('OllamaModelsAPI')
const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'

export const dynamic = 'force-dynamic'

/**
 * Get available Ollama models
 */
export async function GET(request: NextRequest) {
  try {
    logger.info('Fetching Ollama models', {
      host: OLLAMA_HOST,
    })

    const response = await fetch(`${OLLAMA_HOST}/api/tags`, {
      headers: {
        'Content-Type': 'application/json',
      },
    })

    if (!response.ok) {
      logger.warn('Ollama service is not available', {
        status: response.status,
        statusText: response.statusText,
      })
      return NextResponse.json({ models: [] })
    }

    const data = (await response.json()) as ModelsObject
    const models = data.models.map((model) => model.name)

    logger.info('Successfully fetched Ollama models', {
      count: models.length,
      models,
    })

    return NextResponse.json({ models })
  } catch (error) {
    logger.error('Failed to fetch Ollama models', {
      error: error instanceof Error ? error.message : 'Unknown error',
      host: OLLAMA_HOST,
    })

    // Return empty array instead of error to avoid breaking the UI
    return NextResponse.json({ models: [] })
  }
}
```
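For context, `ModelsObject` is imported from `@/providers/ollama/types` and its definition is not part of this diff; a sketch of the shape the route relies on, inferred from `data.models.map((model) => model.name)` and Ollama's `/api/tags` response (assumed, not authoritative):

```ts
// Assumed shape only - the real definition lives in @/providers/ollama/types.
// Ollama's /api/tags returns { models: [{ name: 'gemma3:4b', ... }, ...] }.
interface OllamaModel {
  name: string
}

interface ModelsObject {
  models: OllamaModel[]
}
```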
Workflow block component (`WorkflowBlock`):

@@ -405,33 +405,37 @@ export function WorkflowBlock({ id, data }: NodeProps<WorkflowBlockProps>) {
       // If there's no condition, the block should be shown
       if (!block.condition) return true
 
+      // If condition is a function, call it to get the actual condition object
+      const actualCondition =
+        typeof block.condition === 'function' ? block.condition() : block.condition
+
       // Get the values of the fields this block depends on from the appropriate state
-      const fieldValue = stateToUse[block.condition.field]?.value
-      const andFieldValue = block.condition.and
-        ? stateToUse[block.condition.and.field]?.value
+      const fieldValue = stateToUse[actualCondition.field]?.value
+      const andFieldValue = actualCondition.and
+        ? stateToUse[actualCondition.and.field]?.value
         : undefined
 
       // Check if the condition value is an array
-      const isValueMatch = Array.isArray(block.condition.value)
+      const isValueMatch = Array.isArray(actualCondition.value)
         ? fieldValue != null &&
-          (block.condition.not
-            ? !block.condition.value.includes(fieldValue as string | number | boolean)
-            : block.condition.value.includes(fieldValue as string | number | boolean))
-        : block.condition.not
-          ? fieldValue !== block.condition.value
-          : fieldValue === block.condition.value
+          (actualCondition.not
+            ? !actualCondition.value.includes(fieldValue as string | number | boolean)
+            : actualCondition.value.includes(fieldValue as string | number | boolean))
+        : actualCondition.not
+          ? fieldValue !== actualCondition.value
+          : fieldValue === actualCondition.value
 
       // Check both conditions if 'and' is present
       const isAndValueMatch =
-        !block.condition.and ||
-        (Array.isArray(block.condition.and.value)
+        !actualCondition.and ||
+        (Array.isArray(actualCondition.and.value)
           ? andFieldValue != null &&
-            (block.condition.and.not
-              ? !block.condition.and.value.includes(andFieldValue as string | number | boolean)
-              : block.condition.and.value.includes(andFieldValue as string | number | boolean))
-          : block.condition.and.not
-            ? andFieldValue !== block.condition.and.value
-            : andFieldValue === block.condition.and.value)
+            (actualCondition.and.not
+              ? !actualCondition.and.value.includes(andFieldValue as string | number | boolean)
+              : actualCondition.and.value.includes(andFieldValue as string | number | boolean))
+          : actualCondition.and.not
+            ? andFieldValue !== actualCondition.and.value
+            : andFieldValue === actualCondition.and.value)
 
       return isValueMatch && isAndValueMatch
     })
Agent block configuration (API key sub-block):

@@ -12,6 +12,12 @@ import {
   MODELS_WITH_TEMPERATURE_SUPPORT,
   providers,
 } from '@/providers/utils'
+
+// Get current Ollama models dynamically
+const getCurrentOllamaModels = () => {
+  return useOllamaStore.getState().models
+}
+
 import { useOllamaStore } from '@/stores/ollama/store'
 import type { ToolResponse } from '@/tools/types'

@@ -213,14 +219,18 @@ Create a system prompt appropriately detailed for the request, using clear language
       password: true,
       connectionDroppable: false,
       required: true,
-      // Hide API key for all hosted models when running on hosted version
+      // Hide API key for hosted models and Ollama models
       condition: isHosted
         ? {
             field: 'model',
             value: getHostedModels(),
             not: true, // Show for all models EXCEPT those listed
           }
-        : undefined, // Show for all models in non-hosted environments
+        : () => ({
+            field: 'model',
+            value: getCurrentOllamaModels(),
+            not: true, // Show for all models EXCEPT Ollama models
+          }),
     },
     {
       id: 'azureEndpoint',
`SubBlockConfig` type:

@@ -118,7 +118,8 @@ export interface SubBlockConfig {
   hidden?: boolean
   description?: string
   value?: (params: Record<string, any>) => string
-  condition?: {
+  condition?:
+    | {
         field: string
         value: string | number | boolean | Array<string | number | boolean>
         not?: boolean
@@ -128,6 +129,16 @@ export interface SubBlockConfig {
           not?: boolean
         }
       }
+    | (() => {
+        field: string
+        value: string | number | boolean | Array<string | number | boolean>
+        not?: boolean
+        and?: {
+          field: string
+          value: string | number | boolean | Array<string | number | boolean> | undefined
+          not?: boolean
+        }
+      })
   // Props specific to 'code' sub-block type
   language?: 'javascript' | 'json'
   generationType?: GenerationType
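With this union, a condition can be a static object or a zero-argument function evaluated each time visibility is checked. A minimal sketch of the two forms (the field/value pairs here are illustrative):

```ts
// Static: show the sub-block only when the selected model is in a fixed list.
const staticCondition = {
  field: 'model',
  value: ['gpt-4o', 'claude-3-5-sonnet'], // illustrative model names
}

// Dynamic: re-evaluated on every check, so the value list can come from a
// store that changes at runtime - exactly how the agent block above hides
// the API key field for whatever Ollama models are currently installed.
const dynamicCondition = () => ({
  field: 'model',
  value: useOllamaStore.getState().models,
  not: true, // show for every model EXCEPT these
})
```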
`InputResolver`:

@@ -58,7 +58,7 @@ export class InputResolver
 
   /**
    * Evaluates if a sub-block should be active based on its condition
-   * @param condition - The condition to evaluate
+   * @param condition - The condition to evaluate (can be static object or function)
    * @param currentValues - Current values of all inputs
    * @returns True if the sub-block should be active
    */
@@ -70,37 +70,46 @@ export class InputResolver {
           not?: boolean
           and?: { field: string; value: any; not?: boolean }
         }
+      | (() => {
+          field: string
+          value: any
+          not?: boolean
+          and?: { field: string; value: any; not?: boolean }
+        })
       | undefined,
     currentValues: Record<string, any>
   ): boolean {
     if (!condition) return true
 
+    // If condition is a function, call it to get the actual condition object
+    const actualCondition = typeof condition === 'function' ? condition() : condition
+
     // Get the field value
-    const fieldValue = currentValues[condition.field]
+    const fieldValue = currentValues[actualCondition.field]
 
     // Check if the condition value is an array
-    const isValueMatch = Array.isArray(condition.value)
+    const isValueMatch = Array.isArray(actualCondition.value)
       ? fieldValue != null &&
-        (condition.not
-          ? !condition.value.includes(fieldValue)
-          : condition.value.includes(fieldValue))
-      : condition.not
-        ? fieldValue !== condition.value
-        : fieldValue === condition.value
+        (actualCondition.not
+          ? !actualCondition.value.includes(fieldValue)
+          : actualCondition.value.includes(fieldValue))
+      : actualCondition.not
+        ? fieldValue !== actualCondition.value
+        : fieldValue === actualCondition.value
 
     // Check both conditions if 'and' is present
     const isAndValueMatch =
-      !condition.and ||
+      !actualCondition.and ||
       (() => {
-        const andFieldValue = currentValues[condition.and!.field]
-        return Array.isArray(condition.and!.value)
+        const andFieldValue = currentValues[actualCondition.and!.field]
+        return Array.isArray(actualCondition.and!.value)
           ? andFieldValue != null &&
-            (condition.and!.not
-              ? !condition.and!.value.includes(andFieldValue)
-              : condition.and!.value.includes(andFieldValue))
-          : condition.and!.not
-            ? andFieldValue !== condition.and!.value
-            : andFieldValue === condition.and!.value
+            (actualCondition.and!.not
+              ? !actualCondition.and!.value.includes(andFieldValue)
+              : actualCondition.and!.value.includes(andFieldValue))
+          : actualCondition.and!.not
+            ? andFieldValue !== actualCondition.and!.value
+            : andFieldValue === actualCondition.and!.value
       })()
 
     return isValueMatch && isAndValueMatch
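To make the `not`/`and` semantics concrete, a small illustrative condition and input set (names invented; the matching rules mirror the code above):

```ts
// Active when provider is one of the listed values AND stream is NOT true.
const condition = {
  field: 'provider',
  value: ['openai', 'anthropic'],
  and: { field: 'stream', value: true, not: true },
}

// provider 'openai' is in the array, so isValueMatch is true;
// stream !== true satisfies the negated 'and' clause, so the
// sub-block is considered active for these values.
const currentValues = { provider: 'openai', stream: false }
```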
Ollama provider (`@/providers/ollama`):

@@ -1,6 +1,7 @@
 import OpenAI from 'openai'
 import { env } from '@/lib/env'
 import { createLogger } from '@/lib/logs/console/logger'
+import type { StreamingExecution } from '@/executor/types'
 import type { ModelsObject } from '@/providers/ollama/types'
 import type {
   ProviderConfig,
@@ -8,12 +9,57 @@ import type {
   ProviderResponse,
   TimeSegment,
 } from '@/providers/types'
+import {
+  prepareToolExecution,
+  prepareToolsWithUsageControl,
+  trackForcedToolUsage,
+} from '@/providers/utils'
 import { useOllamaStore } from '@/stores/ollama/store'
 import { executeTool } from '@/tools'
 
 const logger = createLogger('OllamaProvider')
 const OLLAMA_HOST = env.OLLAMA_URL || 'http://localhost:11434'
 
+/**
+ * Helper function to convert an Ollama stream to a standard ReadableStream
+ * and collect completion metrics
+ */
+function createReadableStreamFromOllamaStream(
+  ollamaStream: any,
+  onComplete?: (content: string, usage?: any) => void
+): ReadableStream {
+  let fullContent = ''
+  let usageData: any = null
+
+  return new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of ollamaStream) {
+          // Check for usage data in the final chunk
+          if (chunk.usage) {
+            usageData = chunk.usage
+          }
+
+          const content = chunk.choices[0]?.delta?.content || ''
+          if (content) {
+            fullContent += content
+            controller.enqueue(new TextEncoder().encode(content))
+          }
+        }
+
+        // Once stream is complete, call the completion callback with the final content and usage
+        if (onComplete) {
+          onComplete(fullContent, usageData)
+        }
+
+        controller.close()
+      } catch (error) {
+        controller.error(error)
+      }
+    },
+  })
+}
+
 export const ollamaProvider: ProviderConfig = {
   id: 'ollama',
   name: 'Ollama',
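The helper adapts the OpenAI SDK's async-iterable stream into a web `ReadableStream` of UTF-8 bytes. A sketch of how a caller could drain such a stream (the consumer function is illustrative, not part of the diff):

```ts
// Reassemble the text emitted by createReadableStreamFromOllamaStream.
async function drainStream(stream: ReadableStream): Promise<string> {
  const reader = stream.getReader()
  const decoder = new TextDecoder()
  let text = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    text += decoder.decode(value, { stream: true }) // value is a Uint8Array chunk
  }
  return text
}
```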
@@ -46,20 +92,20 @@ export const ollamaProvider: ProviderConfig = {
     }
   },
 
-  executeRequest: async (request: ProviderRequest): Promise<ProviderResponse> => {
+  executeRequest: async (
+    request: ProviderRequest
+  ): Promise<ProviderResponse | StreamingExecution> => {
     logger.info('Preparing Ollama request', {
       model: request.model,
       hasSystemPrompt: !!request.systemPrompt,
-      hasMessages: !!request.context,
+      hasMessages: !!request.messages?.length,
       hasTools: !!request.tools?.length,
       toolCount: request.tools?.length || 0,
       hasResponseFormat: !!request.responseFormat,
+      stream: !!request.stream,
     })
 
-    const startTime = Date.now()
-
-    try {
-      // Prepare messages array
+    // Create Ollama client using OpenAI-compatible API
     const ollama = new OpenAI({
       apiKey: 'empty',
       baseURL: `${OLLAMA_HOST}/v1`,
@@ -70,12 +116,18 @@ export const ollamaProvider: ProviderConfig = {
 
     // Add system prompt if present
     if (request.systemPrompt) {
-      allMessages.push({ role: 'system', content: request.systemPrompt })
+      allMessages.push({
+        role: 'system',
+        content: request.systemPrompt,
+      })
     }
 
     // Add context if present
     if (request.context) {
-      allMessages.push({ role: 'user', content: request.context })
+      allMessages.push({
+        role: 'user',
+        content: request.context,
+      })
     }
 
     // Add remaining messages
@@ -83,17 +135,6 @@ export const ollamaProvider: ProviderConfig = {
       allMessages.push(...request.messages)
     }
 
-    // Build the basic payload
-    const payload: any = {
-      model: request.model,
-      messages: allMessages,
-      stream: false,
-    }
-
-    // Add optional parameters
-    if (request.temperature !== undefined) payload.temperature = request.temperature
-    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
-
     // Transform tools to OpenAI format if provided
     const tools = request.tools?.length
       ? request.tools.map((tool) => ({
@@ -106,31 +147,183 @@ export const ollamaProvider: ProviderConfig = {
         }))
       : undefined
 
-    // Handle tools and tool usage control
-    if (tools?.length) {
-      // Filter out any tools with usageControl='none', but ignore 'force' since Ollama doesn't support it
-      const filteredTools = tools.filter((tool) => {
-        const toolId = tool.function?.name
-        const toolConfig = request.tools?.find((t) => t.id === toolId)
-        // Only filter out 'none', treat 'force' as 'auto'
-        return toolConfig?.usageControl !== 'none'
-      })
-
-      if (filteredTools?.length) {
+    // Build the request payload
+    const payload: any = {
+      model: request.model,
+      messages: allMessages,
+    }
+
+    // Add optional parameters
+    if (request.temperature !== undefined) payload.temperature = request.temperature
+    if (request.maxTokens !== undefined) payload.max_tokens = request.maxTokens
+
+    // Add response format for structured output if specified
+    if (request.responseFormat) {
+      // Use OpenAI's JSON schema format (Ollama supports this)
+      payload.response_format = {
+        type: 'json_schema',
+        json_schema: {
+          name: request.responseFormat.name || 'response_schema',
+          schema: request.responseFormat.schema || request.responseFormat,
+          strict: request.responseFormat.strict !== false,
+        },
+      }
+
+      logger.info('Added JSON schema response format to Ollama request')
+    }
+
+    // Handle tools and tool usage control
+    let preparedTools: ReturnType<typeof prepareToolsWithUsageControl> | null = null
+
+    if (tools?.length) {
+      preparedTools = prepareToolsWithUsageControl(tools, request.tools, logger, 'ollama')
+      const { tools: filteredTools, toolChoice } = preparedTools
+
+      if (filteredTools?.length && toolChoice) {
         payload.tools = filteredTools
-        // Always use 'auto' for Ollama, regardless of the tool_choice setting
-        payload.tool_choice = 'auto'
+        // Ollama supports 'auto' but not forced tool selection - convert 'force' to 'auto'
+        payload.tool_choice = typeof toolChoice === 'string' ? toolChoice : 'auto'
 
         logger.info('Ollama request configuration:', {
           toolCount: filteredTools.length,
-          toolChoice: 'auto', // Ollama always uses auto
+          toolChoice: payload.tool_choice,
           model: request.model,
         })
       }
     }
 
+    // Start execution timer for the entire provider execution
+    const providerStartTime = Date.now()
+    const providerStartTimeISO = new Date(providerStartTime).toISOString()
+
+    try {
+      // Check if we can stream directly (no tools required)
+      if (request.stream && (!tools || tools.length === 0)) {
+        logger.info('Using streaming response for Ollama request')
+
+        // Create a streaming request with token usage tracking
+        const streamResponse = await ollama.chat.completions.create({
+          ...payload,
+          stream: true,
+          stream_options: { include_usage: true },
+        })
+
+        // Start collecting token usage from the stream
+        const tokenUsage = {
+          prompt: 0,
+          completion: 0,
+          total: 0,
+        }
+
+        // Create a StreamingExecution response with a callback to update content and tokens
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update the timing information with the actual completion time
+            const streamEndTime = Date.now()
+            const streamEndTimeISO = new Date(streamEndTime).toISOString()
+
+            if (streamingResult.execution.output.providerTiming) {
+              streamingResult.execution.output.providerTiming.endTime = streamEndTimeISO
+              streamingResult.execution.output.providerTiming.duration =
+                streamEndTime - providerStartTime
+
+              // Update the time segment as well
+              if (streamingResult.execution.output.providerTiming.timeSegments?.[0]) {
+                streamingResult.execution.output.providerTiming.timeSegments[0].endTime =
+                  streamEndTime
+                streamingResult.execution.output.providerTiming.timeSegments[0].duration =
+                  streamEndTime - providerStartTime
+              }
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokenUsage.prompt,
+                completion: usage.completion_tokens || tokenUsage.completion,
+                total: usage.total_tokens || tokenUsage.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the stream completion callback
+              model: request.model,
+              tokens: tokenUsage,
+              toolCalls: undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                timeSegments: [
+                  {
+                    type: 'model',
+                    name: 'Streaming response',
+                    startTime: providerStartTime,
+                    endTime: Date.now(),
+                    duration: Date.now() - providerStartTime,
+                  },
+                ],
+              },
+            },
+            logs: [], // No block logs for direct streaming
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Make the initial API request
+      const initialCallTime = Date.now()
+
+      // Track the original tool_choice for forced tool tracking
+      const originalToolChoice = payload.tool_choice
+
+      // Track forced tools and their usage
+      const forcedTools = preparedTools?.forcedTools || []
+      let usedForcedTools: string[] = []
+
+      // Helper function to check for forced tool usage in responses
+      const checkForForcedToolUsage = (
+        response: any,
+        toolChoice: string | { type: string; function?: { name: string }; name?: string; any?: any }
+      ) => {
+        if (typeof toolChoice === 'object' && response.choices[0]?.message?.tool_calls) {
+          const toolCallsResponse = response.choices[0].message.tool_calls
+          const result = trackForcedToolUsage(
+            toolCallsResponse,
+            toolChoice,
+            logger,
+            'ollama',
+            forcedTools,
+            usedForcedTools
+          )
+          hasUsedForcedTool = result.hasUsedForcedTool
+          usedForcedTools = result.usedForcedTools
+        }
+      }
+
       let currentResponse = await ollama.chat.completions.create(payload)
-      const firstResponseTime = Date.now() - startTime
+      const firstResponseTime = Date.now() - initialCallTime
 
       let content = currentResponse.choices[0]?.message?.content || ''
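To illustrate the structured-output branch above, a hypothetical `request.responseFormat` and the OpenAI-style `response_format` it produces (the schema is invented for the example):

```ts
// Hypothetical caller-supplied format:
const responseFormat = {
  name: 'weather_report',
  schema: { type: 'object', properties: { tempC: { type: 'number' } } },
  strict: true,
}

// What the payload ends up carrying:
const response_format = {
  type: 'json_schema',
  json_schema: {
    name: 'weather_report', // defaults to 'response_schema' when unnamed
    schema: responseFormat.schema, // falls back to the whole object if .schema is absent
    strict: true, // strict unless explicitly set to false
  },
}
```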
@@ -140,6 +333,7 @@ export const ollamaProvider: ProviderConfig = {
         content = content.trim()
       }
 
+      // Collect token information
       const tokens = {
         prompt: currentResponse.usage?.prompt_tokens || 0,
         completion: currentResponse.usage?.completion_tokens || 0,
@@ -155,18 +349,23 @@ export const ollamaProvider: ProviderConfig = {
       let modelTime = firstResponseTime
       let toolsTime = 0
 
+      // Track if a forced tool has been used
+      let hasUsedForcedTool = false
+
       // Track each model and tool call segment with timestamps
       const timeSegments: TimeSegment[] = [
         {
           type: 'model',
           name: 'Initial response',
-          startTime: startTime,
-          endTime: startTime + firstResponseTime,
+          startTime: initialCallTime,
+          endTime: initialCallTime + firstResponseTime,
           duration: firstResponseTime,
         },
       ]
 
-      try {
+      // Check if a forced tool was used in the first response
+      checkForForcedToolUsage(currentResponse, originalToolChoice)
+
       while (iterationCount < MAX_ITERATIONS) {
         // Check for tool calls
         const toolCallsInResponse = currentResponse.choices[0]?.message?.tool_calls
@@ -174,6 +373,10 @@ export const ollamaProvider: ProviderConfig = {
           break
         }
 
+        logger.info(
+          `Processing ${toolCallsInResponse.length} tool calls (iteration ${iterationCount + 1}/${MAX_ITERATIONS})`
+        )
+
         // Track time for tool calls in this batch
         const toolsStartTime = Date.now()
 
@@ -190,26 +393,7 @@ export const ollamaProvider: ProviderConfig = {
             // Execute the tool
             const toolCallStartTime = Date.now()
 
-            // Only merge actual tool parameters for logging
-            const toolParams = {
-              ...tool.params,
-              ...toolArgs,
-            }
-
-            // Add system parameters for execution
-            const executionParams = {
-              ...toolParams,
-              ...(request.workflowId
-                ? {
-                    _context: {
-                      workflowId: request.workflowId,
-                      ...(request.chatId ? { chatId: request.chatId } : {}),
-                    },
-                  }
-                : {}),
-              ...(request.environmentVariables ? { envVars: request.environmentVariables } : {}),
-            }
+            const { toolParams, executionParams } = prepareToolExecution(tool, toolArgs, request)
 
             const result = await executeTool(toolName, executionParams, true)
             const toolCallEndTime = Date.now()
             const toolCallDuration = toolCallEndTime - toolCallStartTime
 
@@ -269,7 +453,10 @@ export const ollamaProvider: ProviderConfig = {
               content: JSON.stringify(resultContent),
             })
           } catch (error) {
-            logger.error('Error processing tool call:', { error })
+            logger.error('Error processing tool call:', {
+              error,
+              toolName: toolCall?.function?.name,
+            })
           }
         }
 
@@ -283,12 +470,31 @@ export const ollamaProvider: ProviderConfig = {
           messages: currentMessages,
         }
 
+        // Update tool_choice based on which forced tools have been used
+        if (typeof originalToolChoice === 'object' && hasUsedForcedTool && forcedTools.length > 0) {
+          // If we have remaining forced tools, get the next one to force
+          const remainingTools = forcedTools.filter((tool) => !usedForcedTools.includes(tool))
+
+          if (remainingTools.length > 0) {
+            // Ollama doesn't support forced tool selection, so we keep using 'auto'
+            nextPayload.tool_choice = 'auto'
+            logger.info(`Ollama doesn't support forced tools, using auto for: ${remainingTools[0]}`)
+          } else {
+            // All forced tools have been used, continue with auto
+            nextPayload.tool_choice = 'auto'
+            logger.info('All forced tools have been used, continuing with auto tool_choice')
+          }
+        }
+
         // Time the next model call
         const nextModelStartTime = Date.now()
 
         // Make the next request
         currentResponse = await ollama.chat.completions.create(nextPayload)
 
+        // Check if any forced tools were used in this response
+        checkForForcedToolUsage(currentResponse, nextPayload.tool_choice)
+
         const nextModelEndTime = Date.now()
         const thisModelTime = nextModelEndTime - nextModelStartTime
@@ -321,35 +527,129 @@ export const ollamaProvider: ProviderConfig = {
 
         iterationCount++
       }
-    } catch (error) {
-      logger.error('Error in Ollama request:', { error })
-    }
 
-    const endTime = Date.now()
+      // After all tool processing complete, if streaming was requested and we have messages, use streaming for the final response
+      if (request.stream && iterationCount > 0) {
+        logger.info('Using streaming for final response after tool calls')
+
+        const streamingPayload = {
+          ...payload,
+          messages: currentMessages,
+          tool_choice: 'auto', // Always use 'auto' for the streaming response after tool calls
+          stream: true,
+          stream_options: { include_usage: true },
+        }
+
+        const streamResponse = await ollama.chat.completions.create(streamingPayload)
+
+        // Create the StreamingExecution object with all collected data
+        const streamingResult = {
+          stream: createReadableStreamFromOllamaStream(streamResponse, (content, usage) => {
+            // Update the execution data with the final content and token usage
+            streamingResult.execution.output.content = content
+
+            // Clean up the response content
+            if (content) {
+              streamingResult.execution.output.content = content
+                .replace(/```json\n?|\n?```/g, '')
+                .trim()
+            }
+
+            // Update token usage if available from the stream
+            if (usage) {
+              const newTokens = {
+                prompt: usage.prompt_tokens || tokens.prompt,
+                completion: usage.completion_tokens || tokens.completion,
+                total: usage.total_tokens || tokens.total,
+              }
+
+              streamingResult.execution.output.tokens = newTokens
+            }
+          }),
+          execution: {
+            success: true,
+            output: {
+              content: '', // Will be filled by the callback
+              model: request.model,
+              tokens: {
+                prompt: tokens.prompt,
+                completion: tokens.completion,
+                total: tokens.total,
+              },
+              toolCalls:
+                toolCalls.length > 0
+                  ? {
+                      list: toolCalls,
+                      count: toolCalls.length,
+                    }
+                  : undefined,
+              providerTiming: {
+                startTime: providerStartTimeISO,
+                endTime: new Date().toISOString(),
+                duration: Date.now() - providerStartTime,
+                modelTime: modelTime,
+                toolsTime: toolsTime,
+                firstResponseTime: firstResponseTime,
+                iterations: iterationCount + 1,
+                timeSegments: timeSegments,
+              },
+            },
+            logs: [], // No block logs at provider level
+            metadata: {
+              startTime: providerStartTimeISO,
+              endTime: new Date().toISOString(),
+              duration: Date.now() - providerStartTime,
+            },
+          },
+        } as StreamingExecution
+
+        // Return the streaming execution object
+        return streamingResult as StreamingExecution
+      }
+
+      // Calculate overall timing
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
 
       return {
-        content: content,
+        content,
         model: request.model,
         tokens,
         toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
         toolResults: toolResults.length > 0 ? toolResults : undefined,
         timing: {
-          startTime: new Date(startTime).toISOString(),
-          endTime: new Date(endTime).toISOString(),
-          duration: endTime - startTime,
+          startTime: providerStartTimeISO,
+          endTime: providerEndTimeISO,
+          duration: totalDuration,
           modelTime: modelTime,
           toolsTime: toolsTime,
           firstResponseTime: firstResponseTime,
           iterations: iterationCount + 1,
-          timeSegments,
+          timeSegments: timeSegments,
         },
       }
     } catch (error) {
-      logger.error('Error in Ollama request', {
-        error: error instanceof Error ? error.message : 'Unknown error',
-        model: request.model,
-      })
-      throw error
+      // Include timing information even for errors
+      const providerEndTime = Date.now()
+      const providerEndTimeISO = new Date(providerEndTime).toISOString()
+      const totalDuration = providerEndTime - providerStartTime
+
+      logger.error('Error in Ollama request:', {
+        error,
+        duration: totalDuration,
+      })
+
+      // Create a new error with timing information
+      const enhancedError = new Error(error instanceof Error ? error.message : String(error))
+      // @ts-ignore - Adding timing property to the error
+      enhancedError.timing = {
+        startTime: providerStartTimeISO,
+        endTime: providerEndTimeISO,
+        duration: totalDuration,
+      }
+
+      throw enhancedError
     }
   },
 }
`@/providers/utils`:

@@ -27,6 +27,7 @@ import { openaiProvider } from '@/providers/openai'
 import type { ProviderConfig, ProviderId, ProviderToolConfig } from '@/providers/types'
 import { xAIProvider } from '@/providers/xai'
 import { useCustomToolsStore } from '@/stores/custom-tools/store'
+import { useOllamaStore } from '@/stores/ollama/store'
 
 const logger = createLogger('ProviderUtils')
 
@@ -548,6 +549,12 @@ export function getApiKey(provider: string, model: string, userProvidedKey?: string)
   // If user provided a key, use it as a fallback
   const hasUserKey = !!userProvidedKey
 
+  // Ollama models don't require API keys - they run locally
+  const isOllamaModel = provider === 'ollama' || useOllamaStore.getState().models.includes(model)
+  if (isOllamaModel) {
+    return 'empty' // Ollama uses 'empty' as a placeholder API key
+  }
+
   // Use server key rotation for all OpenAI models and Anthropic's Claude models on the hosted platform
   const isOpenAIModel = provider === 'openai'
   const isClaudeModel = provider === 'anthropic'
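Since Ollama runs locally, the placeholder key matches the `apiKey: 'empty'` used when constructing the OpenAI-compatible client in the provider. An illustrative call (the model name is hypothetical):

```ts
getApiKey('ollama', 'gemma3:4b') // -> 'empty', no key lookup or rotation
// A model currently listed in the Ollama store short-circuits the same way,
// even when the provider string differs.
```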
`apps/sim/scripts/ollama_docker.sh` (deleted; replaced by the Docker Compose profiles below):

@@ -1,25 +0,0 @@
-#!/bin/bash
-set -e
-
-# Check that at least one argument is provided. If not, display the usage help.
-if [ "$#" -eq 0 ]; then
-  echo "Usage: $(basename "$0") <ollama command> [args...]"
-  echo "Example: $(basename "$0") ps  # This will run 'ollama ps' inside the container"
-  exit 1
-fi
-
-# Start a detached container from the ollama/ollama image,
-# mounting the host's ~/.ollama directory directly into the container.
-# Here we mount it to /root/.ollama, assuming that's where the image expects it.
-CONTAINER_ID=$(docker run -d -v ~/.ollama:/root/.ollama -p 11434:11434 ollama/ollama)
-
-# Define a cleanup function to stop the container regardless of how the script exits.
-cleanup() {
-  docker stop "$CONTAINER_ID" >/dev/null
-}
-trap cleanup EXIT
-
-# Execute the command provided by the user within the running container.
-# The command runs as: "ollama <user-arguments>"
-docker exec -it "$CONTAINER_ID" ollama "$@"
`@/stores/ollama/store`:

@@ -5,11 +5,72 @@ import type { OllamaStore } from '@/stores/ollama/types'
 
 const logger = createLogger('OllamaStore')
 
-export const useOllamaStore = create<OllamaStore>((set) => ({
+// Fetch models from the server API when on client side
+const fetchOllamaModels = async (): Promise<string[]> => {
+  try {
+    const response = await fetch('/api/providers/ollama/models')
+    if (!response.ok) {
+      logger.warn('Failed to fetch Ollama models from API', {
+        status: response.status,
+        statusText: response.statusText,
+      })
+      return []
+    }
+    const data = await response.json()
+    return data.models || []
+  } catch (error) {
+    logger.error('Error fetching Ollama models', {
+      error: error instanceof Error ? error.message : 'Unknown error',
+    })
+    return []
+  }
+}
+
+export const useOllamaStore = create<OllamaStore>((set, get) => ({
   models: [],
+  isLoading: false,
   setModels: (models) => {
     set({ models })
     // Update the providers when models change
     updateOllamaProviderModels(models)
   },
+
+  // Fetch models from API (client-side only)
+  fetchModels: async () => {
+    if (typeof window === 'undefined') {
+      logger.info('Skipping client-side model fetch on server')
+      return
+    }
+
+    if (get().isLoading) {
+      logger.info('Model fetch already in progress')
+      return
+    }
+
+    logger.info('Fetching Ollama models from API')
+    set({ isLoading: true })
+
+    try {
+      const models = await fetchOllamaModels()
+      logger.info('Successfully fetched Ollama models', {
+        count: models.length,
+        models,
+      })
+      get().setModels(models)
+    } catch (error) {
+      logger.error('Failed to fetch Ollama models', {
+        error: error instanceof Error ? error.message : 'Unknown error',
+      })
+    } finally {
+      set({ isLoading: false })
+    }
+  },
 }))
+
+// Auto-fetch models when the store is first accessed on the client
+if (typeof window !== 'undefined') {
+  // Delay to avoid hydration issues
+  setTimeout(() => {
+    useOllamaStore.getState().fetchModels()
+  }, 1000)
+}
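A minimal sketch of consuming the store from a React component (the component itself is illustrative; `models`, `isLoading`, and the hook come from the store above):

```tsx
function OllamaModelPicker() {
  const models = useOllamaStore((state) => state.models)
  const isLoading = useOllamaStore((state) => state.isLoading)

  if (isLoading) return <span>Loading local models...</span>
  return (
    <select>
      {models.map((name) => (
        <option key={name}>{name}</option>
      ))}
    </select>
  )
}
```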
`@/stores/ollama/types`:

@@ -1,4 +1,6 @@
 export interface OllamaStore {
   models: string[]
+  isLoading: boolean
   setModels: (models: string[]) => void
+  fetchModels: () => Promise<void>
 }

`docker-compose.ollama.yml`:
|
|||||||
|
name: sim-with-ollama
|
||||||
|
|
||||||
services:
|
services:
|
||||||
local-llm-gpu:
|
# Main Sim Studio Application
|
||||||
profiles:
|
simstudio:
|
||||||
- local-gpu # This profile requires both 'local' and 'gpu'
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/app.Dockerfile
|
||||||
|
ports:
|
||||||
|
- '3000:3000'
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 8G
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
- BETTER_AUTH_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||||
|
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-$(openssl rand -hex 32)}
|
||||||
|
- OLLAMA_URL=http://ollama:11434
|
||||||
|
- NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
migrations:
|
||||||
|
condition: service_completed_successfully
|
||||||
|
realtime:
|
||||||
|
condition: service_healthy
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000']
|
||||||
|
interval: 90s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Realtime Socket Server
|
||||||
|
realtime:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/realtime.Dockerfile
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
- NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_URL=${BETTER_AUTH_URL:-http://localhost:3000}
|
||||||
|
- BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET:-sim_auth_secret_$(openssl rand -hex 16)}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '3002:3002'
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 8G
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health']
|
||||||
|
interval: 90s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
|
||||||
|
# Database Migrations
|
||||||
|
migrations:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: docker/db.Dockerfile
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
|
||||||
|
depends_on:
|
||||||
|
db:
|
||||||
|
condition: service_healthy
|
||||||
|
command: ['bun', 'run', 'db:migrate']
|
||||||
|
restart: 'no'
|
||||||
|
|
||||||
|
# PostgreSQL Database with Vector Extension
|
||||||
|
db:
|
||||||
|
image: pgvector/pgvector:pg17
|
||||||
|
restart: always
|
||||||
|
ports:
|
||||||
|
- '5432:5432'
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=${POSTGRES_USER:-postgres}
|
||||||
|
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}
|
||||||
|
- POSTGRES_DB=${POSTGRES_DB:-simstudio}
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD-SHELL', 'pg_isready -U postgres']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
# Ollama with GPU support (default)
|
||||||
|
ollama:
|
||||||
image: ollama/ollama:latest
|
image: ollama/ollama:latest
|
||||||
pull_policy: always
|
pull_policy: always
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.ollama:/root/.ollama
|
- ollama_data:/root/.ollama
|
||||||
ports:
|
ports:
|
||||||
- '11434:11434'
|
- '11434:11434'
|
||||||
environment:
|
environment:
|
||||||
@@ -13,6 +108,7 @@ services:
|
|||||||
- OLLAMA_LOAD_TIMEOUT=-1
|
- OLLAMA_LOAD_TIMEOUT=-1
|
||||||
- OLLAMA_KEEP_ALIVE=-1
|
- OLLAMA_KEEP_ALIVE=-1
|
||||||
- OLLAMA_DEBUG=1
|
- OLLAMA_DEBUG=1
|
||||||
|
- OLLAMA_HOST=0.0.0.0:11434
|
||||||
command: 'serve'
|
command: 'serve'
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
@@ -26,23 +122,56 @@ services:
|
|||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
local-llm-cpu:
|
# Ollama CPU-only version (use with --profile cpu profile)
|
||||||
|
ollama-cpu:
|
||||||
profiles:
|
profiles:
|
||||||
- local-cpu # This profile requires both 'local' and 'cpu'
|
- cpu
|
||||||
image: ollama/ollama:latest
|
image: ollama/ollama:latest
|
||||||
pull_policy: always
|
pull_policy: always
|
||||||
volumes:
|
volumes:
|
||||||
- ${HOME}/.ollama:/root/.ollama
|
- ollama_data:/root/.ollama
|
||||||
ports:
|
ports:
|
||||||
- '11434:11434'
|
- '11434:11434'
|
||||||
environment:
|
environment:
|
||||||
- OLLAMA_LOAD_TIMEOUT=-1
|
- OLLAMA_LOAD_TIMEOUT=-1
|
||||||
- OLLAMA_KEEP_ALIVE=-1
|
- OLLAMA_KEEP_ALIVE=-1
|
||||||
- OLLAMA_DEBUG=1
|
- OLLAMA_DEBUG=1
|
||||||
|
- OLLAMA_HOST=0.0.0.0:11434
|
||||||
command: 'serve'
|
command: 'serve'
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
|
test: ['CMD', 'curl', '-f', 'http://localhost:11434/']
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Helper container to pull models automatically
|
||||||
|
model-setup:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
profiles:
|
||||||
|
- setup
|
||||||
|
volumes:
|
||||||
|
- ollama_data:/root/.ollama
|
||||||
|
environment:
|
||||||
|
- OLLAMA_HOST=ollama:11434
|
||||||
|
depends_on:
|
||||||
|
ollama:
|
||||||
|
condition: service_healthy
|
||||||
|
command: >
|
||||||
|
sh -c "
|
||||||
|
echo 'Waiting for Ollama to be ready...' &&
|
||||||
|
sleep 10 &&
|
||||||
|
echo 'Pulling gemma3:4b model (recommended starter model)...' &&
|
||||||
|
ollama pull gemma3:4b &&
|
||||||
|
echo 'Model setup complete! You can now use gemma3:4b in Sim.' &&
|
||||||
|
echo 'To add more models, run: docker compose -f docker-compose.ollama.yml exec ollama ollama pull <model-name>'
|
||||||
|
"
|
||||||
|
restart: 'no'
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
ollama_data:
|
||||||
|
|||||||