Compare commits

...

3 Commits

Author SHA1 Message Date
Waleed Latif
c8f4df7683 scaffolding for blog post 2026-02-11 01:06:19 -08:00
Waleed
2f492cacc1 feat(providers): add Gemini Deep Research via Interactions API (#3192)
* feat(providers): add Gemini Deep Research via Interactions API

* fix(providers): hide memory UI for deep research models

* feat(providers): add multi-turn support and token logging for deep research

* fix(providers): only collect user messages as deep research input

* fix(providers): forward previousInteractionId to provider request

* fix(blocks): hide memory child fields for deep research models

* remove memory params from models that don't support it in provider requests

* update blog
2026-02-11 01:01:59 -08:00
Vikhyath Mondreti
5792e7e5f9 fix(auth): workflow system handler (#3193) 2026-02-10 22:25:48 -08:00
12 changed files with 724 additions and 27 deletions

View File

@@ -38,6 +38,7 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
} }
const isInternalCall = auth.authType === 'internal_jwt'
const userId = auth.userId || null const userId = auth.userId || null
let workflowData = await getWorkflowById(workflowId) let workflowData = await getWorkflowById(workflowId)
@@ -47,29 +48,32 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
return NextResponse.json({ error: 'Workflow not found' }, { status: 404 }) return NextResponse.json({ error: 'Workflow not found' }, { status: 404 })
} }
// Check if user has access to this workflow if (isInternalCall && !userId) {
if (!userId) { // Internal system calls (e.g. workflow-in-workflow executor) may not carry a userId.
// These are already authenticated via internal JWT; allow read access.
logger.info(`[${requestId}] Internal API call for workflow ${workflowId}`)
} else if (!userId) {
logger.warn(`[${requestId}] Unauthorized access attempt for workflow ${workflowId}`) logger.warn(`[${requestId}] Unauthorized access attempt for workflow ${workflowId}`)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
} } else {
const authorization = await authorizeWorkflowByWorkspacePermission({
workflowId,
userId,
action: 'read',
})
if (!authorization.workflow) {
logger.warn(`[${requestId}] Workflow ${workflowId} not found`)
return NextResponse.json({ error: 'Workflow not found' }, { status: 404 })
}
const authorization = await authorizeWorkflowByWorkspacePermission({ workflowData = authorization.workflow
workflowId, if (!authorization.allowed) {
userId, logger.warn(`[${requestId}] User ${userId} denied access to workflow ${workflowId}`)
action: 'read', return NextResponse.json(
}) { error: authorization.message || 'Access denied' },
if (!authorization.workflow) { { status: authorization.status }
logger.warn(`[${requestId}] Workflow ${workflowId} not found`) )
return NextResponse.json({ error: 'Workflow not found' }, { status: 404 }) }
}
workflowData = authorization.workflow
if (!authorization.allowed) {
logger.warn(`[${requestId}] User ${userId} denied access to workflow ${workflowId}`)
return NextResponse.json(
{ error: authorization.message || 'Access denied' },
{ status: authorization.status }
)
} }
logger.debug(`[${requestId}] Attempting to load workflow ${workflowId} from normalized tables`) logger.debug(`[${requestId}] Attempting to load workflow ${workflowId} from normalized tables`)

View File

@@ -10,9 +10,11 @@ import {
getReasoningEffortValuesForModel, getReasoningEffortValuesForModel,
getThinkingLevelsForModel, getThinkingLevelsForModel,
getVerbosityValuesForModel, getVerbosityValuesForModel,
MODELS_WITH_DEEP_RESEARCH,
MODELS_WITH_REASONING_EFFORT, MODELS_WITH_REASONING_EFFORT,
MODELS_WITH_THINKING, MODELS_WITH_THINKING,
MODELS_WITH_VERBOSITY, MODELS_WITH_VERBOSITY,
MODELS_WITHOUT_MEMORY,
providers, providers,
supportsTemperature, supportsTemperature,
} from '@/providers/utils' } from '@/providers/utils'
@@ -412,12 +414,22 @@ Return ONLY the JSON array.`,
title: 'Tools', title: 'Tools',
type: 'tool-input', type: 'tool-input',
defaultValue: [], defaultValue: [],
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
not: true,
},
}, },
{ {
id: 'skills', id: 'skills',
title: 'Skills', title: 'Skills',
type: 'skill-input', type: 'skill-input',
defaultValue: [], defaultValue: [],
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
not: true,
},
}, },
{ {
id: 'memoryType', id: 'memoryType',
@@ -431,6 +443,11 @@ Return ONLY the JSON array.`,
{ label: 'Sliding window (tokens)', id: 'sliding_window_tokens' }, { label: 'Sliding window (tokens)', id: 'sliding_window_tokens' },
], ],
defaultValue: 'none', defaultValue: 'none',
condition: {
field: 'model',
value: MODELS_WITHOUT_MEMORY,
not: true,
},
}, },
{ {
id: 'conversationId', id: 'conversationId',
@@ -444,6 +461,7 @@ Return ONLY the JSON array.`,
condition: { condition: {
field: 'memoryType', field: 'memoryType',
value: ['conversation', 'sliding_window', 'sliding_window_tokens'], value: ['conversation', 'sliding_window', 'sliding_window_tokens'],
and: { field: 'model', value: MODELS_WITHOUT_MEMORY, not: true },
}, },
}, },
{ {
@@ -454,6 +472,7 @@ Return ONLY the JSON array.`,
condition: { condition: {
field: 'memoryType', field: 'memoryType',
value: ['sliding_window'], value: ['sliding_window'],
and: { field: 'model', value: MODELS_WITHOUT_MEMORY, not: true },
}, },
}, },
{ {
@@ -464,6 +483,7 @@ Return ONLY the JSON array.`,
condition: { condition: {
field: 'memoryType', field: 'memoryType',
value: ['sliding_window_tokens'], value: ['sliding_window_tokens'],
and: { field: 'model', value: MODELS_WITHOUT_MEMORY, not: true },
}, },
}, },
{ {
@@ -477,9 +497,13 @@ Return ONLY the JSON array.`,
condition: () => ({ condition: () => ({
field: 'model', field: 'model',
value: (() => { value: (() => {
const deepResearch = new Set(MODELS_WITH_DEEP_RESEARCH.map((m) => m.toLowerCase()))
const allModels = Object.keys(getBaseModelProviders()) const allModels = Object.keys(getBaseModelProviders())
return allModels.filter( return allModels.filter(
(model) => supportsTemperature(model) && getMaxTemperature(model) === 1 (model) =>
supportsTemperature(model) &&
getMaxTemperature(model) === 1 &&
!deepResearch.has(model.toLowerCase())
) )
})(), })(),
}), }),
@@ -495,9 +519,13 @@ Return ONLY the JSON array.`,
condition: () => ({ condition: () => ({
field: 'model', field: 'model',
value: (() => { value: (() => {
const deepResearch = new Set(MODELS_WITH_DEEP_RESEARCH.map((m) => m.toLowerCase()))
const allModels = Object.keys(getBaseModelProviders()) const allModels = Object.keys(getBaseModelProviders())
return allModels.filter( return allModels.filter(
(model) => supportsTemperature(model) && getMaxTemperature(model) === 2 (model) =>
supportsTemperature(model) &&
getMaxTemperature(model) === 2 &&
!deepResearch.has(model.toLowerCase())
) )
})(), })(),
}), }),
@@ -508,6 +536,11 @@ Return ONLY the JSON array.`,
type: 'short-input', type: 'short-input',
placeholder: 'Enter max tokens (e.g., 4096)...', placeholder: 'Enter max tokens (e.g., 4096)...',
mode: 'advanced', mode: 'advanced',
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
not: true,
},
}, },
{ {
id: 'responseFormat', id: 'responseFormat',
@@ -515,6 +548,11 @@ Return ONLY the JSON array.`,
type: 'code', type: 'code',
placeholder: 'Enter JSON schema...', placeholder: 'Enter JSON schema...',
language: 'json', language: 'json',
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
not: true,
},
wandConfig: { wandConfig: {
enabled: true, enabled: true,
maintainHistory: true, maintainHistory: true,
@@ -607,6 +645,16 @@ Example 3 (Array Input):
generationType: 'json-schema', generationType: 'json-schema',
}, },
}, },
{
id: 'previousInteractionId',
title: 'Previous Interaction ID',
type: 'short-input',
placeholder: 'e.g., {{agent_1.interactionId}}',
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
},
},
], ],
tools: { tools: {
access: [ access: [
@@ -770,5 +818,13 @@ Example 3 (Array Input):
description: 'Provider timing information', description: 'Provider timing information',
}, },
cost: { type: 'json', description: 'Cost of the API call' }, cost: { type: 'json', description: 'Cost of the API call' },
interactionId: {
type: 'string',
description: 'Interaction ID for multi-turn deep research follow-ups',
condition: {
field: 'model',
value: MODELS_WITH_DEEP_RESEARCH,
},
},
}, },
} }

View File

@@ -2,8 +2,8 @@
slug: enterprise slug: enterprise
title: 'Build with Sim for Enterprise' title: 'Build with Sim for Enterprise'
description: 'Access control, BYOK, self-hosted deployments, on-prem Copilot, SSO & SAML, whitelabeling, Admin API, and flexible data retention—enterprise features for teams with strict security and compliance requirements.' description: 'Access control, BYOK, self-hosted deployments, on-prem Copilot, SSO & SAML, whitelabeling, Admin API, and flexible data retention—enterprise features for teams with strict security and compliance requirements.'
date: 2026-01-23 date: 2026-02-11
updated: 2026-01-23 updated: 2026-02-11
authors: authors:
- vik - vik
readingTime: 10 readingTime: 10
@@ -13,8 +13,8 @@ ogAlt: 'Sim Enterprise features overview'
about: ['Enterprise Software', 'Security', 'Compliance', 'Self-Hosting'] about: ['Enterprise Software', 'Security', 'Compliance', 'Self-Hosting']
timeRequired: PT10M timeRequired: PT10M
canonical: https://sim.ai/studio/enterprise canonical: https://sim.ai/studio/enterprise
featured: false featured: true
draft: true draft: false
--- ---
We've been working with security teams at larger organizations to bring Sim into environments with strict compliance and data handling requirements. This post covers the enterprise capabilities we've built: granular access control, bring-your-own-keys, self-hosted deployments, on-prem Copilot, SSO & SAML, whitelabeling, compliance, and programmatic management via the Admin API. We've been working with security teams at larger organizations to bring Sim into environments with strict compliance and data handling requirements. This post covers the enterprise capabilities we've built: granular access control, bring-your-own-keys, self-hosted deployments, on-prem Copilot, SSO & SAML, whitelabeling, compliance, and programmatic management via the Admin API.

View File

@@ -0,0 +1,90 @@
---
slug: workflow-bench
title: 'Introducing Workflow Bench - Benchmarking Natural Language Workflow Building'
description: 'How we built a benchmark to measure how well AI models translate natural language instructions into executable workflows, and what we learned along the way'
date: 2026-02-11
updated: 2026-02-11
authors:
- sid
readingTime: 10
tags: [Benchmark, Evaluation, Workflows, Natural Language]
ogImage: /studio/workflow-bench/cover.png
ogAlt: 'Workflow Bench benchmark overview'
about: ['Benchmarking', 'Workflow Building', 'Natural Language']
timeRequired: PT10M
canonical: https://sim.ai/studio/workflow-bench
featured: false
draft: true
---
Building workflows from natural language sounds straightforward until you try to measure it. When a user says "send me a Slack message every morning with a summary of my unread emails," how do you evaluate whether the resulting workflow is correct? Is partial credit fair? What about workflows that are functionally equivalent but structurally different?
We built Workflow Bench to answer these questions. This post covers why we needed a dedicated benchmark, how we designed it, and what the results tell us about the current state of natural language workflow building.
## Why a Workflow Benchmark?
<!-- TODO: Motivation for building Workflow Bench -->
<!-- - Gap in existing benchmarks (code gen benchmarks don't capture workflow semantics) -->
<!-- - Need to track progress as we iterate on the copilot / natural language builder -->
<!-- - Workflows are structured artifacts, not just code — they have topology, block types, connections, configs -->
## What We're Measuring
<!-- TODO: Define the core evaluation dimensions -->
<!-- - Structural correctness (right blocks, right connections) -->
<!-- - Configuration accuracy (correct params, API mappings) -->
<!-- - Functional equivalence (does it do the same thing even if shaped differently?) -->
<!-- - Edge cases: loops, conditionals, parallel branches, error handling -->
## Benchmark Design
<!-- TODO: How the benchmark dataset is constructed -->
<!-- - Task categories and complexity tiers -->
<!-- - How ground truth workflows are defined -->
<!-- - Natural language prompt variations (terse vs. detailed, ambiguous vs. precise) -->
### Task Categories
<!-- TODO: Break down the types of workflows in the benchmark -->
<!-- - Simple linear (A → B → C) -->
<!-- - Branching / conditional -->
<!-- - Looping / iterative -->
<!-- - Parallel fan-out / fan-in -->
<!-- - Multi-trigger -->
### Scoring
<!-- TODO: Explain the scoring methodology -->
<!-- - How partial credit works -->
<!-- - Structural similarity metrics -->
<!-- - Config-level accuracy -->
<!-- - Overall composite score -->
## Evaluation Pipeline
<!-- TODO: How we run the benchmark end to end -->
<!-- - Prompt → model → workflow JSON → evaluator → score -->
<!-- - Automation and reproducibility -->
<!-- - How we handle non-determinism across runs -->
## Results
<!-- TODO: Present the benchmark results -->
<!-- - Model comparisons -->
<!-- - Performance by task category -->
<!-- - Where models struggle most -->
<!-- - Trends over time as we iterate -->
## What We Learned
<!-- TODO: Key takeaways from running the benchmark -->
<!-- - Surprising strengths and weaknesses -->
<!-- - How benchmark results influenced product decisions -->
<!-- - Common failure modes -->
## What's Next
<!-- TODO: Future directions -->
<!-- - Expanding the benchmark (more tasks, more complexity) -->
<!-- - Community contributions / open-sourcing -->
<!-- - Using the benchmark to guide copilot improvements -->

View File

@@ -999,6 +999,7 @@ export class AgentBlockHandler implements BlockHandler {
reasoningEffort: inputs.reasoningEffort, reasoningEffort: inputs.reasoningEffort,
verbosity: inputs.verbosity, verbosity: inputs.verbosity,
thinkingLevel: inputs.thinkingLevel, thinkingLevel: inputs.thinkingLevel,
previousInteractionId: inputs.previousInteractionId,
} }
} }
@@ -1069,6 +1070,7 @@ export class AgentBlockHandler implements BlockHandler {
reasoningEffort: providerRequest.reasoningEffort, reasoningEffort: providerRequest.reasoningEffort,
verbosity: providerRequest.verbosity, verbosity: providerRequest.verbosity,
thinkingLevel: providerRequest.thinkingLevel, thinkingLevel: providerRequest.thinkingLevel,
previousInteractionId: providerRequest.previousInteractionId,
}) })
return this.processProviderResponse(response, block, responseFormat) return this.processProviderResponse(response, block, responseFormat)
@@ -1269,6 +1271,7 @@ export class AgentBlockHandler implements BlockHandler {
content: result.content, content: result.content,
model: result.model, model: result.model,
...this.createResponseMetadata(result), ...this.createResponseMetadata(result),
...(result.interactionId && { interactionId: result.interactionId }),
} }
} }

View File

@@ -20,6 +20,8 @@ export interface AgentInputs {
conversationId?: string // Required for all non-none memory types conversationId?: string // Required for all non-none memory types
slidingWindowSize?: string // For message-based sliding window slidingWindowSize?: string // For message-based sliding window
slidingWindowTokens?: string // For token-based sliding window slidingWindowTokens?: string // For token-based sliding window
// Deep research multi-turn
previousInteractionId?: string // Interactions API previous interaction reference
// LLM parameters // LLM parameters
temperature?: string temperature?: string
maxTokens?: string maxTokens?: string

View File

@@ -5,6 +5,7 @@ import {
type GenerateContentConfig, type GenerateContentConfig,
type GenerateContentResponse, type GenerateContentResponse,
type GoogleGenAI, type GoogleGenAI,
type Interactions,
type Part, type Part,
type Schema, type Schema,
type ThinkingConfig, type ThinkingConfig,
@@ -27,6 +28,7 @@ import {
import type { FunctionCallResponse, ProviderRequest, ProviderResponse } from '@/providers/types' import type { FunctionCallResponse, ProviderRequest, ProviderResponse } from '@/providers/types'
import { import {
calculateCost, calculateCost,
isDeepResearchModel,
prepareToolExecution, prepareToolExecution,
prepareToolsWithUsageControl, prepareToolsWithUsageControl,
} from '@/providers/utils' } from '@/providers/utils'
@@ -381,6 +383,468 @@ export interface GeminiExecutionConfig {
providerType: GeminiProviderType providerType: GeminiProviderType
} }
const DEEP_RESEARCH_POLL_INTERVAL_MS = 10_000
const DEEP_RESEARCH_MAX_DURATION_MS = 60 * 60 * 1000
/**
* Sleeps for the specified number of milliseconds
*/
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms))
}
/**
* Collapses a ProviderRequest into a single input string and optional system instruction
* for the Interactions API, which takes a flat input rather than a messages array.
*
* Deep research is single-turn only — it takes one research query and returns a report.
* Memory/conversation history is hidden in the UI for deep research models, so only
* the last user message is used as input. System messages are passed via system_instruction.
*/
function collapseMessagesToInput(request: ProviderRequest): {
input: string
systemInstruction: string | undefined
} {
const systemParts: string[] = []
const userParts: string[] = []
if (request.systemPrompt) {
systemParts.push(request.systemPrompt)
}
if (request.messages) {
for (const msg of request.messages) {
if (msg.role === 'system' && msg.content) {
systemParts.push(msg.content)
} else if (msg.role === 'user' && msg.content) {
userParts.push(msg.content)
}
}
}
return {
input:
userParts.length > 0
? userParts[userParts.length - 1]
: 'Please conduct research on the provided topic.',
systemInstruction: systemParts.length > 0 ? systemParts.join('\n\n') : undefined,
}
}
/**
* Extracts text content from a completed interaction's outputs array.
* The outputs array can contain text, thought, google_search_result, and other types.
* We concatenate all text outputs to get the full research report.
*/
function extractTextFromInteractionOutputs(outputs: Interactions.Interaction['outputs']): string {
if (!outputs || outputs.length === 0) return ''
const textParts: string[] = []
for (const output of outputs) {
if (output.type === 'text') {
const text = (output as Interactions.TextContent).text
if (text) textParts.push(text)
}
}
return textParts.join('\n\n')
}
/**
* Extracts token usage from an Interaction's Usage object.
* The Interactions API provides total_input_tokens, total_output_tokens, total_tokens,
* and total_reasoning_tokens (for thinking models).
*
* Also handles the raw API field name total_thought_tokens which the SDK may
* map to total_reasoning_tokens.
*/
function extractInteractionUsage(usage: Interactions.Usage | undefined): {
inputTokens: number
outputTokens: number
reasoningTokens: number
totalTokens: number
} {
if (!usage) {
return { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, totalTokens: 0 }
}
const usageLogger = createLogger('DeepResearchUsage')
usageLogger.info('Raw interaction usage', { usage: JSON.stringify(usage) })
const inputTokens = usage.total_input_tokens ?? 0
const outputTokens = usage.total_output_tokens ?? 0
const reasoningTokens =
usage.total_reasoning_tokens ??
((usage as Record<string, unknown>).total_thought_tokens as number) ??
0
const totalTokens = usage.total_tokens ?? inputTokens + outputTokens
return { inputTokens, outputTokens, reasoningTokens, totalTokens }
}
/**
* Builds a standard ProviderResponse from a completed deep research interaction.
*/
function buildDeepResearchResponse(
content: string,
model: string,
usage: {
inputTokens: number
outputTokens: number
reasoningTokens: number
totalTokens: number
},
providerStartTime: number,
providerStartTimeISO: string,
interactionId?: string
): ProviderResponse {
const providerEndTime = Date.now()
const duration = providerEndTime - providerStartTime
return {
content,
model,
tokens: {
input: usage.inputTokens,
output: usage.outputTokens,
total: usage.totalTokens,
},
timing: {
startTime: providerStartTimeISO,
endTime: new Date(providerEndTime).toISOString(),
duration,
modelTime: duration,
toolsTime: 0,
firstResponseTime: duration,
iterations: 1,
timeSegments: [
{
type: 'model',
name: 'Deep research',
startTime: providerStartTime,
endTime: providerEndTime,
duration,
},
],
},
cost: calculateCost(model, usage.inputTokens, usage.outputTokens),
interactionId,
}
}
/**
* Creates a ReadableStream from a deep research streaming interaction.
*
* Deep research streaming returns InteractionSSEEvent chunks including:
* - interaction.start: initial interaction with ID
* - content.delta: incremental text and thought_summary updates
* - content.start / content.stop: output boundaries
* - interaction.complete: final event (outputs is undefined in streaming; must reconstruct)
* - error: error events
*
* We stream text deltas to the client and track usage from the interaction.complete event.
*/
function createDeepResearchStream(
stream: AsyncIterable<Interactions.InteractionSSEEvent>,
onComplete?: (
content: string,
usage: {
inputTokens: number
outputTokens: number
reasoningTokens: number
totalTokens: number
},
interactionId?: string
) => void
): ReadableStream<Uint8Array> {
const streamLogger = createLogger('DeepResearchStream')
let fullContent = ''
let completionUsage = { inputTokens: 0, outputTokens: 0, reasoningTokens: 0, totalTokens: 0 }
let completedInteractionId: string | undefined
return new ReadableStream({
async start(controller) {
try {
for await (const event of stream) {
if (event.event_type === 'content.delta') {
const delta = (event as Interactions.ContentDelta).delta
if (delta?.type === 'text' && 'text' in delta && delta.text) {
fullContent += delta.text
controller.enqueue(new TextEncoder().encode(delta.text))
}
} else if (event.event_type === 'interaction.complete') {
const interaction = (event as Interactions.InteractionEvent).interaction
if (interaction?.usage) {
completionUsage = extractInteractionUsage(interaction.usage)
}
completedInteractionId = interaction?.id
} else if (event.event_type === 'interaction.start') {
const interaction = (event as Interactions.InteractionEvent).interaction
if (interaction?.id) {
completedInteractionId = interaction.id
}
} else if (event.event_type === 'error') {
const errorEvent = event as { error?: { code?: string; message?: string } }
const message = errorEvent.error?.message ?? 'Unknown deep research stream error'
streamLogger.error('Deep research stream error', {
code: errorEvent.error?.code,
message,
})
controller.error(new Error(message))
return
}
}
onComplete?.(fullContent, completionUsage, completedInteractionId)
controller.close()
} catch (error) {
streamLogger.error('Error reading deep research stream', {
error: error instanceof Error ? error.message : String(error),
})
controller.error(error)
}
},
})
}
/**
* Executes a deep research request using the Interactions API.
*
* Deep research uses the Interactions API ({@link https://ai.google.dev/api/interactions-api}),
* a completely different surface from generateContent. It creates a background interaction
* that performs comprehensive research (up to 60 minutes).
*
* Supports both streaming and non-streaming modes:
* - Streaming: returns a StreamingExecution with a ReadableStream of text deltas
* - Non-streaming: polls until completion and returns a ProviderResponse
*
* Deep research does NOT support custom function calling tools, MCP servers,
* or structured output (response_format). These are gracefully ignored.
*/
export async function executeDeepResearchRequest(
config: GeminiExecutionConfig
): Promise<ProviderResponse | StreamingExecution> {
const { ai, model, request, providerType } = config
const logger = createLogger(providerType === 'google' ? 'GoogleProvider' : 'VertexProvider')
logger.info('Preparing deep research request', {
model,
hasSystemPrompt: !!request.systemPrompt,
hasMessages: !!request.messages?.length,
streaming: !!request.stream,
hasPreviousInteractionId: !!request.previousInteractionId,
})
if (request.tools?.length) {
logger.warn('Deep research does not support custom tools — ignoring tools parameter')
}
if (request.responseFormat) {
logger.warn(
'Deep research does not support structured output — ignoring responseFormat parameter'
)
}
const providerStartTime = Date.now()
const providerStartTimeISO = new Date(providerStartTime).toISOString()
try {
const { input, systemInstruction } = collapseMessagesToInput(request)
// Deep research requires background=true and store=true (store defaults to true,
// but we set it explicitly per API requirements)
const baseParams = {
agent: model as Interactions.CreateAgentInteractionParamsNonStreaming['agent'],
input,
background: true,
store: true,
...(systemInstruction && { system_instruction: systemInstruction }),
...(request.previousInteractionId && {
previous_interaction_id: request.previousInteractionId,
}),
agent_config: {
type: 'deep-research' as const,
thinking_summaries: 'auto' as const,
},
}
logger.info('Creating deep research interaction', {
inputLength: input.length,
hasSystemInstruction: !!systemInstruction,
streaming: !!request.stream,
})
// Streaming mode: create a streaming interaction and return a StreamingExecution
if (request.stream) {
const streamParams: Interactions.CreateAgentInteractionParamsStreaming = {
...baseParams,
stream: true,
}
const streamResponse = await ai.interactions.create(streamParams)
const firstResponseTime = Date.now() - providerStartTime
const streamingResult: StreamingExecution = {
stream: undefined as unknown as ReadableStream<Uint8Array>,
execution: {
success: true,
output: {
content: '',
model,
tokens: { input: 0, output: 0, total: 0 },
providerTiming: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
modelTime: firstResponseTime,
toolsTime: 0,
firstResponseTime,
iterations: 1,
timeSegments: [
{
type: 'model',
name: 'Deep research (streaming)',
startTime: providerStartTime,
endTime: providerStartTime + firstResponseTime,
duration: firstResponseTime,
},
],
},
cost: {
input: 0,
output: 0,
total: 0,
pricing: { input: 0, output: 0, updatedAt: new Date().toISOString() },
},
},
logs: [],
metadata: {
startTime: providerStartTimeISO,
endTime: new Date().toISOString(),
duration: Date.now() - providerStartTime,
},
isStreaming: true,
},
}
streamingResult.stream = createDeepResearchStream(
streamResponse,
(content, usage, streamInteractionId) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
input: usage.inputTokens,
output: usage.outputTokens,
total: usage.totalTokens,
}
streamingResult.execution.output.interactionId = streamInteractionId
const cost = calculateCost(model, usage.inputTokens, usage.outputTokens)
streamingResult.execution.output.cost = cost
const streamEndTime = Date.now()
if (streamingResult.execution.output.providerTiming) {
streamingResult.execution.output.providerTiming.endTime = new Date(
streamEndTime
).toISOString()
streamingResult.execution.output.providerTiming.duration =
streamEndTime - providerStartTime
const segments = streamingResult.execution.output.providerTiming.timeSegments
if (segments?.[0]) {
segments[0].endTime = streamEndTime
segments[0].duration = streamEndTime - providerStartTime
}
}
}
)
return streamingResult
}
// Non-streaming mode: create and poll
const createParams: Interactions.CreateAgentInteractionParamsNonStreaming = {
...baseParams,
stream: false,
}
const interaction = await ai.interactions.create(createParams)
const interactionId = interaction.id
logger.info('Deep research interaction created', { interactionId, status: interaction.status })
// Poll until a terminal status
const pollStartTime = Date.now()
let result: Interactions.Interaction = interaction
while (Date.now() - pollStartTime < DEEP_RESEARCH_MAX_DURATION_MS) {
if (result.status === 'completed') {
break
}
if (result.status === 'failed') {
throw new Error(`Deep research interaction failed: ${interactionId}`)
}
if (result.status === 'cancelled') {
throw new Error(`Deep research interaction was cancelled: ${interactionId}`)
}
logger.info('Deep research in progress, polling...', {
interactionId,
status: result.status,
elapsedMs: Date.now() - pollStartTime,
})
await sleep(DEEP_RESEARCH_POLL_INTERVAL_MS)
result = await ai.interactions.get(interactionId)
}
if (result.status !== 'completed') {
throw new Error(
`Deep research timed out after ${DEEP_RESEARCH_MAX_DURATION_MS / 1000}s (status: ${result.status})`
)
}
const content = extractTextFromInteractionOutputs(result.outputs)
const usage = extractInteractionUsage(result.usage)
logger.info('Deep research completed', {
interactionId,
contentLength: content.length,
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
reasoningTokens: usage.reasoningTokens,
totalTokens: usage.totalTokens,
durationMs: Date.now() - providerStartTime,
})
return buildDeepResearchResponse(
content,
model,
usage,
providerStartTime,
providerStartTimeISO,
interactionId
)
} catch (error) {
const providerEndTime = Date.now()
const duration = providerEndTime - providerStartTime
logger.error('Error in deep research request:', {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
})
const enhancedError = error instanceof Error ? error : new Error(String(error))
Object.assign(enhancedError, {
timing: {
startTime: providerStartTimeISO,
endTime: new Date(providerEndTime).toISOString(),
duration,
},
})
throw enhancedError
}
}
/** /**
* Executes a request using the Gemini API * Executes a request using the Gemini API
* *
@@ -391,6 +855,12 @@ export async function executeGeminiRequest(
config: GeminiExecutionConfig config: GeminiExecutionConfig
): Promise<ProviderResponse | StreamingExecution> { ): Promise<ProviderResponse | StreamingExecution> {
const { ai, model, request, providerType } = config const { ai, model, request, providerType } = config
// Route deep research models to the interactions API
if (isDeepResearchModel(model)) {
return executeDeepResearchRequest(config)
}
const logger = createLogger(providerType === 'google' ? 'GoogleProvider' : 'VertexProvider') const logger = createLogger(providerType === 'google' ? 'GoogleProvider' : 'VertexProvider')
logger.info(`Preparing ${providerType} Gemini request`, { logger.info(`Preparing ${providerType} Gemini request`, {

View File

@@ -46,6 +46,9 @@ export interface ModelCapabilities {
levels: string[] levels: string[]
default?: string default?: string
} }
deepResearch?: boolean
/** Whether this model supports conversation memory. Defaults to true if omitted. */
memory?: boolean
} }
export interface ModelDefinition { export interface ModelDefinition {
@@ -825,7 +828,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
name: 'Google', name: 'Google',
description: "Google's Gemini models", description: "Google's Gemini models",
defaultModel: 'gemini-2.5-pro', defaultModel: 'gemini-2.5-pro',
modelPatterns: [/^gemini/], modelPatterns: [/^gemini/, /^deep-research/],
capabilities: { capabilities: {
toolUsageControl: true, toolUsageControl: true,
}, },
@@ -928,6 +931,19 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
}, },
contextWindow: 1000000, contextWindow: 1000000,
}, },
{
id: 'deep-research-pro-preview-12-2025',
pricing: {
input: 2.0,
output: 2.0,
updatedAt: '2026-02-10',
},
capabilities: {
deepResearch: true,
memory: false,
},
contextWindow: 1000000,
},
], ],
}, },
vertex: { vertex: {
@@ -1038,6 +1054,19 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
}, },
contextWindow: 1000000, contextWindow: 1000000,
}, },
{
id: 'vertex/deep-research-pro-preview-12-2025',
pricing: {
input: 2.0,
output: 2.0,
updatedAt: '2026-02-10',
},
capabilities: {
deepResearch: true,
memory: false,
},
contextWindow: 1000000,
},
], ],
}, },
deepseek: { deepseek: {
@@ -2480,6 +2509,37 @@ export function getThinkingLevelsForModel(modelId: string): string[] | null {
return capability?.levels ?? null return capability?.levels ?? null
} }
/**
* Get all models that support deep research capability
*/
export function getModelsWithDeepResearch(): string[] {
const models: string[] = []
for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
for (const model of provider.models) {
if (model.capabilities.deepResearch) {
models.push(model.id)
}
}
}
return models
}
/**
* Get all models that explicitly disable memory support (memory: false).
* Models without this capability default to supporting memory.
*/
export function getModelsWithoutMemory(): string[] {
const models: string[] = []
for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
for (const model of provider.models) {
if (model.capabilities.memory === false) {
models.push(model.id)
}
}
}
return models
}
/** /**
* Get the max output tokens for a specific model. * Get the max output tokens for a specific model.
* *

View File

@@ -95,6 +95,8 @@ export interface ProviderResponse {
total: number total: number
pricing: ModelPricing pricing: ModelPricing
} }
/** Interaction ID returned by the Interactions API (used for multi-turn deep research) */
interactionId?: string
} }
export type ToolUsageControl = 'auto' | 'force' | 'none' export type ToolUsageControl = 'auto' | 'force' | 'none'
@@ -169,6 +171,8 @@ export interface ProviderRequest {
verbosity?: string verbosity?: string
thinkingLevel?: string thinkingLevel?: string
isDeployedContext?: boolean isDeployedContext?: boolean
/** Previous interaction ID for multi-turn Interactions API requests (deep research follow-ups) */
previousInteractionId?: string
} }
export const providers: Record<string, ProviderConfig> = {} export const providers: Record<string, ProviderConfig> = {}

View File

@@ -12,6 +12,8 @@ import {
getMaxOutputTokensForModel as getMaxOutputTokensForModelFromDefinitions, getMaxOutputTokensForModel as getMaxOutputTokensForModelFromDefinitions,
getMaxTemperature as getMaxTempFromDefinitions, getMaxTemperature as getMaxTempFromDefinitions,
getModelPricing as getModelPricingFromDefinitions, getModelPricing as getModelPricingFromDefinitions,
getModelsWithDeepResearch,
getModelsWithoutMemory,
getModelsWithReasoningEffort, getModelsWithReasoningEffort,
getModelsWithTemperatureSupport, getModelsWithTemperatureSupport,
getModelsWithTempRange01, getModelsWithTempRange01,
@@ -953,6 +955,8 @@ export const MODELS_WITH_TEMPERATURE_SUPPORT = getModelsWithTemperatureSupport()
export const MODELS_WITH_REASONING_EFFORT = getModelsWithReasoningEffort() export const MODELS_WITH_REASONING_EFFORT = getModelsWithReasoningEffort()
export const MODELS_WITH_VERBOSITY = getModelsWithVerbosity() export const MODELS_WITH_VERBOSITY = getModelsWithVerbosity()
export const MODELS_WITH_THINKING = getModelsWithThinking() export const MODELS_WITH_THINKING = getModelsWithThinking()
export const MODELS_WITH_DEEP_RESEARCH = getModelsWithDeepResearch()
export const MODELS_WITHOUT_MEMORY = getModelsWithoutMemory()
export const PROVIDERS_WITH_TOOL_USAGE_CONTROL = getProvidersWithToolUsageControl() export const PROVIDERS_WITH_TOOL_USAGE_CONTROL = getProvidersWithToolUsageControl()
export function supportsTemperature(model: string): boolean { export function supportsTemperature(model: string): boolean {
@@ -971,6 +975,10 @@ export function supportsThinking(model: string): boolean {
return MODELS_WITH_THINKING.includes(model.toLowerCase()) return MODELS_WITH_THINKING.includes(model.toLowerCase())
} }
export function isDeepResearchModel(model: string): boolean {
return MODELS_WITH_DEEP_RESEARCH.includes(model.toLowerCase())
}
/** /**
* Get the maximum temperature value for a model * Get the maximum temperature value for a model
* @returns Maximum temperature value (1 or 2) or undefined if temperature not supported * @returns Maximum temperature value (1 or 2) or undefined if temperature not supported

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

After

Width:  |  Height:  |  Size: 58 KiB