mirror of
https://github.com/simstudioai/sim.git
synced 2026-02-05 04:05:14 -05:00
fix(tracespans): update tracespans tool calls to accurately display inputs for successive identical tool calls (#3140)
This commit is contained in:
@@ -1,16 +1,11 @@
|
||||
import { beforeEach, describe, expect, test } from 'vitest'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { SnapshotService } from '@/lib/logs/execution/snapshot/service'
|
||||
import type { WorkflowState } from '@/lib/logs/types'
|
||||
|
||||
describe('SnapshotService', () => {
|
||||
let service: SnapshotService
|
||||
|
||||
beforeEach(() => {
|
||||
service = new SnapshotService()
|
||||
})
|
||||
|
||||
describe('computeStateHash', () => {
|
||||
test('should generate consistent hashes for identical states', () => {
|
||||
it.concurrent('should generate consistent hashes for identical states', () => {
|
||||
const service = new SnapshotService()
|
||||
const state: WorkflowState = {
|
||||
blocks: {
|
||||
block1: {
|
||||
@@ -39,7 +34,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash1).toHaveLength(64) // SHA-256 hex string
|
||||
})
|
||||
|
||||
test('should ignore position changes', () => {
|
||||
it.concurrent('should ignore position changes', () => {
|
||||
const service = new SnapshotService()
|
||||
const baseState: WorkflowState = {
|
||||
blocks: {
|
||||
block1: {
|
||||
@@ -77,7 +73,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash1).toBe(hash2)
|
||||
})
|
||||
|
||||
test('should detect meaningful changes', () => {
|
||||
it.concurrent('should detect meaningful changes', () => {
|
||||
const service = new SnapshotService()
|
||||
const baseState: WorkflowState = {
|
||||
blocks: {
|
||||
block1: {
|
||||
@@ -128,7 +125,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash1).not.toBe(hash2)
|
||||
})
|
||||
|
||||
test('should handle edge order consistently', () => {
|
||||
it.concurrent('should handle edge order consistently', () => {
|
||||
const service = new SnapshotService()
|
||||
const state1: WorkflowState = {
|
||||
blocks: {},
|
||||
edges: [
|
||||
@@ -155,7 +153,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash1).toBe(hash2) // Should be same despite different order
|
||||
})
|
||||
|
||||
test('should handle empty states', () => {
|
||||
it.concurrent('should handle empty states', () => {
|
||||
const service = new SnapshotService()
|
||||
const emptyState: WorkflowState = {
|
||||
blocks: {},
|
||||
edges: [],
|
||||
@@ -167,7 +166,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash).toHaveLength(64)
|
||||
})
|
||||
|
||||
test('should handle complex nested structures', () => {
|
||||
it.concurrent('should handle complex nested structures', () => {
|
||||
const service = new SnapshotService()
|
||||
const complexState: WorkflowState = {
|
||||
blocks: {
|
||||
block1: {
|
||||
@@ -224,7 +224,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash).toBe(hash2)
|
||||
})
|
||||
|
||||
test('should include variables in hash computation', () => {
|
||||
it.concurrent('should include variables in hash computation', () => {
|
||||
const service = new SnapshotService()
|
||||
const stateWithVariables: WorkflowState = {
|
||||
blocks: {},
|
||||
edges: [],
|
||||
@@ -253,7 +254,8 @@ describe('SnapshotService', () => {
|
||||
expect(hashWith).not.toBe(hashWithout)
|
||||
})
|
||||
|
||||
test('should detect changes in variable values', () => {
|
||||
it.concurrent('should detect changes in variable values', () => {
|
||||
const service = new SnapshotService()
|
||||
const state1: WorkflowState = {
|
||||
blocks: {},
|
||||
edges: [],
|
||||
@@ -290,7 +292,8 @@ describe('SnapshotService', () => {
|
||||
expect(hash1).not.toBe(hash2)
|
||||
})
|
||||
|
||||
test('should generate consistent hashes for states with variables', () => {
|
||||
it.concurrent('should generate consistent hashes for states with variables', () => {
|
||||
const service = new SnapshotService()
|
||||
const stateWithVariables: WorkflowState = {
|
||||
blocks: {
|
||||
block1: {
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
import { describe, expect, test } from 'vitest'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans'
|
||||
import { stripCustomToolPrefix } from '@/executor/constants'
|
||||
import type { ExecutionResult } from '@/executor/types'
|
||||
|
||||
describe('buildTraceSpans', () => {
|
||||
test('should extract sequential segments from timeSegments data', () => {
|
||||
it.concurrent('extracts sequential segments from timeSegments data', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
@@ -119,7 +119,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(segments[3].status).toBe('success')
|
||||
})
|
||||
|
||||
test('should fallback to toolCalls extraction when timeSegments not available', () => {
|
||||
it.concurrent('falls back to toolCalls extraction when timeSegments not available', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
@@ -194,60 +194,63 @@ describe('buildTraceSpans', () => {
|
||||
expect(secondToolCall.output).toEqual({ status: 200, data: 'response' })
|
||||
})
|
||||
|
||||
test('should extract tool calls from agent block output with direct toolCalls array format (fallback)', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
logs: [
|
||||
{
|
||||
blockId: 'agent-2',
|
||||
blockName: 'Test Agent 2',
|
||||
blockType: 'agent',
|
||||
startedAt: '2024-01-01T10:00:00.000Z',
|
||||
endedAt: '2024-01-01T10:00:03.000Z',
|
||||
durationMs: 3000,
|
||||
success: true,
|
||||
input: { userPrompt: 'Test prompt' },
|
||||
output: {
|
||||
content: 'Agent response',
|
||||
model: 'gpt-4o',
|
||||
providerTiming: {
|
||||
duration: 2500,
|
||||
startTime: '2024-01-01T10:00:00.250Z',
|
||||
endTime: '2024-01-01T10:00:02.750Z',
|
||||
// No timeSegments - should fallback to toolCalls
|
||||
},
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'serper_search',
|
||||
arguments: { query: 'test search' },
|
||||
result: { results: ['result1', 'result2'] },
|
||||
duration: 1500,
|
||||
startTime: '2024-01-01T10:00:00.500Z',
|
||||
endTime: '2024-01-01T10:00:02.000Z',
|
||||
it.concurrent(
|
||||
'extracts tool calls from agent block output with direct toolCalls array format',
|
||||
() => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
logs: [
|
||||
{
|
||||
blockId: 'agent-2',
|
||||
blockName: 'Test Agent 2',
|
||||
blockType: 'agent',
|
||||
startedAt: '2024-01-01T10:00:00.000Z',
|
||||
endedAt: '2024-01-01T10:00:03.000Z',
|
||||
durationMs: 3000,
|
||||
success: true,
|
||||
input: { userPrompt: 'Test prompt' },
|
||||
output: {
|
||||
content: 'Agent response',
|
||||
model: 'gpt-4o',
|
||||
providerTiming: {
|
||||
duration: 2500,
|
||||
startTime: '2024-01-01T10:00:00.250Z',
|
||||
endTime: '2024-01-01T10:00:02.750Z',
|
||||
// No timeSegments - should fallback to toolCalls
|
||||
},
|
||||
],
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'serper_search',
|
||||
arguments: { query: 'test search' },
|
||||
result: { results: ['result1', 'result2'] },
|
||||
duration: 1500,
|
||||
startTime: '2024-01-01T10:00:00.500Z',
|
||||
endTime: '2024-01-01T10:00:02.000Z',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
}
|
||||
|
||||
const { traceSpans } = buildTraceSpans(mockExecutionResult)
|
||||
|
||||
expect(traceSpans).toHaveLength(1)
|
||||
const agentSpan = traceSpans[0]
|
||||
expect(agentSpan.toolCalls).toBeDefined()
|
||||
expect(agentSpan.toolCalls).toHaveLength(1)
|
||||
|
||||
const toolCall = agentSpan.toolCalls![0]
|
||||
expect(toolCall.name).toBe('serper_search')
|
||||
expect(toolCall.duration).toBe(1500)
|
||||
expect(toolCall.status).toBe('success')
|
||||
expect(toolCall.input).toEqual({ query: 'test search' })
|
||||
expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
|
||||
}
|
||||
)
|
||||
|
||||
const { traceSpans } = buildTraceSpans(mockExecutionResult)
|
||||
|
||||
expect(traceSpans).toHaveLength(1)
|
||||
const agentSpan = traceSpans[0]
|
||||
expect(agentSpan.toolCalls).toBeDefined()
|
||||
expect(agentSpan.toolCalls).toHaveLength(1)
|
||||
|
||||
const toolCall = agentSpan.toolCalls![0]
|
||||
expect(toolCall.name).toBe('serper_search')
|
||||
expect(toolCall.duration).toBe(1500)
|
||||
expect(toolCall.status).toBe('success')
|
||||
expect(toolCall.input).toEqual({ query: 'test search' })
|
||||
expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
|
||||
})
|
||||
|
||||
test('should extract tool calls from streaming response with executionData format (fallback)', () => {
|
||||
it.concurrent('extracts tool calls from streaming response with executionData format', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
@@ -301,7 +304,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(toolCall.output).toEqual({ analysis: 'completed' })
|
||||
})
|
||||
|
||||
test('should handle tool calls with errors in timeSegments', () => {
|
||||
it.concurrent('handles tool calls with errors in timeSegments', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
@@ -380,7 +383,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(toolSegment.output).toEqual({ error: 'Tool execution failed' })
|
||||
})
|
||||
|
||||
test('should handle blocks without tool calls', () => {
|
||||
it.concurrent('handles blocks without tool calls', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output' },
|
||||
@@ -407,7 +410,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(textSpan.toolCalls).toBeUndefined()
|
||||
})
|
||||
|
||||
test('should handle complex multi-iteration agent execution with sequential segments', () => {
|
||||
it.concurrent('handles complex multi-iteration agent execution with sequential segments', () => {
|
||||
// This test simulates a real agent execution with multiple tool calls and model iterations
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
@@ -581,7 +584,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(agentSpan.toolCalls).toBeUndefined()
|
||||
})
|
||||
|
||||
test('should flatten nested child workflow trace spans recursively', () => {
|
||||
it.concurrent('flattens nested child workflow trace spans recursively', () => {
|
||||
const nestedChildSpan = {
|
||||
id: 'nested-workflow-span',
|
||||
name: 'Nested Workflow Block',
|
||||
@@ -685,7 +688,7 @@ describe('buildTraceSpans', () => {
|
||||
expect(syntheticWrappers).toHaveLength(0)
|
||||
})
|
||||
|
||||
test('should handle nested child workflow errors with proper hierarchy', () => {
|
||||
it.concurrent('handles nested child workflow errors with proper hierarchy', () => {
|
||||
const functionErrorSpan = {
|
||||
id: 'function-error-span',
|
||||
name: 'Function 1',
|
||||
@@ -770,7 +773,7 @@ describe('buildTraceSpans', () => {
|
||||
expect((functionSpan?.output as { error?: string })?.error).toContain('Syntax Error')
|
||||
})
|
||||
|
||||
test('should remove childTraceSpans from output after integrating them as children', () => {
|
||||
it.concurrent('removes childTraceSpans from output after integrating them as children', () => {
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { result: 'parent output' },
|
||||
@@ -843,15 +846,157 @@ describe('buildTraceSpans', () => {
|
||||
data: 'some result',
|
||||
})
|
||||
})
|
||||
|
||||
it.concurrent('matches multiple tool calls with same name by sequential order', () => {
|
||||
// This test verifies that when an agent makes multiple calls to the same tool
|
||||
// (e.g., search_tool called 3 times with different queries), each tool segment
|
||||
// is matched to the correct tool call by their sequential order, not just by name.
|
||||
const mockExecutionResult: ExecutionResult = {
|
||||
success: true,
|
||||
output: { content: 'Final output with multiple searches' },
|
||||
logs: [
|
||||
{
|
||||
blockId: 'agent-multi-search',
|
||||
blockName: 'Multi-Search Agent',
|
||||
blockType: 'agent',
|
||||
startedAt: '2024-01-01T10:00:00.000Z',
|
||||
endedAt: '2024-01-01T10:00:10.000Z',
|
||||
durationMs: 10000,
|
||||
success: true,
|
||||
input: { userPrompt: 'Search for multiple topics' },
|
||||
output: {
|
||||
content: 'Results from multiple searches',
|
||||
model: 'gpt-4o',
|
||||
tokens: { input: 50, output: 100, total: 150 },
|
||||
providerTiming: {
|
||||
duration: 10000,
|
||||
startTime: '2024-01-01T10:00:00.000Z',
|
||||
endTime: '2024-01-01T10:00:10.000Z',
|
||||
timeSegments: [
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Initial response',
|
||||
startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
|
||||
endTime: 1704103201000,
|
||||
duration: 1000,
|
||||
},
|
||||
{
|
||||
type: 'tool',
|
||||
name: 'search_tool',
|
||||
startTime: 1704103201000, // 2024-01-01T10:00:01.000Z
|
||||
endTime: 1704103202000,
|
||||
duration: 1000,
|
||||
},
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Model response (iteration 1)',
|
||||
startTime: 1704103202000,
|
||||
endTime: 1704103203000,
|
||||
duration: 1000,
|
||||
},
|
||||
{
|
||||
type: 'tool',
|
||||
name: 'search_tool',
|
||||
startTime: 1704103203000, // 2024-01-01T10:00:03.000Z
|
||||
endTime: 1704103204500,
|
||||
duration: 1500,
|
||||
},
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Model response (iteration 2)',
|
||||
startTime: 1704103204500,
|
||||
endTime: 1704103206000,
|
||||
duration: 1500,
|
||||
},
|
||||
{
|
||||
type: 'tool',
|
||||
name: 'search_tool',
|
||||
startTime: 1704103206000, // 2024-01-01T10:00:06.000Z
|
||||
endTime: 1704103208000,
|
||||
duration: 2000,
|
||||
},
|
||||
{
|
||||
type: 'model',
|
||||
name: 'Model response (iteration 3)',
|
||||
startTime: 1704103208000,
|
||||
endTime: 1704103210000,
|
||||
duration: 2000,
|
||||
},
|
||||
],
|
||||
},
|
||||
toolCalls: {
|
||||
list: [
|
||||
{
|
||||
name: 'search_tool',
|
||||
arguments: { query: 'first query' },
|
||||
result: { results: ['first result'] },
|
||||
duration: 1000,
|
||||
startTime: '2024-01-01T10:00:01.000Z', // Matches first segment
|
||||
endTime: '2024-01-01T10:00:02.000Z',
|
||||
},
|
||||
{
|
||||
name: 'search_tool',
|
||||
arguments: { query: 'second query' },
|
||||
result: { results: ['second result'] },
|
||||
duration: 1500,
|
||||
startTime: '2024-01-01T10:00:03.000Z', // Matches second segment
|
||||
endTime: '2024-01-01T10:00:04.500Z',
|
||||
},
|
||||
{
|
||||
name: 'search_tool',
|
||||
arguments: { query: 'third query' },
|
||||
result: { results: ['third result'] },
|
||||
duration: 2000,
|
||||
startTime: '2024-01-01T10:00:06.000Z', // Matches third segment
|
||||
endTime: '2024-01-01T10:00:08.000Z',
|
||||
},
|
||||
],
|
||||
count: 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
const { traceSpans } = buildTraceSpans(mockExecutionResult)
|
||||
|
||||
expect(traceSpans).toHaveLength(1)
|
||||
const agentSpan = traceSpans[0]
|
||||
expect(agentSpan.children).toBeDefined()
|
||||
expect(agentSpan.children).toHaveLength(7)
|
||||
|
||||
const segments = agentSpan.children!
|
||||
|
||||
// First search_tool call should have "first query"
|
||||
const firstToolSegment = segments[1]
|
||||
expect(firstToolSegment.name).toBe('search_tool')
|
||||
expect(firstToolSegment.type).toBe('tool')
|
||||
expect(firstToolSegment.input).toEqual({ query: 'first query' })
|
||||
expect(firstToolSegment.output).toEqual({ results: ['first result'] })
|
||||
|
||||
// Second search_tool call should have "second query"
|
||||
const secondToolSegment = segments[3]
|
||||
expect(secondToolSegment.name).toBe('search_tool')
|
||||
expect(secondToolSegment.type).toBe('tool')
|
||||
expect(secondToolSegment.input).toEqual({ query: 'second query' })
|
||||
expect(secondToolSegment.output).toEqual({ results: ['second result'] })
|
||||
|
||||
// Third search_tool call should have "third query"
|
||||
const thirdToolSegment = segments[5]
|
||||
expect(thirdToolSegment.name).toBe('search_tool')
|
||||
expect(thirdToolSegment.type).toBe('tool')
|
||||
expect(thirdToolSegment.input).toEqual({ query: 'third query' })
|
||||
expect(thirdToolSegment.output).toEqual({ results: ['third result'] })
|
||||
})
|
||||
})
|
||||
|
||||
describe('stripCustomToolPrefix', () => {
|
||||
test('should strip custom_ prefix from tool names', () => {
|
||||
it.concurrent('strips custom_ prefix from tool names', () => {
|
||||
expect(stripCustomToolPrefix('custom_test_tool')).toBe('test_tool')
|
||||
expect(stripCustomToolPrefix('custom_analysis')).toBe('analysis')
|
||||
})
|
||||
|
||||
test('should leave non-custom tool names unchanged', () => {
|
||||
it.concurrent('leaves non-custom tool names unchanged', () => {
|
||||
expect(stripCustomToolPrefix('http_request')).toBe('http_request')
|
||||
expect(stripCustomToolPrefix('serper_search')).toBe('serper_search')
|
||||
expect(stripCustomToolPrefix('regular_tool')).toBe('regular_tool')
|
||||
|
||||
@@ -233,6 +233,17 @@ export function buildTraceSpans(result: ExecutionResult): {
|
||||
const timeSegments = log.output.providerTiming.timeSegments
|
||||
const toolCallsData = log.output?.toolCalls?.list || log.output?.toolCalls || []
|
||||
|
||||
const toolCallsByName = new Map<string, Array<Record<string, unknown>>>()
|
||||
for (const tc of toolCallsData as Array<{ name?: string; [key: string]: unknown }>) {
|
||||
const normalizedName = stripCustomToolPrefix(tc.name || '')
|
||||
if (!toolCallsByName.has(normalizedName)) {
|
||||
toolCallsByName.set(normalizedName, [])
|
||||
}
|
||||
toolCallsByName.get(normalizedName)!.push(tc)
|
||||
}
|
||||
|
||||
const toolCallIndices = new Map<string, number>()
|
||||
|
||||
span.children = timeSegments.map(
|
||||
(
|
||||
segment: {
|
||||
@@ -259,14 +270,25 @@ export function buildTraceSpans(result: ExecutionResult): {
|
||||
}
|
||||
|
||||
if (segment.type === 'tool') {
|
||||
const matchingToolCall = toolCallsData.find(
|
||||
(tc: { name?: string; [key: string]: unknown }) =>
|
||||
tc.name === segment.name || stripCustomToolPrefix(tc.name || '') === segment.name
|
||||
)
|
||||
const normalizedName = stripCustomToolPrefix(segment.name || '')
|
||||
|
||||
const toolCallsForName = toolCallsByName.get(normalizedName) || []
|
||||
const currentIndex = toolCallIndices.get(normalizedName) || 0
|
||||
const matchingToolCall = toolCallsForName[currentIndex] as
|
||||
| {
|
||||
error?: string
|
||||
arguments?: Record<string, unknown>
|
||||
input?: Record<string, unknown>
|
||||
result?: Record<string, unknown>
|
||||
output?: Record<string, unknown>
|
||||
}
|
||||
| undefined
|
||||
|
||||
toolCallIndices.set(normalizedName, currentIndex + 1)
|
||||
|
||||
return {
|
||||
id: `${span.id}-segment-${index}`,
|
||||
name: stripCustomToolPrefix(segment.name || ''),
|
||||
name: normalizedName,
|
||||
type: 'tool',
|
||||
duration: segment.duration,
|
||||
startTime: segmentStartTime,
|
||||
|
||||
Reference in New Issue
Block a user