fix(tracespans): update tracespans tool calls to accurately display inputs for successive identical tool calls (#3140)

This commit is contained in:
Waleed
2026-02-04 19:32:18 -08:00
committed by GitHub
parent 36ec68d93e
commit 2147309365
3 changed files with 252 additions and 82 deletions

View File

@@ -1,16 +1,11 @@
import { beforeEach, describe, expect, test } from 'vitest'
import { describe, expect, it } from 'vitest'
import { SnapshotService } from '@/lib/logs/execution/snapshot/service'
import type { WorkflowState } from '@/lib/logs/types'
describe('SnapshotService', () => {
let service: SnapshotService
beforeEach(() => {
service = new SnapshotService()
})
describe('computeStateHash', () => {
test('should generate consistent hashes for identical states', () => {
it.concurrent('should generate consistent hashes for identical states', () => {
const service = new SnapshotService()
const state: WorkflowState = {
blocks: {
block1: {
@@ -39,7 +34,8 @@ describe('SnapshotService', () => {
expect(hash1).toHaveLength(64) // SHA-256 hex string
})
test('should ignore position changes', () => {
it.concurrent('should ignore position changes', () => {
const service = new SnapshotService()
const baseState: WorkflowState = {
blocks: {
block1: {
@@ -77,7 +73,8 @@ describe('SnapshotService', () => {
expect(hash1).toBe(hash2)
})
test('should detect meaningful changes', () => {
it.concurrent('should detect meaningful changes', () => {
const service = new SnapshotService()
const baseState: WorkflowState = {
blocks: {
block1: {
@@ -128,7 +125,8 @@ describe('SnapshotService', () => {
expect(hash1).not.toBe(hash2)
})
test('should handle edge order consistently', () => {
it.concurrent('should handle edge order consistently', () => {
const service = new SnapshotService()
const state1: WorkflowState = {
blocks: {},
edges: [
@@ -155,7 +153,8 @@ describe('SnapshotService', () => {
expect(hash1).toBe(hash2) // Should be same despite different order
})
test('should handle empty states', () => {
it.concurrent('should handle empty states', () => {
const service = new SnapshotService()
const emptyState: WorkflowState = {
blocks: {},
edges: [],
@@ -167,7 +166,8 @@ describe('SnapshotService', () => {
expect(hash).toHaveLength(64)
})
test('should handle complex nested structures', () => {
it.concurrent('should handle complex nested structures', () => {
const service = new SnapshotService()
const complexState: WorkflowState = {
blocks: {
block1: {
@@ -224,7 +224,8 @@ describe('SnapshotService', () => {
expect(hash).toBe(hash2)
})
test('should include variables in hash computation', () => {
it.concurrent('should include variables in hash computation', () => {
const service = new SnapshotService()
const stateWithVariables: WorkflowState = {
blocks: {},
edges: [],
@@ -253,7 +254,8 @@ describe('SnapshotService', () => {
expect(hashWith).not.toBe(hashWithout)
})
test('should detect changes in variable values', () => {
it.concurrent('should detect changes in variable values', () => {
const service = new SnapshotService()
const state1: WorkflowState = {
blocks: {},
edges: [],
@@ -290,7 +292,8 @@ describe('SnapshotService', () => {
expect(hash1).not.toBe(hash2)
})
test('should generate consistent hashes for states with variables', () => {
it.concurrent('should generate consistent hashes for states with variables', () => {
const service = new SnapshotService()
const stateWithVariables: WorkflowState = {
blocks: {
block1: {

View File

@@ -1,10 +1,10 @@
import { describe, expect, test } from 'vitest'
import { describe, expect, it } from 'vitest'
import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans'
import { stripCustomToolPrefix } from '@/executor/constants'
import type { ExecutionResult } from '@/executor/types'
describe('buildTraceSpans', () => {
test('should extract sequential segments from timeSegments data', () => {
it.concurrent('extracts sequential segments from timeSegments data', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -119,7 +119,7 @@ describe('buildTraceSpans', () => {
expect(segments[3].status).toBe('success')
})
test('should fallback to toolCalls extraction when timeSegments not available', () => {
it.concurrent('falls back to toolCalls extraction when timeSegments not available', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -194,60 +194,63 @@ describe('buildTraceSpans', () => {
expect(secondToolCall.output).toEqual({ status: 200, data: 'response' })
})
test('should extract tool calls from agent block output with direct toolCalls array format (fallback)', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
logs: [
{
blockId: 'agent-2',
blockName: 'Test Agent 2',
blockType: 'agent',
startedAt: '2024-01-01T10:00:00.000Z',
endedAt: '2024-01-01T10:00:03.000Z',
durationMs: 3000,
success: true,
input: { userPrompt: 'Test prompt' },
output: {
content: 'Agent response',
model: 'gpt-4o',
providerTiming: {
duration: 2500,
startTime: '2024-01-01T10:00:00.250Z',
endTime: '2024-01-01T10:00:02.750Z',
// No timeSegments - should fallback to toolCalls
},
toolCalls: [
{
name: 'serper_search',
arguments: { query: 'test search' },
result: { results: ['result1', 'result2'] },
duration: 1500,
startTime: '2024-01-01T10:00:00.500Z',
endTime: '2024-01-01T10:00:02.000Z',
it.concurrent(
'extracts tool calls from agent block output with direct toolCalls array format',
() => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
logs: [
{
blockId: 'agent-2',
blockName: 'Test Agent 2',
blockType: 'agent',
startedAt: '2024-01-01T10:00:00.000Z',
endedAt: '2024-01-01T10:00:03.000Z',
durationMs: 3000,
success: true,
input: { userPrompt: 'Test prompt' },
output: {
content: 'Agent response',
model: 'gpt-4o',
providerTiming: {
duration: 2500,
startTime: '2024-01-01T10:00:00.250Z',
endTime: '2024-01-01T10:00:02.750Z',
// No timeSegments - should fallback to toolCalls
},
],
toolCalls: [
{
name: 'serper_search',
arguments: { query: 'test search' },
result: { results: ['result1', 'result2'] },
duration: 1500,
startTime: '2024-01-01T10:00:00.500Z',
endTime: '2024-01-01T10:00:02.000Z',
},
],
},
},
},
],
],
}
const { traceSpans } = buildTraceSpans(mockExecutionResult)
expect(traceSpans).toHaveLength(1)
const agentSpan = traceSpans[0]
expect(agentSpan.toolCalls).toBeDefined()
expect(agentSpan.toolCalls).toHaveLength(1)
const toolCall = agentSpan.toolCalls![0]
expect(toolCall.name).toBe('serper_search')
expect(toolCall.duration).toBe(1500)
expect(toolCall.status).toBe('success')
expect(toolCall.input).toEqual({ query: 'test search' })
expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
}
)
const { traceSpans } = buildTraceSpans(mockExecutionResult)
expect(traceSpans).toHaveLength(1)
const agentSpan = traceSpans[0]
expect(agentSpan.toolCalls).toBeDefined()
expect(agentSpan.toolCalls).toHaveLength(1)
const toolCall = agentSpan.toolCalls![0]
expect(toolCall.name).toBe('serper_search')
expect(toolCall.duration).toBe(1500)
expect(toolCall.status).toBe('success')
expect(toolCall.input).toEqual({ query: 'test search' })
expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
})
test('should extract tool calls from streaming response with executionData format (fallback)', () => {
it.concurrent('extracts tool calls from streaming response with executionData format', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -301,7 +304,7 @@ describe('buildTraceSpans', () => {
expect(toolCall.output).toEqual({ analysis: 'completed' })
})
test('should handle tool calls with errors in timeSegments', () => {
it.concurrent('handles tool calls with errors in timeSegments', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -380,7 +383,7 @@ describe('buildTraceSpans', () => {
expect(toolSegment.output).toEqual({ error: 'Tool execution failed' })
})
test('should handle blocks without tool calls', () => {
it.concurrent('handles blocks without tool calls', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -407,7 +410,7 @@ describe('buildTraceSpans', () => {
expect(textSpan.toolCalls).toBeUndefined()
})
test('should handle complex multi-iteration agent execution with sequential segments', () => {
it.concurrent('handles complex multi-iteration agent execution with sequential segments', () => {
// This test simulates a real agent execution with multiple tool calls and model iterations
const mockExecutionResult: ExecutionResult = {
success: true,
@@ -581,7 +584,7 @@ describe('buildTraceSpans', () => {
expect(agentSpan.toolCalls).toBeUndefined()
})
test('should flatten nested child workflow trace spans recursively', () => {
it.concurrent('flattens nested child workflow trace spans recursively', () => {
const nestedChildSpan = {
id: 'nested-workflow-span',
name: 'Nested Workflow Block',
@@ -685,7 +688,7 @@ describe('buildTraceSpans', () => {
expect(syntheticWrappers).toHaveLength(0)
})
test('should handle nested child workflow errors with proper hierarchy', () => {
it.concurrent('handles nested child workflow errors with proper hierarchy', () => {
const functionErrorSpan = {
id: 'function-error-span',
name: 'Function 1',
@@ -770,7 +773,7 @@ describe('buildTraceSpans', () => {
expect((functionSpan?.output as { error?: string })?.error).toContain('Syntax Error')
})
test('should remove childTraceSpans from output after integrating them as children', () => {
it.concurrent('removes childTraceSpans from output after integrating them as children', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { result: 'parent output' },
@@ -843,15 +846,157 @@ describe('buildTraceSpans', () => {
data: 'some result',
})
})
it.concurrent('matches multiple tool calls with same name by sequential order', () => {
  // Regression guard for repeated calls to one tool: the agent below invokes
  // `search_tool` three times with different queries. Every tool segment must be
  // paired with the tool call at the same ordinal position among calls of that
  // name; matching on the name alone would give all three segments the first
  // call's input and output.
  const mockExecutionResult: ExecutionResult = {
    success: true,
    output: { content: 'Final output with multiple searches' },
    logs: [
      {
        blockId: 'agent-multi-search',
        blockName: 'Multi-Search Agent',
        blockType: 'agent',
        startedAt: '2024-01-01T10:00:00.000Z',
        endedAt: '2024-01-01T10:00:10.000Z',
        durationMs: 10000,
        success: true,
        input: { userPrompt: 'Search for multiple topics' },
        output: {
          content: 'Results from multiple searches',
          model: 'gpt-4o',
          tokens: { input: 50, output: 100, total: 150 },
          providerTiming: {
            duration: 10000,
            startTime: '2024-01-01T10:00:00.000Z',
            endTime: '2024-01-01T10:00:10.000Z',
            // Model and tool segments alternate; the tool segments land at indices 1, 3 and 5.
            timeSegments: [
              {
                type: 'model',
                name: 'Initial response',
                startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
                endTime: 1704103201000,
                duration: 1000,
              },
              {
                type: 'tool',
                name: 'search_tool',
                startTime: 1704103201000, // 2024-01-01T10:00:01.000Z
                endTime: 1704103202000,
                duration: 1000,
              },
              {
                type: 'model',
                name: 'Model response (iteration 1)',
                startTime: 1704103202000,
                endTime: 1704103203000,
                duration: 1000,
              },
              {
                type: 'tool',
                name: 'search_tool',
                startTime: 1704103203000, // 2024-01-01T10:00:03.000Z
                endTime: 1704103204500,
                duration: 1500,
              },
              {
                type: 'model',
                name: 'Model response (iteration 2)',
                startTime: 1704103204500,
                endTime: 1704103206000,
                duration: 1500,
              },
              {
                type: 'tool',
                name: 'search_tool',
                startTime: 1704103206000, // 2024-01-01T10:00:06.000Z
                endTime: 1704103208000,
                duration: 2000,
              },
              {
                type: 'model',
                name: 'Model response (iteration 3)',
                startTime: 1704103208000,
                endTime: 1704103210000,
                duration: 2000,
              },
            ],
          },
          toolCalls: {
            // Listed in call order; the timestamps line up with the tool segments above.
            list: [
              {
                name: 'search_tool',
                arguments: { query: 'first query' },
                result: { results: ['first result'] },
                duration: 1000,
                startTime: '2024-01-01T10:00:01.000Z', // first tool segment
                endTime: '2024-01-01T10:00:02.000Z',
              },
              {
                name: 'search_tool',
                arguments: { query: 'second query' },
                result: { results: ['second result'] },
                duration: 1500,
                startTime: '2024-01-01T10:00:03.000Z', // second tool segment
                endTime: '2024-01-01T10:00:04.500Z',
              },
              {
                name: 'search_tool',
                arguments: { query: 'third query' },
                result: { results: ['third result'] },
                duration: 2000,
                startTime: '2024-01-01T10:00:06.000Z', // third tool segment
                endTime: '2024-01-01T10:00:08.000Z',
              },
            ],
            count: 3,
          },
        },
      },
    ],
  }

  const { traceSpans } = buildTraceSpans(mockExecutionResult)
  expect(traceSpans).toHaveLength(1)

  const [agentSpan] = traceSpans
  expect(agentSpan.children).toBeDefined()
  expect(agentSpan.children).toHaveLength(7)

  const segments = agentSpan.children!

  // One row per tool segment: its position among the children and the
  // input/output of the tool call it must have been paired with.
  const expectedToolSegments = [
    { position: 1, input: { query: 'first query' }, output: { results: ['first result'] } },
    { position: 3, input: { query: 'second query' }, output: { results: ['second result'] } },
    { position: 5, input: { query: 'third query' }, output: { results: ['third result'] } },
  ]

  for (const { position, input, output } of expectedToolSegments) {
    const toolSegment = segments[position]
    expect(toolSegment.name).toBe('search_tool')
    expect(toolSegment.type).toBe('tool')
    expect(toolSegment.input).toEqual(input)
    expect(toolSegment.output).toEqual(output)
  }
})
})
describe('stripCustomToolPrefix', () => {
test('should strip custom_ prefix from tool names', () => {
it.concurrent('strips custom_ prefix from tool names', () => {
  // Each row is [prefixed tool name, name expected once the prefix is removed].
  const cases: Array<[string, string]> = [
    ['custom_test_tool', 'test_tool'],
    ['custom_analysis', 'analysis'],
  ]
  for (const [prefixed, expected] of cases) {
    expect(stripCustomToolPrefix(prefixed)).toBe(expected)
  }
})
test('should leave non-custom tool names unchanged', () => {
it.concurrent('leaves non-custom tool names unchanged', () => {
expect(stripCustomToolPrefix('http_request')).toBe('http_request')
expect(stripCustomToolPrefix('serper_search')).toBe('serper_search')
expect(stripCustomToolPrefix('regular_tool')).toBe('regular_tool')

View File

@@ -233,6 +233,17 @@ export function buildTraceSpans(result: ExecutionResult): {
const timeSegments = log.output.providerTiming.timeSegments
const toolCallsData = log.output?.toolCalls?.list || log.output?.toolCalls || []
// Group the recorded tool calls by their normalized name (custom-tool prefix
// stripped), keeping the order in which they were recorded. Tool segments are
// later paired with these calls one by one, so repeated calls to the same tool
// each get their own input/output instead of all resolving to the first call.
//
// `toolCallsData` comes from loosely-typed block output. If it is not an array
// (for example a `toolCalls` object that carries no `list`), iterating it would
// throw before any span is built, so it is treated as "no recorded tool calls".
const toolCallsByName = new Map<string, Array<Record<string, unknown>>>()
const recordedToolCalls = Array.isArray(toolCallsData)
  ? (toolCallsData as Array<{ name?: string; [key: string]: unknown }>)
  : []
for (const tc of recordedToolCalls) {
  const normalizedName = stripCustomToolPrefix(tc.name || '')
  const callsForName = toolCallsByName.get(normalizedName)
  if (callsForName) {
    callsForName.push(tc)
  } else {
    toolCallsByName.set(normalizedName, [tc])
  }
}
// Per-name cursor: how many tool segments with that name have been matched so far.
const toolCallIndices = new Map<string, number>()
span.children = timeSegments.map(
(
segment: {
@@ -259,14 +270,25 @@ export function buildTraceSpans(result: ExecutionResult): {
}
if (segment.type === 'tool') {
const matchingToolCall = toolCallsData.find(
(tc: { name?: string; [key: string]: unknown }) =>
tc.name === segment.name || stripCustomToolPrefix(tc.name || '') === segment.name
)
// Pair this tool segment with the recorded tool call at the same ordinal
// position among calls sharing its normalized name: the first segment for a
// name takes that name's first recorded call, the second takes the second, etc.
const normalizedName = stripCustomToolPrefix(segment.name || '')
const currentIndex = toolCallIndices.get(normalizedName) ?? 0
const toolCallsForName = toolCallsByName.get(normalizedName) ?? []
// Advance the per-name cursor so the next segment with this name takes the next call.
toolCallIndices.set(normalizedName, currentIndex + 1)
// Undefined when there are more segments with this name than recorded calls.
const matchingToolCall = toolCallsForName[currentIndex] as
  | {
      error?: string
      arguments?: Record<string, unknown>
      input?: Record<string, unknown>
      result?: Record<string, unknown>
      output?: Record<string, unknown>
    }
  | undefined
return {
id: `${span.id}-segment-${index}`,
name: stripCustomToolPrefix(segment.name || ''),
name: normalizedName,
type: 'tool',
duration: segment.duration,
startTime: segmentStartTime,