From 21473093654969398f32f87c25be6c7350e7be43 Mon Sep 17 00:00:00 2001 From: Waleed Date: Wed, 4 Feb 2026 19:32:18 -0800 Subject: [PATCH] fix(tracespans): update tracespans tool calls to accurately display inputs for successive identical tool calls (#3140) --- .../logs/execution/snapshot/service.test.ts | 35 +-- .../execution/trace-spans/trace-spans.test.ts | 267 ++++++++++++++---- .../logs/execution/trace-spans/trace-spans.ts | 32 ++- 3 files changed, 252 insertions(+), 82 deletions(-) diff --git a/apps/sim/lib/logs/execution/snapshot/service.test.ts b/apps/sim/lib/logs/execution/snapshot/service.test.ts index a0f775516..09353f7b2 100644 --- a/apps/sim/lib/logs/execution/snapshot/service.test.ts +++ b/apps/sim/lib/logs/execution/snapshot/service.test.ts @@ -1,16 +1,11 @@ -import { beforeEach, describe, expect, test } from 'vitest' +import { describe, expect, it } from 'vitest' import { SnapshotService } from '@/lib/logs/execution/snapshot/service' import type { WorkflowState } from '@/lib/logs/types' describe('SnapshotService', () => { - let service: SnapshotService - - beforeEach(() => { - service = new SnapshotService() - }) - describe('computeStateHash', () => { - test('should generate consistent hashes for identical states', () => { + it.concurrent('should generate consistent hashes for identical states', () => { + const service = new SnapshotService() const state: WorkflowState = { blocks: { block1: { @@ -39,7 +34,8 @@ describe('SnapshotService', () => { expect(hash1).toHaveLength(64) // SHA-256 hex string }) - test('should ignore position changes', () => { + it.concurrent('should ignore position changes', () => { + const service = new SnapshotService() const baseState: WorkflowState = { blocks: { block1: { @@ -77,7 +73,8 @@ describe('SnapshotService', () => { expect(hash1).toBe(hash2) }) - test('should detect meaningful changes', () => { + it.concurrent('should detect meaningful changes', () => { + const service = new SnapshotService() const baseState: 
WorkflowState = { blocks: { block1: { @@ -128,7 +125,8 @@ describe('SnapshotService', () => { expect(hash1).not.toBe(hash2) }) - test('should handle edge order consistently', () => { + it.concurrent('should handle edge order consistently', () => { + const service = new SnapshotService() const state1: WorkflowState = { blocks: {}, edges: [ @@ -155,7 +153,8 @@ describe('SnapshotService', () => { expect(hash1).toBe(hash2) // Should be same despite different order }) - test('should handle empty states', () => { + it.concurrent('should handle empty states', () => { + const service = new SnapshotService() const emptyState: WorkflowState = { blocks: {}, edges: [], @@ -167,7 +166,8 @@ describe('SnapshotService', () => { expect(hash).toHaveLength(64) }) - test('should handle complex nested structures', () => { + it.concurrent('should handle complex nested structures', () => { + const service = new SnapshotService() const complexState: WorkflowState = { blocks: { block1: { @@ -224,7 +224,8 @@ describe('SnapshotService', () => { expect(hash).toBe(hash2) }) - test('should include variables in hash computation', () => { + it.concurrent('should include variables in hash computation', () => { + const service = new SnapshotService() const stateWithVariables: WorkflowState = { blocks: {}, edges: [], @@ -253,7 +254,8 @@ describe('SnapshotService', () => { expect(hashWith).not.toBe(hashWithout) }) - test('should detect changes in variable values', () => { + it.concurrent('should detect changes in variable values', () => { + const service = new SnapshotService() const state1: WorkflowState = { blocks: {}, edges: [], @@ -290,7 +292,8 @@ describe('SnapshotService', () => { expect(hash1).not.toBe(hash2) }) - test('should generate consistent hashes for states with variables', () => { + it.concurrent('should generate consistent hashes for states with variables', () => { + const service = new SnapshotService() const stateWithVariables: WorkflowState = { blocks: { block1: { diff --git 
a/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts b/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts index 157ca17f6..987318d5e 100644 --- a/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts +++ b/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts @@ -1,10 +1,10 @@ -import { describe, expect, test } from 'vitest' +import { describe, expect, it } from 'vitest' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { stripCustomToolPrefix } from '@/executor/constants' import type { ExecutionResult } from '@/executor/types' describe('buildTraceSpans', () => { - test('should extract sequential segments from timeSegments data', () => { + it.concurrent('extracts sequential segments from timeSegments data', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { content: 'Final output' }, @@ -119,7 +119,7 @@ describe('buildTraceSpans', () => { expect(segments[3].status).toBe('success') }) - test('should fallback to toolCalls extraction when timeSegments not available', () => { + it.concurrent('falls back to toolCalls extraction when timeSegments not available', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { content: 'Final output' }, @@ -194,60 +194,63 @@ describe('buildTraceSpans', () => { expect(secondToolCall.output).toEqual({ status: 200, data: 'response' }) }) - test('should extract tool calls from agent block output with direct toolCalls array format (fallback)', () => { - const mockExecutionResult: ExecutionResult = { - success: true, - output: { content: 'Final output' }, - logs: [ - { - blockId: 'agent-2', - blockName: 'Test Agent 2', - blockType: 'agent', - startedAt: '2024-01-01T10:00:00.000Z', - endedAt: '2024-01-01T10:00:03.000Z', - durationMs: 3000, - success: true, - input: { userPrompt: 'Test prompt' }, - output: { - content: 'Agent response', - model: 'gpt-4o', - providerTiming: { - duration: 2500, - startTime: 
'2024-01-01T10:00:00.250Z', - endTime: '2024-01-01T10:00:02.750Z', - // No timeSegments - should fallback to toolCalls - }, - toolCalls: [ - { - name: 'serper_search', - arguments: { query: 'test search' }, - result: { results: ['result1', 'result2'] }, - duration: 1500, - startTime: '2024-01-01T10:00:00.500Z', - endTime: '2024-01-01T10:00:02.000Z', + it.concurrent( + 'extracts tool calls from agent block output with direct toolCalls array format', + () => { + const mockExecutionResult: ExecutionResult = { + success: true, + output: { content: 'Final output' }, + logs: [ + { + blockId: 'agent-2', + blockName: 'Test Agent 2', + blockType: 'agent', + startedAt: '2024-01-01T10:00:00.000Z', + endedAt: '2024-01-01T10:00:03.000Z', + durationMs: 3000, + success: true, + input: { userPrompt: 'Test prompt' }, + output: { + content: 'Agent response', + model: 'gpt-4o', + providerTiming: { + duration: 2500, + startTime: '2024-01-01T10:00:00.250Z', + endTime: '2024-01-01T10:00:02.750Z', + // No timeSegments - should fallback to toolCalls }, - ], + toolCalls: [ + { + name: 'serper_search', + arguments: { query: 'test search' }, + result: { results: ['result1', 'result2'] }, + duration: 1500, + startTime: '2024-01-01T10:00:00.500Z', + endTime: '2024-01-01T10:00:02.000Z', + }, + ], + }, }, - }, - ], + ], + } + + const { traceSpans } = buildTraceSpans(mockExecutionResult) + + expect(traceSpans).toHaveLength(1) + const agentSpan = traceSpans[0] + expect(agentSpan.toolCalls).toBeDefined() + expect(agentSpan.toolCalls).toHaveLength(1) + + const toolCall = agentSpan.toolCalls![0] + expect(toolCall.name).toBe('serper_search') + expect(toolCall.duration).toBe(1500) + expect(toolCall.status).toBe('success') + expect(toolCall.input).toEqual({ query: 'test search' }) + expect(toolCall.output).toEqual({ results: ['result1', 'result2'] }) } + ) - const { traceSpans } = buildTraceSpans(mockExecutionResult) - - expect(traceSpans).toHaveLength(1) - const agentSpan = traceSpans[0] - 
expect(agentSpan.toolCalls).toBeDefined() - expect(agentSpan.toolCalls).toHaveLength(1) - - const toolCall = agentSpan.toolCalls![0] - expect(toolCall.name).toBe('serper_search') - expect(toolCall.duration).toBe(1500) - expect(toolCall.status).toBe('success') - expect(toolCall.input).toEqual({ query: 'test search' }) - expect(toolCall.output).toEqual({ results: ['result1', 'result2'] }) - }) - - test('should extract tool calls from streaming response with executionData format (fallback)', () => { + it.concurrent('extracts tool calls from streaming response with executionData format', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { content: 'Final output' }, @@ -301,7 +304,7 @@ describe('buildTraceSpans', () => { expect(toolCall.output).toEqual({ analysis: 'completed' }) }) - test('should handle tool calls with errors in timeSegments', () => { + it.concurrent('handles tool calls with errors in timeSegments', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { content: 'Final output' }, @@ -380,7 +383,7 @@ describe('buildTraceSpans', () => { expect(toolSegment.output).toEqual({ error: 'Tool execution failed' }) }) - test('should handle blocks without tool calls', () => { + it.concurrent('handles blocks without tool calls', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { content: 'Final output' }, @@ -407,7 +410,7 @@ describe('buildTraceSpans', () => { expect(textSpan.toolCalls).toBeUndefined() }) - test('should handle complex multi-iteration agent execution with sequential segments', () => { + it.concurrent('handles complex multi-iteration agent execution with sequential segments', () => { // This test simulates a real agent execution with multiple tool calls and model iterations const mockExecutionResult: ExecutionResult = { success: true, @@ -581,7 +584,7 @@ describe('buildTraceSpans', () => { expect(agentSpan.toolCalls).toBeUndefined() }) - test('should flatten nested child 
workflow trace spans recursively', () => { + it.concurrent('flattens nested child workflow trace spans recursively', () => { const nestedChildSpan = { id: 'nested-workflow-span', name: 'Nested Workflow Block', @@ -685,7 +688,7 @@ describe('buildTraceSpans', () => { expect(syntheticWrappers).toHaveLength(0) }) - test('should handle nested child workflow errors with proper hierarchy', () => { + it.concurrent('handles nested child workflow errors with proper hierarchy', () => { const functionErrorSpan = { id: 'function-error-span', name: 'Function 1', @@ -770,7 +773,7 @@ describe('buildTraceSpans', () => { expect((functionSpan?.output as { error?: string })?.error).toContain('Syntax Error') }) - test('should remove childTraceSpans from output after integrating them as children', () => { + it.concurrent('removes childTraceSpans from output after integrating them as children', () => { const mockExecutionResult: ExecutionResult = { success: true, output: { result: 'parent output' }, @@ -843,15 +846,157 @@ describe('buildTraceSpans', () => { data: 'some result', }) }) + + it.concurrent('matches multiple tool calls with same name by sequential order', () => { + // This test verifies that when an agent makes multiple calls to the same tool + // (e.g., search_tool called 3 times with different queries), each tool segment + // is matched to the correct tool call by their sequential order, not just by name. 
+ const mockExecutionResult: ExecutionResult = { + success: true, + output: { content: 'Final output with multiple searches' }, + logs: [ + { + blockId: 'agent-multi-search', + blockName: 'Multi-Search Agent', + blockType: 'agent', + startedAt: '2024-01-01T10:00:00.000Z', + endedAt: '2024-01-01T10:00:10.000Z', + durationMs: 10000, + success: true, + input: { userPrompt: 'Search for multiple topics' }, + output: { + content: 'Results from multiple searches', + model: 'gpt-4o', + tokens: { input: 50, output: 100, total: 150 }, + providerTiming: { + duration: 10000, + startTime: '2024-01-01T10:00:00.000Z', + endTime: '2024-01-01T10:00:10.000Z', + timeSegments: [ + { + type: 'model', + name: 'Initial response', + startTime: 1704103200000, // 2024-01-01T10:00:00.000Z + endTime: 1704103201000, + duration: 1000, + }, + { + type: 'tool', + name: 'search_tool', + startTime: 1704103201000, // 2024-01-01T10:00:01.000Z + endTime: 1704103202000, + duration: 1000, + }, + { + type: 'model', + name: 'Model response (iteration 1)', + startTime: 1704103202000, + endTime: 1704103203000, + duration: 1000, + }, + { + type: 'tool', + name: 'search_tool', + startTime: 1704103203000, // 2024-01-01T10:00:03.000Z + endTime: 1704103204500, + duration: 1500, + }, + { + type: 'model', + name: 'Model response (iteration 2)', + startTime: 1704103204500, + endTime: 1704103206000, + duration: 1500, + }, + { + type: 'tool', + name: 'search_tool', + startTime: 1704103206000, // 2024-01-01T10:00:06.000Z + endTime: 1704103208000, + duration: 2000, + }, + { + type: 'model', + name: 'Model response (iteration 3)', + startTime: 1704103208000, + endTime: 1704103210000, + duration: 2000, + }, + ], + }, + toolCalls: { + list: [ + { + name: 'search_tool', + arguments: { query: 'first query' }, + result: { results: ['first result'] }, + duration: 1000, + startTime: '2024-01-01T10:00:01.000Z', // Matches first segment + endTime: '2024-01-01T10:00:02.000Z', + }, + { + name: 'search_tool', + arguments: { query: 
'second query' }, + result: { results: ['second result'] }, + duration: 1500, + startTime: '2024-01-01T10:00:03.000Z', // Matches second segment + endTime: '2024-01-01T10:00:04.500Z', + }, + { + name: 'search_tool', + arguments: { query: 'third query' }, + result: { results: ['third result'] }, + duration: 2000, + startTime: '2024-01-01T10:00:06.000Z', // Matches third segment + endTime: '2024-01-01T10:00:08.000Z', + }, + ], + count: 3, + }, + }, + }, + ], + } + + const { traceSpans } = buildTraceSpans(mockExecutionResult) + + expect(traceSpans).toHaveLength(1) + const agentSpan = traceSpans[0] + expect(agentSpan.children).toBeDefined() + expect(agentSpan.children).toHaveLength(7) + + const segments = agentSpan.children! + + // First search_tool call should have "first query" + const firstToolSegment = segments[1] + expect(firstToolSegment.name).toBe('search_tool') + expect(firstToolSegment.type).toBe('tool') + expect(firstToolSegment.input).toEqual({ query: 'first query' }) + expect(firstToolSegment.output).toEqual({ results: ['first result'] }) + + // Second search_tool call should have "second query" + const secondToolSegment = segments[3] + expect(secondToolSegment.name).toBe('search_tool') + expect(secondToolSegment.type).toBe('tool') + expect(secondToolSegment.input).toEqual({ query: 'second query' }) + expect(secondToolSegment.output).toEqual({ results: ['second result'] }) + + // Third search_tool call should have "third query" + const thirdToolSegment = segments[5] + expect(thirdToolSegment.name).toBe('search_tool') + expect(thirdToolSegment.type).toBe('tool') + expect(thirdToolSegment.input).toEqual({ query: 'third query' }) + expect(thirdToolSegment.output).toEqual({ results: ['third result'] }) + }) }) describe('stripCustomToolPrefix', () => { - test('should strip custom_ prefix from tool names', () => { + it.concurrent('strips custom_ prefix from tool names', () => { expect(stripCustomToolPrefix('custom_test_tool')).toBe('test_tool') 
expect(stripCustomToolPrefix('custom_analysis')).toBe('analysis') }) - test('should leave non-custom tool names unchanged', () => { + it.concurrent('leaves non-custom tool names unchanged', () => { expect(stripCustomToolPrefix('http_request')).toBe('http_request') expect(stripCustomToolPrefix('serper_search')).toBe('serper_search') expect(stripCustomToolPrefix('regular_tool')).toBe('regular_tool') diff --git a/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts b/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts index 33d671865..a4b35330d 100644 --- a/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts +++ b/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts @@ -233,6 +233,17 @@ export function buildTraceSpans(result: ExecutionResult): { const timeSegments = log.output.providerTiming.timeSegments const toolCallsData = log.output?.toolCalls?.list || log.output?.toolCalls || [] + const toolCallsByName = new Map<string, Array<Record<string, unknown>>>() + for (const tc of toolCallsData as Array<{ name?: string; [key: string]: unknown }>) { + const normalizedName = stripCustomToolPrefix(tc.name || '') + if (!toolCallsByName.has(normalizedName)) { + toolCallsByName.set(normalizedName, []) + } + toolCallsByName.get(normalizedName)!.push(tc) + } + + const toolCallIndices = new Map<string, number>() + span.children = timeSegments.map( ( segment: { @@ -259,14 +270,25 @@ export function buildTraceSpans(result: ExecutionResult): { } if (segment.type === 'tool') { - const matchingToolCall = toolCallsData.find( - (tc: { name?: string; [key: string]: unknown }) => - tc.name === segment.name || stripCustomToolPrefix(tc.name || '') === segment.name - ) + const normalizedName = stripCustomToolPrefix(segment.name || '') + + const toolCallsForName = toolCallsByName.get(normalizedName) || [] + const currentIndex = toolCallIndices.get(normalizedName) || 0 + const matchingToolCall = toolCallsForName[currentIndex] as + | { + error?: string + arguments?: Record<string, unknown> + input?: Record<string, unknown> + result?: Record<string, unknown> + output?: Record<string, unknown> + } + | 
undefined + + toolCallIndices.set(normalizedName, currentIndex + 1) return { id: `${span.id}-segment-${index}`, - name: stripCustomToolPrefix(segment.name || ''), + name: normalizedName, type: 'tool', duration: segment.duration, startTime: segmentStartTime,