fix(tracespans): update tracespans tool calls to accurately display inputs for successive identical tool calls (#3140)

2026-02-05 04:05:14 -05:00 · 2026-02-04 19:32:18 -08:00
parent 36ec68d93e
commit 2147309365
3 changed files with 252 additions and 82 deletions
--- a/apps/sim/lib/logs/execution/snapshot/service.test.ts
+++ b/apps/sim/lib/logs/execution/snapshot/service.test.ts
@@ -1,16 +1,11 @@
-import { beforeEach, describe, expect, test } from 'vitest'
+import { describe, expect, it } from 'vitest'
 import { SnapshotService } from '@/lib/logs/execution/snapshot/service'
 import type { WorkflowState } from '@/lib/logs/types'

 describe('SnapshotService', () => {
-  let service: SnapshotService
-
-  beforeEach(() => {
-    service = new SnapshotService()
-  })
-
  describe('computeStateHash', () => {
-    test('should generate consistent hashes for identical states', () => {
+    it.concurrent('should generate consistent hashes for identical states', () => {
+      const service = new SnapshotService()
      const state: WorkflowState = {
        blocks: {
          block1: {
@@ -39,7 +34,8 @@ describe('SnapshotService', () => {
      expect(hash1).toHaveLength(64) // SHA-256 hex string
    })

-    test('should ignore position changes', () => {
+    it.concurrent('should ignore position changes', () => {
+      const service = new SnapshotService()
      const baseState: WorkflowState = {
        blocks: {
          block1: {
@@ -77,7 +73,8 @@ describe('SnapshotService', () => {
      expect(hash1).toBe(hash2)
    })

-    test('should detect meaningful changes', () => {
+    it.concurrent('should detect meaningful changes', () => {
+      const service = new SnapshotService()
      const baseState: WorkflowState = {
        blocks: {
          block1: {
@@ -128,7 +125,8 @@ describe('SnapshotService', () => {
      expect(hash1).not.toBe(hash2)
    })

-    test('should handle edge order consistently', () => {
+    it.concurrent('should handle edge order consistently', () => {
+      const service = new SnapshotService()
      const state1: WorkflowState = {
        blocks: {},
        edges: [
@@ -155,7 +153,8 @@ describe('SnapshotService', () => {
      expect(hash1).toBe(hash2) // Should be same despite different order
    })

-    test('should handle empty states', () => {
+    it.concurrent('should handle empty states', () => {
+      const service = new SnapshotService()
      const emptyState: WorkflowState = {
        blocks: {},
        edges: [],
@@ -167,7 +166,8 @@ describe('SnapshotService', () => {
      expect(hash).toHaveLength(64)
    })

-    test('should handle complex nested structures', () => {
+    it.concurrent('should handle complex nested structures', () => {
+      const service = new SnapshotService()
      const complexState: WorkflowState = {
        blocks: {
          block1: {
@@ -224,7 +224,8 @@ describe('SnapshotService', () => {
      expect(hash).toBe(hash2)
    })

-    test('should include variables in hash computation', () => {
+    it.concurrent('should include variables in hash computation', () => {
+      const service = new SnapshotService()
      const stateWithVariables: WorkflowState = {
        blocks: {},
        edges: [],
@@ -253,7 +254,8 @@ describe('SnapshotService', () => {
      expect(hashWith).not.toBe(hashWithout)
    })

-    test('should detect changes in variable values', () => {
+    it.concurrent('should detect changes in variable values', () => {
+      const service = new SnapshotService()
      const state1: WorkflowState = {
        blocks: {},
        edges: [],
@@ -290,7 +292,8 @@ describe('SnapshotService', () => {
      expect(hash1).not.toBe(hash2)
    })

-    test('should generate consistent hashes for states with variables', () => {
+    it.concurrent('should generate consistent hashes for states with variables', () => {
+      const service = new SnapshotService()
      const stateWithVariables: WorkflowState = {
        blocks: {
          block1: {
--- a/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts
+++ b/apps/sim/lib/logs/execution/trace-spans/trace-spans.test.ts
@@ -1,10 +1,10 @@
-import { describe, expect, test } from 'vitest'
+import { describe, expect, it } from 'vitest'
 import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans'
 import { stripCustomToolPrefix } from '@/executor/constants'
 import type { ExecutionResult } from '@/executor/types'

 describe('buildTraceSpans', () => {
-  test('should extract sequential segments from timeSegments data', () => {
+  it.concurrent('extracts sequential segments from timeSegments data', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { content: 'Final output' },
@@ -119,7 +119,7 @@ describe('buildTraceSpans', () => {
    expect(segments[3].status).toBe('success')
  })

-  test('should fallback to toolCalls extraction when timeSegments not available', () => {
+  it.concurrent('falls back to toolCalls extraction when timeSegments not available', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { content: 'Final output' },
@@ -194,60 +194,63 @@ describe('buildTraceSpans', () => {
    expect(secondToolCall.output).toEqual({ status: 200, data: 'response' })
  })

-  test('should extract tool calls from agent block output with direct toolCalls array format (fallback)', () => {
-    const mockExecutionResult: ExecutionResult = {
-      success: true,
-      output: { content: 'Final output' },
-      logs: [
-        {
-          blockId: 'agent-2',
-          blockName: 'Test Agent 2',
-          blockType: 'agent',
-          startedAt: '2024-01-01T10:00:00.000Z',
-          endedAt: '2024-01-01T10:00:03.000Z',
-          durationMs: 3000,
-          success: true,
-          input: { userPrompt: 'Test prompt' },
-          output: {
-            content: 'Agent response',
-            model: 'gpt-4o',
-            providerTiming: {
-              duration: 2500,
-              startTime: '2024-01-01T10:00:00.250Z',
-              endTime: '2024-01-01T10:00:02.750Z',
-              // No timeSegments - should fallback to toolCalls
-            },
-            toolCalls: [
-              {
-                name: 'serper_search',
-                arguments: { query: 'test search' },
-                result: { results: ['result1', 'result2'] },
-                duration: 1500,
-                startTime: '2024-01-01T10:00:00.500Z',
-                endTime: '2024-01-01T10:00:02.000Z',
+  it.concurrent(
+    'extracts tool calls from agent block output with direct toolCalls array format',
+    () => {
+      const mockExecutionResult: ExecutionResult = {
+        success: true,
+        output: { content: 'Final output' },
+        logs: [
+          {
+            blockId: 'agent-2',
+            blockName: 'Test Agent 2',
+            blockType: 'agent',
+            startedAt: '2024-01-01T10:00:00.000Z',
+            endedAt: '2024-01-01T10:00:03.000Z',
+            durationMs: 3000,
+            success: true,
+            input: { userPrompt: 'Test prompt' },
+            output: {
+              content: 'Agent response',
+              model: 'gpt-4o',
+              providerTiming: {
+                duration: 2500,
+                startTime: '2024-01-01T10:00:00.250Z',
+                endTime: '2024-01-01T10:00:02.750Z',
+                // No timeSegments - should fall back to toolCalls
              },
-            ],
+              toolCalls: [
+                {
+                  name: 'serper_search',
+                  arguments: { query: 'test search' },
+                  result: { results: ['result1', 'result2'] },
+                  duration: 1500,
+                  startTime: '2024-01-01T10:00:00.500Z',
+                  endTime: '2024-01-01T10:00:02.000Z',
+                },
+              ],
+            },
          },
-        },
-      ],
+        ],
+      }
+
+      const { traceSpans } = buildTraceSpans(mockExecutionResult)
+
+      expect(traceSpans).toHaveLength(1)
+      const agentSpan = traceSpans[0]
+      expect(agentSpan.toolCalls).toBeDefined()
+      expect(agentSpan.toolCalls).toHaveLength(1)
+
+      const toolCall = agentSpan.toolCalls![0]
+      expect(toolCall.name).toBe('serper_search')
+      expect(toolCall.duration).toBe(1500)
+      expect(toolCall.status).toBe('success')
+      expect(toolCall.input).toEqual({ query: 'test search' })
+      expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
    }
+  )

-    const { traceSpans } = buildTraceSpans(mockExecutionResult)
-
-    expect(traceSpans).toHaveLength(1)
-    const agentSpan = traceSpans[0]
-    expect(agentSpan.toolCalls).toBeDefined()
-    expect(agentSpan.toolCalls).toHaveLength(1)
-
-    const toolCall = agentSpan.toolCalls![0]
-    expect(toolCall.name).toBe('serper_search')
-    expect(toolCall.duration).toBe(1500)
-    expect(toolCall.status).toBe('success')
-    expect(toolCall.input).toEqual({ query: 'test search' })
-    expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
-  })
-
-  test('should extract tool calls from streaming response with executionData format (fallback)', () => {
+  it.concurrent('extracts tool calls from streaming response with executionData format', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { content: 'Final output' },
@@ -301,7 +304,7 @@ describe('buildTraceSpans', () => {
    expect(toolCall.output).toEqual({ analysis: 'completed' })
  })

-  test('should handle tool calls with errors in timeSegments', () => {
+  it.concurrent('handles tool calls with errors in timeSegments', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { content: 'Final output' },
@@ -380,7 +383,7 @@ describe('buildTraceSpans', () => {
    expect(toolSegment.output).toEqual({ error: 'Tool execution failed' })
  })

-  test('should handle blocks without tool calls', () => {
+  it.concurrent('handles blocks without tool calls', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { content: 'Final output' },
@@ -407,7 +410,7 @@ describe('buildTraceSpans', () => {
    expect(textSpan.toolCalls).toBeUndefined()
  })

-  test('should handle complex multi-iteration agent execution with sequential segments', () => {
+  it.concurrent('handles complex multi-iteration agent execution with sequential segments', () => {
    // This test simulates a real agent execution with multiple tool calls and model iterations
    const mockExecutionResult: ExecutionResult = {
      success: true,
@@ -581,7 +584,7 @@ describe('buildTraceSpans', () => {
    expect(agentSpan.toolCalls).toBeUndefined()
  })

-  test('should flatten nested child workflow trace spans recursively', () => {
+  it.concurrent('flattens nested child workflow trace spans recursively', () => {
    const nestedChildSpan = {
      id: 'nested-workflow-span',
      name: 'Nested Workflow Block',
@@ -685,7 +688,7 @@ describe('buildTraceSpans', () => {
    expect(syntheticWrappers).toHaveLength(0)
  })

-  test('should handle nested child workflow errors with proper hierarchy', () => {
+  it.concurrent('handles nested child workflow errors with proper hierarchy', () => {
    const functionErrorSpan = {
      id: 'function-error-span',
      name: 'Function 1',
@@ -770,7 +773,7 @@ describe('buildTraceSpans', () => {
    expect((functionSpan?.output as { error?: string })?.error).toContain('Syntax Error')
  })

-  test('should remove childTraceSpans from output after integrating them as children', () => {
+  it.concurrent('removes childTraceSpans from output after integrating them as children', () => {
    const mockExecutionResult: ExecutionResult = {
      success: true,
      output: { result: 'parent output' },
@@ -843,15 +846,157 @@ describe('buildTraceSpans', () => {
      data: 'some result',
    })
  })
+
+  it.concurrent('matches multiple tool calls with same name by sequential order', () => {
+    // Verifies that when an agent calls the same tool several times (e.g., search_tool
+    // called 3 times with different queries), each tool segment is paired with the
+    // same-named tool call at the same position in the sequence, not just the first name match.
+    const mockExecutionResult: ExecutionResult = {
+      success: true,
+      output: { content: 'Final output with multiple searches' },
+      logs: [
+        {
+          blockId: 'agent-multi-search',
+          blockName: 'Multi-Search Agent',
+          blockType: 'agent',
+          startedAt: '2024-01-01T10:00:00.000Z',
+          endedAt: '2024-01-01T10:00:10.000Z',
+          durationMs: 10000,
+          success: true,
+          input: { userPrompt: 'Search for multiple topics' },
+          output: {
+            content: 'Results from multiple searches',
+            model: 'gpt-4o',
+            tokens: { input: 50, output: 100, total: 150 },
+            providerTiming: {
+              duration: 10000,
+              startTime: '2024-01-01T10:00:00.000Z',
+              endTime: '2024-01-01T10:00:10.000Z',
+              timeSegments: [
+                {
+                  type: 'model',
+                  name: 'Initial response',
+                  startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
+                  endTime: 1704103201000,
+                  duration: 1000,
+                },
+                {
+                  type: 'tool',
+                  name: 'search_tool',
+                  startTime: 1704103201000, // 2024-01-01T10:00:01.000Z
+                  endTime: 1704103202000,
+                  duration: 1000,
+                },
+                {
+                  type: 'model',
+                  name: 'Model response (iteration 1)',
+                  startTime: 1704103202000,
+                  endTime: 1704103203000,
+                  duration: 1000,
+                },
+                {
+                  type: 'tool',
+                  name: 'search_tool',
+                  startTime: 1704103203000, // 2024-01-01T10:00:03.000Z
+                  endTime: 1704103204500,
+                  duration: 1500,
+                },
+                {
+                  type: 'model',
+                  name: 'Model response (iteration 2)',
+                  startTime: 1704103204500,
+                  endTime: 1704103206000,
+                  duration: 1500,
+                },
+                {
+                  type: 'tool',
+                  name: 'search_tool',
+                  startTime: 1704103206000, // 2024-01-01T10:00:06.000Z
+                  endTime: 1704103208000,
+                  duration: 2000,
+                },
+                {
+                  type: 'model',
+                  name: 'Model response (iteration 3)',
+                  startTime: 1704103208000,
+                  endTime: 1704103210000,
+                  duration: 2000,
+                },
+              ],
+            },
+            toolCalls: {
+              list: [
+                {
+                  name: 'search_tool',
+                  arguments: { query: 'first query' },
+                  result: { results: ['first result'] },
+                  duration: 1000,
+                  startTime: '2024-01-01T10:00:01.000Z', // Matches first tool segment
+                  endTime: '2024-01-01T10:00:02.000Z',
+                },
+                {
+                  name: 'search_tool',
+                  arguments: { query: 'second query' },
+                  result: { results: ['second result'] },
+                  duration: 1500,
+                  startTime: '2024-01-01T10:00:03.000Z', // Matches second tool segment
+                  endTime: '2024-01-01T10:00:04.500Z',
+                },
+                {
+                  name: 'search_tool',
+                  arguments: { query: 'third query' },
+                  result: { results: ['third result'] },
+                  duration: 2000,
+                  startTime: '2024-01-01T10:00:06.000Z', // Matches third tool segment
+                  endTime: '2024-01-01T10:00:08.000Z',
+                },
+              ],
+              count: 3,
+            },
+          },
+        },
+      ],
+    }
+
+    const { traceSpans } = buildTraceSpans(mockExecutionResult)
+
+    expect(traceSpans).toHaveLength(1)
+    const agentSpan = traceSpans[0]
+    expect(agentSpan.children).toBeDefined()
+    expect(agentSpan.children).toHaveLength(7)
+
+    const segments = agentSpan.children!
+
+    // First search_tool call should have "first query"
+    const firstToolSegment = segments[1]
+    expect(firstToolSegment.name).toBe('search_tool')
+    expect(firstToolSegment.type).toBe('tool')
+    expect(firstToolSegment.input).toEqual({ query: 'first query' })
+    expect(firstToolSegment.output).toEqual({ results: ['first result'] })
+
+    // Second search_tool call should have "second query"
+    const secondToolSegment = segments[3]
+    expect(secondToolSegment.name).toBe('search_tool')
+    expect(secondToolSegment.type).toBe('tool')
+    expect(secondToolSegment.input).toEqual({ query: 'second query' })
+    expect(secondToolSegment.output).toEqual({ results: ['second result'] })
+
+    // Third search_tool call should have "third query"
+    const thirdToolSegment = segments[5]
+    expect(thirdToolSegment.name).toBe('search_tool')
+    expect(thirdToolSegment.type).toBe('tool')
+    expect(thirdToolSegment.input).toEqual({ query: 'third query' })
+    expect(thirdToolSegment.output).toEqual({ results: ['third result'] })
+  })
 })

 describe('stripCustomToolPrefix', () => {
-  test('should strip custom_ prefix from tool names', () => {
+  it.concurrent('strips custom_ prefix from tool names', () => {
    expect(stripCustomToolPrefix('custom_test_tool')).toBe('test_tool')
    expect(stripCustomToolPrefix('custom_analysis')).toBe('analysis')
  })

-  test('should leave non-custom tool names unchanged', () => {
+  it.concurrent('leaves non-custom tool names unchanged', () => {
    expect(stripCustomToolPrefix('http_request')).toBe('http_request')
    expect(stripCustomToolPrefix('serper_search')).toBe('serper_search')
    expect(stripCustomToolPrefix('regular_tool')).toBe('regular_tool')
--- a/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts
+++ b/apps/sim/lib/logs/execution/trace-spans/trace-spans.ts
@@ -233,6 +233,17 @@ export function buildTraceSpans(result: ExecutionResult): {
      const timeSegments = log.output.providerTiming.timeSegments
      const toolCallsData = log.output?.toolCalls?.list || log.output?.toolCalls || []

+      const toolCallsByName = new Map<string, Array<Record<string, unknown>>>()
+      for (const tc of toolCallsData as Array<{ name?: string; [key: string]: unknown }>) {
+        const normalizedName = stripCustomToolPrefix(tc.name || '')
+        if (!toolCallsByName.has(normalizedName)) {
+          toolCallsByName.set(normalizedName, [])
+        }
+        toolCallsByName.get(normalizedName)!.push(tc)
+      }
+
+      const toolCallIndices = new Map<string, number>()
+
      span.children = timeSegments.map(
        (
          segment: {
@@ -259,14 +270,25 @@ export function buildTraceSpans(result: ExecutionResult): {
          }

          if (segment.type === 'tool') {
-            const matchingToolCall = toolCallsData.find(
-              (tc: { name?: string; [key: string]: unknown }) =>
-                tc.name === segment.name || stripCustomToolPrefix(tc.name || '') === segment.name
-            )
+            const normalizedName = stripCustomToolPrefix(segment.name || '')
+
+            const toolCallsForName = toolCallsByName.get(normalizedName) || []
+            const currentIndex = toolCallIndices.get(normalizedName) || 0
+            const matchingToolCall = toolCallsForName[currentIndex] as
+              | {
+                  error?: string
+                  arguments?: Record<string, unknown>
+                  input?: Record<string, unknown>
+                  result?: Record<string, unknown>
+                  output?: Record<string, unknown>
+                }
+              | undefined
+
+            toolCallIndices.set(normalizedName, currentIndex + 1)

            return {
              id: `${span.id}-segment-${index}`,
-              name: stripCustomToolPrefix(segment.name || ''),
+              name: normalizedName,
              type: 'tool',
              duration: segment.duration,
              startTime: segmentStartTime,