improve UI

This commit is contained in:
Vikhyath Mondreti
2025-07-12 13:59:42 -07:00
parent 027614f373
commit 6967ac0417
3 changed files with 522 additions and 101 deletions

View File

@@ -95,6 +95,70 @@ function transformBlockData(data: any, blockType: string, isInput: boolean) {
return data
}
// Collapsible Input/Output component
/** Props accepted by the CollapsibleInputOutput component. */
interface CollapsibleInputOutputProps {
/** Trace span whose `input`, `output`, `type` and `status` fields drive the rendered sections. */
span: TraceSpan
/** Identifier passed in by the caller; the component body does not currently use it. */
spanId: string
}
/**
 * Renders a span's input and output payloads as two independently collapsible
 * sections. Both sections start collapsed; each header button toggles only its
 * own section. A section is omitted entirely when the corresponding payload on
 * the span is falsy.
 *
 * When `span.status` is `'error'` the output section is labelled
 * "Error Details" and `BlockDataDisplay` is told to render in error mode.
 *
 * `spanId` remains part of the props type for caller compatibility, but it is
 * not needed here and is therefore not destructured.
 */
function CollapsibleInputOutput({ span }: CollapsibleInputOutputProps) {
  const [inputExpanded, setInputExpanded] = useState(false)
  const [outputExpanded, setOutputExpanded] = useState(false)

  // Evaluated once; used for both the header label and the display mode.
  const isError = span.status === 'error'

  return (
    <div className='mt-2 mr-4 mb-4 ml-8 space-y-3 overflow-hidden'>
      {/* Input Data - Collapsible */}
      {span.input && (
        <div>
          {/* type='button' keeps the toggle from submitting an enclosing form */}
          <button
            type='button'
            aria-expanded={inputExpanded}
            onClick={() => setInputExpanded((previous) => !previous)}
            className='flex items-center gap-2 mb-2 font-medium text-muted-foreground text-xs hover:text-foreground transition-colors'
          >
            {inputExpanded ? (
              <ChevronDown className='h-3 w-3' />
            ) : (
              <ChevronRight className='h-3 w-3' />
            )}
            Input
          </button>
          {inputExpanded && (
            <div className='mb-2 overflow-hidden rounded-md bg-secondary/30 p-3'>
              <BlockDataDisplay data={span.input} blockType={span.type} isInput={true} />
            </div>
          )}
        </div>
      )}

      {/* Output Data - Collapsible */}
      {span.output && (
        <div>
          <button
            type='button'
            aria-expanded={outputExpanded}
            onClick={() => setOutputExpanded((previous) => !previous)}
            className='flex items-center gap-2 mb-2 font-medium text-muted-foreground text-xs hover:text-foreground transition-colors'
          >
            {outputExpanded ? (
              <ChevronDown className='h-3 w-3' />
            ) : (
              <ChevronRight className='h-3 w-3' />
            )}
            {isError ? 'Error Details' : 'Output'}
          </button>
          {outputExpanded && (
            <div className='mb-2 overflow-hidden rounded-md bg-secondary/30 p-3'>
              <BlockDataDisplay
                data={span.output}
                blockType={span.type}
                isInput={false}
                isError={isError}
              />
            </div>
          )}
        </div>
      )}
    </div>
  )
}
// Component to display block input/output data in a clean, readable format
function BlockDataDisplay({
data,
@@ -544,37 +608,8 @@ function TraceSpanItem({
{/* Expanded content */}
{expanded && (
<div>
{/* Block Input/Output Data */}
{(span.input || span.output) && (
<div className='mt-2 mr-4 mb-4 ml-8 space-y-3 overflow-hidden'>
{/* Input Data */}
{span.input && (
<div>
<h4 className='mb-2 font-medium text-muted-foreground text-xs'>Input</h4>
<div className='mb-2 overflow-hidden rounded-md bg-secondary/30 p-3'>
<BlockDataDisplay data={span.input} blockType={span.type} isInput={true} />
</div>
</div>
)}
{/* Output Data */}
{span.output && (
<div>
<h4 className='mb-2 font-medium text-muted-foreground text-xs'>
{span.status === 'error' ? 'Error Details' : 'Output'}
</h4>
<div className='mb-2 overflow-hidden rounded-md bg-secondary/30 p-3'>
<BlockDataDisplay
data={span.output}
blockType={span.type}
isInput={false}
isError={span.status === 'error'}
/>
</div>
</div>
)}
</div>
)}
{/* Block Input/Output Data - Collapsible */}
{(span.input || span.output) && <CollapsibleInputOutput span={span} spanId={spanId} />}
{/* Children and tool calls */}
{/* Render child spans */}

View File

@@ -3,7 +3,122 @@ import type { ExecutionResult } from '@/executor/types'
import { buildTraceSpans, stripCustomToolPrefix } from './trace-spans'
describe('buildTraceSpans', () => {
test('should extract tool calls from agent block output with toolCalls.list format', () => {
// Feeds buildTraceSpans a single agent log whose providerTiming carries four
// timeSegments (model, tool, tool, model) and expects one child span per
// segment, in the same order, with tool segments carrying the arguments and
// result of the toolCalls.list entry that has the same name.
test('should extract sequential segments from timeSegments data', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
logs: [
{
blockId: 'agent-1',
blockName: 'Test Agent',
blockType: 'agent',
startedAt: '2024-01-01T10:00:00.000Z',
endedAt: '2024-01-01T10:00:08.000Z',
durationMs: 8000,
success: true,
input: { userPrompt: 'Test prompt' },
output: {
content: 'Agent response',
model: 'gpt-4o',
tokens: { prompt: 10, completion: 20, total: 30 },
providerTiming: {
duration: 8000,
startTime: '2024-01-01T10:00:00.000Z',
endTime: '2024-01-01T10:00:08.000Z',
// Segment times are epoch milliseconds; durations sum to the 8000 ms block duration
timeSegments: [
{
type: 'model',
name: 'Initial response',
startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
endTime: 1704103201000, // 2024-01-01T10:00:01.000Z
duration: 1000,
},
{
type: 'tool',
name: 'custom_test_tool',
startTime: 1704103201000, // 2024-01-01T10:00:01.000Z
endTime: 1704103203000, // 2024-01-01T10:00:03.000Z
duration: 2000,
},
{
type: 'tool',
name: 'http_request',
startTime: 1704103203000, // 2024-01-01T10:00:03.000Z
endTime: 1704103206000, // 2024-01-01T10:00:06.000Z
duration: 3000,
},
{
type: 'model',
name: 'Model response (iteration 1)',
startTime: 1704103206000, // 2024-01-01T10:00:06.000Z
endTime: 1704103208000, // 2024-01-01T10:00:08.000Z
duration: 2000,
},
],
},
// Tool call payloads; names match the two tool segments above
toolCalls: {
list: [
{
name: 'custom_test_tool',
arguments: { input: 'test input' },
result: { output: 'test output' },
duration: 2000,
},
{
name: 'http_request',
arguments: { url: 'https://api.example.com' },
result: { status: 200, data: 'response' },
duration: 3000,
},
],
count: 2,
},
},
},
],
}
const { traceSpans } = buildTraceSpans(mockExecutionResult)
expect(traceSpans).toHaveLength(1)
const agentSpan = traceSpans[0]
expect(agentSpan.type).toBe('agent')
expect(agentSpan.children).toBeDefined()
expect(agentSpan.children).toHaveLength(4)
// Check sequential segments
const segments = agentSpan.children!
// First segment: Initial model response
expect(segments[0].name).toBe('Initial response')
expect(segments[0].type).toBe('model')
expect(segments[0].duration).toBe(1000)
expect(segments[0].status).toBe('success')
// Second segment: First tool call
expect(segments[1].name).toBe('test_tool') // custom_ prefix should be stripped
expect(segments[1].type).toBe('tool')
expect(segments[1].duration).toBe(2000)
expect(segments[1].status).toBe('success')
expect(segments[1].input).toEqual({ input: 'test input' })
expect(segments[1].output).toEqual({ output: 'test output' })
// Third segment: Second tool call
expect(segments[2].name).toBe('http_request')
expect(segments[2].type).toBe('tool')
expect(segments[2].duration).toBe(3000)
expect(segments[2].status).toBe('success')
expect(segments[2].input).toEqual({ url: 'https://api.example.com' })
expect(segments[2].output).toEqual({ status: 200, data: 'response' })
// Fourth segment: Final model response
expect(segments[3].name).toBe('Model response (iteration 1)')
expect(segments[3].type).toBe('model')
expect(segments[3].duration).toBe(2000)
expect(segments[3].status).toBe('success')
})
test('should fallback to toolCalls extraction when timeSegments not available', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -25,6 +140,7 @@ describe('buildTraceSpans', () => {
duration: 4000,
startTime: '2024-01-01T10:00:00.500Z',
endTime: '2024-01-01T10:00:04.500Z',
// No timeSegments - should fallback to toolCalls
},
toolCalls: {
list: [
@@ -77,7 +193,7 @@ describe('buildTraceSpans', () => {
expect(secondToolCall.output).toEqual({ status: 200, data: 'response' })
})
test('should extract tool calls from agent block output with direct toolCalls array format', () => {
test('should extract tool calls from agent block output with direct toolCalls array format (fallback)', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -98,6 +214,7 @@ describe('buildTraceSpans', () => {
duration: 2500,
startTime: '2024-01-01T10:00:00.250Z',
endTime: '2024-01-01T10:00:02.750Z',
// No timeSegments - should fallback to toolCalls
},
toolCalls: [
{
@@ -129,7 +246,7 @@ describe('buildTraceSpans', () => {
expect(toolCall.output).toEqual({ results: ['result1', 'result2'] })
})
test('should extract tool calls from streaming response with executionData format', () => {
test('should extract tool calls from streaming response with executionData format (fallback)', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -146,6 +263,7 @@ describe('buildTraceSpans', () => {
output: {
content: 'Agent response',
model: 'gpt-4o',
// No providerTiming - should fallback to executionData
executionData: {
output: {
toolCalls: {
@@ -182,7 +300,7 @@ describe('buildTraceSpans', () => {
expect(toolCall.output).toEqual({ analysis: 'completed' })
})
test('should handle tool calls with errors', () => {
test('should handle tool calls with errors in timeSegments', () => {
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final output' },
@@ -192,13 +310,41 @@ describe('buildTraceSpans', () => {
blockName: 'Error Agent',
blockType: 'agent',
startedAt: '2024-01-01T10:00:00.000Z',
endedAt: '2024-01-01T10:00:02.000Z',
durationMs: 2000,
endedAt: '2024-01-01T10:00:03.000Z',
durationMs: 3000,
success: true,
input: { userPrompt: 'Test prompt' },
output: {
content: 'Agent response',
model: 'gpt-4o',
providerTiming: {
duration: 3000,
startTime: '2024-01-01T10:00:00.000Z',
endTime: '2024-01-01T10:00:03.000Z',
timeSegments: [
{
type: 'model',
name: 'Initial response',
startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
endTime: 1704103201000, // 2024-01-01T10:00:01.000Z
duration: 1000,
},
{
type: 'tool',
name: 'failing_tool',
startTime: 1704103201000, // 2024-01-01T10:00:01.000Z
endTime: 1704103202000, // 2024-01-01T10:00:02.000Z
duration: 1000,
},
{
type: 'model',
name: 'Model response (iteration 1)',
startTime: 1704103202000, // 2024-01-01T10:00:02.000Z
endTime: 1704103203000, // 2024-01-01T10:00:03.000Z
duration: 1000,
},
],
},
toolCalls: {
list: [
{
@@ -206,8 +352,8 @@ describe('buildTraceSpans', () => {
arguments: { input: 'test' },
error: 'Tool execution failed',
duration: 1000,
startTime: '2024-01-01T10:00:00.500Z',
endTime: '2024-01-01T10:00:01.500Z',
startTime: '2024-01-01T10:00:01.000Z',
endTime: '2024-01-01T10:00:02.000Z',
},
],
count: 1,
@@ -221,14 +367,16 @@ describe('buildTraceSpans', () => {
expect(traceSpans).toHaveLength(1)
const agentSpan = traceSpans[0]
expect(agentSpan.toolCalls).toBeDefined()
expect(agentSpan.toolCalls).toHaveLength(1)
expect(agentSpan.children).toBeDefined()
expect(agentSpan.children).toHaveLength(3)
const toolCall = agentSpan.toolCalls![0]
expect(toolCall.name).toBe('failing_tool')
expect(toolCall.status).toBe('error')
expect(toolCall.error).toBe('Tool execution failed')
expect(toolCall.input).toEqual({ input: 'test' })
// Check the tool segment with error
const toolSegment = agentSpan.children![1]
expect(toolSegment.name).toBe('failing_tool')
expect(toolSegment.type).toBe('tool')
expect(toolSegment.status).toBe('error')
expect(toolSegment.input).toEqual({ input: 'test' })
expect(toolSegment.output).toEqual({ error: 'Tool execution failed' })
})
test('should handle blocks without tool calls', () => {
@@ -257,6 +405,180 @@ describe('buildTraceSpans', () => {
expect(textSpan.type).toBe('text')
expect(textSpan.toolCalls).toBeUndefined()
})
// Exercises buildTraceSpans with one agent log containing six timeSegments
// (three model segments and three tool segments) and checks that the child
// spans mirror that sequence, that tool payloads are attached by name, and
// that span.toolCalls stays undefined when children are produced.
test('should handle complex multi-iteration agent execution with sequential segments', () => {
// This test simulates a real agent execution with multiple tool calls and model iterations
const mockExecutionResult: ExecutionResult = {
success: true,
output: { content: 'Final comprehensive response' },
logs: [
{
blockId: 'agent-complex',
blockName: 'Multi-Tool Agent',
blockType: 'agent',
startedAt: '2024-01-01T10:00:00.000Z',
endedAt: '2024-01-01T10:00:15.000Z',
durationMs: 15000,
success: true,
input: { userPrompt: 'Research and analyze tennis news' },
output: {
content: 'Based on my research using multiple sources...',
model: 'gpt-4o',
tokens: { prompt: 50, completion: 200, total: 250 },
cost: { total: 0.0025, prompt: 0.001, completion: 0.0015 },
providerTiming: {
duration: 15000,
startTime: '2024-01-01T10:00:00.000Z',
endTime: '2024-01-01T10:00:15.000Z',
modelTime: 8000,
toolsTime: 6500,
iterations: 2,
firstResponseTime: 1500,
// Segment times are epoch milliseconds; each segment starts where the previous one ends
timeSegments: [
{
type: 'model',
name: 'Initial response',
startTime: 1704103200000, // 2024-01-01T10:00:00.000Z
endTime: 1704103201500, // 2024-01-01T10:00:01.500Z
duration: 1500,
},
{
type: 'tool',
name: 'exa_search',
startTime: 1704103201500, // 2024-01-01T10:00:01.500Z
endTime: 1704103204000, // 2024-01-01T10:00:04.000Z
duration: 2500,
},
{
type: 'tool',
name: 'custom_analysis_tool',
startTime: 1704103204000, // 2024-01-01T10:00:04.000Z
endTime: 1704103208000, // 2024-01-01T10:00:08.000Z
duration: 4000,
},
{
type: 'model',
name: 'Model response (iteration 1)',
startTime: 1704103208000, // 2024-01-01T10:00:08.000Z
endTime: 1704103211500, // 2024-01-01T10:00:11.500Z
duration: 3500,
},
{
type: 'tool',
name: 'http_request',
startTime: 1704103211500, // 2024-01-01T10:00:11.500Z
endTime: 1704103213500, // 2024-01-01T10:00:13.500Z
duration: 2000,
},
{
type: 'model',
name: 'Model response (iteration 2)',
startTime: 1704103213500, // 2024-01-01T10:00:13.500Z
endTime: 1704103215000, // 2024-01-01T10:00:15.000Z
duration: 1500,
},
],
},
// Tool call payloads; names match the three tool segments above
toolCalls: {
list: [
{
name: 'exa_search',
arguments: { query: 'tennis news 2024', apiKey: 'secret-key' },
result: { results: [{ title: 'Tennis News 1' }, { title: 'Tennis News 2' }] },
duration: 2500,
},
{
name: 'custom_analysis_tool',
arguments: { data: 'tennis data', mode: 'comprehensive' },
result: { analysis: 'Detailed tennis analysis', confidence: 0.95 },
duration: 4000,
},
{
name: 'http_request',
arguments: {
url: 'https://api.tennis.com/stats',
headers: { authorization: 'Bearer token' },
},
result: { status: 200, data: { stats: 'tennis statistics' } },
duration: 2000,
},
],
count: 3,
},
},
},
],
}
const { traceSpans } = buildTraceSpans(mockExecutionResult)
expect(traceSpans).toHaveLength(1)
const agentSpan = traceSpans[0]
// Verify agent span properties
expect(agentSpan.type).toBe('agent')
expect(agentSpan.name).toBe('Multi-Tool Agent')
expect(agentSpan.duration).toBe(15000)
expect(agentSpan.children).toBeDefined()
expect(agentSpan.children).toHaveLength(6) // 3 model + 3 tool = 6 segments
const segments = agentSpan.children!
// Verify sequential execution flow
// 1. Initial model response
expect(segments[0].name).toBe('Initial response')
expect(segments[0].type).toBe('model')
expect(segments[0].duration).toBe(1500)
expect(segments[0].status).toBe('success')
// 2. First tool call - exa_search
expect(segments[1].name).toBe('exa_search')
expect(segments[1].type).toBe('tool')
expect(segments[1].duration).toBe(2500)
expect(segments[1].status).toBe('success')
expect(segments[1].input).toEqual({ query: 'tennis news 2024', apiKey: 'secret-key' })
expect(segments[1].output).toEqual({
results: [{ title: 'Tennis News 1' }, { title: 'Tennis News 2' }],
})
// 3. Second tool call - analysis_tool (custom_ prefix stripped)
expect(segments[2].name).toBe('analysis_tool')
expect(segments[2].type).toBe('tool')
expect(segments[2].duration).toBe(4000)
expect(segments[2].status).toBe('success')
expect(segments[2].input).toEqual({ data: 'tennis data', mode: 'comprehensive' })
expect(segments[2].output).toEqual({ analysis: 'Detailed tennis analysis', confidence: 0.95 })
// 4. First iteration model response
expect(segments[3].name).toBe('Model response (iteration 1)')
expect(segments[3].type).toBe('model')
expect(segments[3].duration).toBe(3500)
expect(segments[3].status).toBe('success')
// 5. Third tool call - http_request
expect(segments[4].name).toBe('http_request')
expect(segments[4].type).toBe('tool')
expect(segments[4].duration).toBe(2000)
expect(segments[4].status).toBe('success')
expect(segments[4].input).toEqual({
url: 'https://api.tennis.com/stats',
headers: { authorization: 'Bearer token' },
})
expect(segments[4].output).toEqual({ status: 200, data: { stats: 'tennis statistics' } })
// 6. Final iteration model response
expect(segments[5].name).toBe('Model response (iteration 2)')
expect(segments[5].type).toBe('model')
expect(segments[5].duration).toBe(1500)
expect(segments[5].status).toBe('success')
// Verify timing alignment
const totalSegmentTime = segments.reduce((sum, segment) => sum + segment.duration, 0)
expect(totalSegmentTime).toBe(15000) // Should match total agent duration
// Verify no toolCalls property exists (since we're using children instead)
expect(agentSpan.toolCalls).toBeUndefined()
})
})
describe('stripCustomToolPrefix', () => {

View File

@@ -115,71 +115,135 @@ export function buildTraceSpans(result: ExecutionResult): {
})
}
// Always extract tool calls if they exist (regardless of provider timing)
// Tool calls handling for different formats:
// 1. Standard format in response.toolCalls.list
// 2. Direct toolCalls array in response
// 3. Streaming response formats with executionData
// Enhanced approach: Use timeSegments for sequential flow if available
// This provides the actual model→tool→model execution sequence
if (
log.output?.providerTiming?.timeSegments &&
Array.isArray(log.output.providerTiming.timeSegments)
) {
const timeSegments = log.output.providerTiming.timeSegments
const toolCallsData = log.output?.toolCalls?.list || log.output?.toolCalls || []
// Check all possible paths for toolCalls
let toolCallsList = null
// Create child spans for each time segment
span.children = timeSegments.map((segment: any, index: number) => {
const segmentStartTime = new Date(segment.startTime).toISOString()
const segmentEndTime = new Date(segment.endTime).toISOString()
// Wrap extraction in try-catch to handle unexpected toolCalls formats
try {
if (log.output?.toolCalls?.list) {
// Standard format with list property
toolCallsList = log.output.toolCalls.list
} else if (Array.isArray(log.output?.toolCalls)) {
// Direct array format
toolCallsList = log.output.toolCalls
} else if (log.output?.executionData?.output?.toolCalls) {
// Streaming format with executionData
const tcObj = log.output.executionData.output.toolCalls
toolCallsList = Array.isArray(tcObj) ? tcObj : tcObj.list || []
}
if (segment.type === 'tool') {
// Find matching tool call data for this segment
const matchingToolCall = toolCallsData.find(
(tc: any) => tc.name === segment.name || stripCustomToolPrefix(tc.name) === segment.name
)
// Validate that toolCallsList is actually an array before processing
if (toolCallsList && !Array.isArray(toolCallsList)) {
logger.warn(`toolCallsList is not an array: ${typeof toolCallsList}`, {
return {
id: `${span.id}-segment-${index}`,
name: stripCustomToolPrefix(segment.name),
type: 'tool',
duration: segment.duration,
startTime: segmentStartTime,
endTime: segmentEndTime,
status: matchingToolCall?.error ? 'error' : 'success',
input: matchingToolCall?.arguments || matchingToolCall?.input,
output: matchingToolCall?.error
? {
error: matchingToolCall.error,
...(matchingToolCall.result || matchingToolCall.output || {}),
}
: matchingToolCall?.result || matchingToolCall?.output,
}
}
// Model segment
return {
id: `${span.id}-segment-${index}`,
name: segment.name,
type: 'model',
duration: segment.duration,
startTime: segmentStartTime,
endTime: segmentEndTime,
status: 'success',
}
})
logger.debug(
`Created ${span.children?.length || 0} sequential segments for span ${span.id}`,
{
blockId: log.blockId,
blockType: log.blockType,
})
toolCallsList = []
segments:
span.children?.map((child) => ({
name: child.name,
type: child.type,
duration: child.duration,
})) || [],
}
)
} else {
// Fallback: Extract tool calls using the original approach for backwards compatibility
// Tool calls handling for different formats:
// 1. Standard format in response.toolCalls.list
// 2. Direct toolCalls array in response
// 3. Streaming response formats with executionData
// Check all possible paths for toolCalls
let toolCallsList = null
// Wrap extraction in try-catch to handle unexpected toolCalls formats
try {
if (log.output?.toolCalls?.list) {
// Standard format with list property
toolCallsList = log.output.toolCalls.list
} else if (Array.isArray(log.output?.toolCalls)) {
// Direct array format
toolCallsList = log.output.toolCalls
} else if (log.output?.executionData?.output?.toolCalls) {
// Streaming format with executionData
const tcObj = log.output.executionData.output.toolCalls
toolCallsList = Array.isArray(tcObj) ? tcObj : tcObj.list || []
}
// Validate that toolCallsList is actually an array before processing
if (toolCallsList && !Array.isArray(toolCallsList)) {
logger.warn(`toolCallsList is not an array: ${typeof toolCallsList}`, {
blockId: log.blockId,
blockType: log.blockType,
})
toolCallsList = []
}
} catch (error) {
logger.error(`Error extracting toolCalls from block ${log.blockId}:`, error)
toolCallsList = [] // Set to empty array as fallback
}
} catch (error) {
logger.error(`Error extracting toolCalls from block ${log.blockId}:`, error)
toolCallsList = [] // Set to empty array as fallback
}
if (toolCallsList && toolCallsList.length > 0) {
span.toolCalls = toolCallsList
.map((tc: any) => {
// Add null check for each tool call
if (!tc) return null
if (toolCallsList && toolCallsList.length > 0) {
span.toolCalls = toolCallsList
.map((tc: any) => {
// Add null check for each tool call
if (!tc) return null
try {
return {
name: stripCustomToolPrefix(tc.name || 'unnamed-tool'),
duration: tc.duration || 0,
startTime: tc.startTime || log.startedAt,
endTime: tc.endTime || log.endedAt,
status: tc.error ? 'error' : 'success',
input: tc.arguments || tc.input,
output: tc.result || tc.output,
error: tc.error,
try {
return {
name: stripCustomToolPrefix(tc.name || 'unnamed-tool'),
duration: tc.duration || 0,
startTime: tc.startTime || log.startedAt,
endTime: tc.endTime || log.endedAt,
status: tc.error ? 'error' : 'success',
input: tc.arguments || tc.input,
output: tc.result || tc.output,
error: tc.error,
}
} catch (tcError) {
logger.error(`Error processing tool call in block ${log.blockId}:`, tcError)
return null
}
} catch (tcError) {
logger.error(`Error processing tool call in block ${log.blockId}:`, tcError)
return null
}
})
.filter(Boolean) // Remove any null entries from failed processing
})
.filter(Boolean) // Remove any null entries from failed processing
logger.debug(`Added ${span.toolCalls?.length || 0} tool calls to span ${span.id}`, {
blockId: log.blockId,
blockType: log.blockType,
toolCallNames: span.toolCalls?.map((tc) => tc.name) || [],
})
logger.debug(`Added ${span.toolCalls?.length || 0} tool calls to span ${span.id}`, {
blockId: log.blockId,
blockType: log.blockType,
toolCallNames: span.toolCalls?.map((tc) => tc.name) || [],
})
}
}
// Store in map