Modify executor to treat the evaluator as an 'output' block instead of a 'router' block; the looping logic still needs to be updated, but the forward path works

Waleed Latif
2025-02-13 00:59:23 -08:00
parent e2a5e39b0b
commit af323a71ab
5 changed files with 222 additions and 350 deletions
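In effect, the evaluator no longer emits a routing decision; it scores content against user-defined metrics and surfaces those scores as plain output fields. A minimal sketch of the new call path, assuming the `Metric` shape and `generateEvaluatorPrompt` signature from the diff below (the metric definitions and content here are invented for illustration):

// Hypothetical usage of the reworked prompt generator - not part of the commit.
const metrics: Metric[] = [
  { name: 'accuracy', description: 'Factual correctness of the answer', range: { min: 0, max: 10 } },
  { name: 'clarity', description: 'Readability and structure', range: { min: 0, max: 10 } },
]

const systemPrompt = generateEvaluatorPrompt(metrics, 'Draft answer to evaluate...')

// The model is now expected to reply with bare metric scores, e.g.
//   { "accuracy": 8, "clarity": 9 }
// instead of the old router-style { "decision": "<block-id>", "justification": "..." }.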


@@ -3,14 +3,13 @@ import { ToolResponse } from '@/tools/types'
 import { MODEL_TOOLS, ModelType } from '../consts'
 import { BlockConfig, ParamType } from '../types'
 
-interface TargetBlock {
-  id: string
-  type?: string
-  title?: string
-  description?: string
-  category?: string
-  subBlocks?: Record<string, any>
-  currentState?: any
+interface Metric {
+  name: string
+  description: string
+  range: {
+    min: number
+    max: number
+  }
 }
 
 interface EvaluatorResponse extends ToolResponse {
@@ -22,111 +21,39 @@ interface EvaluatorResponse extends ToolResponse {
       completion?: number
       total?: number
     }
-    evaluation: {
-      score: number
-      reasoning: string
-      metrics: Record<string, number>
-    }
-    selectedPath: {
-      blockId: string
-      blockType: string
-      blockTitle: string
-    }
-    justification: string
-    history: Array<{ response: string; justification: string }>
+    [metricName: string]: any // Allow dynamic metric fields
   }
 }
 
-export const generateEvaluatorPrompt = (
-  evaluationCriteria: string,
-  content: string,
-  targetBlocks?: TargetBlock[],
-  history?: Array<{ response: string; justification: string }>
-): string => {
-  const basePrompt = `You are an objective evaluation agent. Analyze the content against the provided criteria and determine the next step based on the evaluation score.
+export const generateEvaluatorPrompt = (metrics: Metric[], content: string): string => {
+  const metricsDescription = metrics
+    .map(
+      (metric) => `${metric.name} (${metric.range.min}-${metric.range.max}): ${metric.description}`
+    )
+    .join('\n')
+
+  return `You are an objective evaluation agent. Analyze the content against the provided metrics and provide detailed scoring.
 
 Evaluation Instructions:
-1. Score the content (0 to 1) using these metrics:
-   - Accuracy: How well does it meet requirements?
-   - Completeness: Are all aspects addressed?
-   - Quality: Is it clear and professional?
-   - Relevance: Does it match the criteria?
-2. Calculate final score:
-   - Average all metrics
-   - Round to 2 decimal places${
-     history && history.length > 0
-       ? `
-
-Previous Attempts:
-${history
-  .map(
-    (entry, i) => `
-Attempt ${i + 1}:
-Response: ${entry.response}
-Evaluation: ${entry.justification}
----`
-  )
-  .join('\n')}`
-       : ''
-   }
-
-Content:
-${content}
-
-Criteria:
-${evaluationCriteria}`
-
-  // If no target blocks, just return the evaluation without routing
-  if (!targetBlocks || targetBlocks.length === 0) {
-    return `${basePrompt}
-
-IMPORTANT: When there are no target blocks, you must use exactly "end" as the decision value. Do not use any other word.
-
-Response Format:
-Return a JSON object with the following structure:
-{
-  "decision": "end", // You must use exactly "end" here - this is a required system keyword
-  "justification": "Brief explanation of the pure evaluation of the content. DO NOT include any information about the target blocks."
-}
-
-Remember:
-1. Your response must be ONLY the JSON object - no additional text, formatting, or explanation.
-2. The "decision" field MUST be exactly "end" - this is a required keyword that the system expects.`
-  }
-
-  const targetBlocksInfo = `
-Available Destinations:
-${targetBlocks
-  .map(
-    (block) => `
-ID: ${block.id}
-Type: ${block.type}
-Title: ${block.title}
-Description: ${block.description}`
-  )
-  .join('\n---\n')}
-
-Routing Rules:
-${
-  targetBlocks.length === 1
-    ? `- Route back to the only available block (${targetBlocks[0].id}) to continue the iteration`
-    : `- Score greater than or equal to 0.85: Choose success path block
-- Score less than 0.85: Choose failure path block`
-}`
-
-  return `${basePrompt}${targetBlocksInfo}
-
-Response Format:
-Return a JSON object with the following structure:
-{
-  "decision": "block-id-here",
-  "justification": "Brief explanation of the pure evaluation of the content. DO NOT include any information about the target blocks."
-}
-
-Remember: Your response must be ONLY the JSON object - no additional text, formatting, or explanation.
-If there is only one available destination, return that block's ID in the decision field regardless of the score.`
-}
+- For each metric, provide a numeric score within the specified range
+- Your response must be a valid JSON object with each metric as a number field
+- Do not include explanations in the JSON - only numeric scores
+
+Metrics to evaluate:
+${metricsDescription}
+
+Content to evaluate:
+${content}`
+}
+
+// Simplified response format generator that matches the agent block schema structure
+const generateResponseFormat = (metrics: Metric[]) => ({
+  fields: metrics.map((metric) => ({
+    name: metric.name,
+    type: 'number',
+    description: `${metric.description} (Score between ${metric.range.min}-${metric.range.max})`,
+  })),
+})
 
 export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
   type: 'evaluator',
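For the two hypothetical metrics from the sketch above, generateResponseFormat would produce a structure along these lines (the shape follows directly from the function body; the values are still invented):

// Sketch: result of generateResponseFormat(metrics) for the hypothetical metrics above.
const responseFormat = {
  fields: [
    { name: 'accuracy', type: 'number', description: 'Factual correctness of the answer (Score between 0-10)' },
    { name: 'clarity', type: 'number', description: 'Readability and structure (Score between 0-10)' },
  ],
}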
@@ -162,11 +89,46 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
   },
   workflow: {
     inputs: {
-      prompt: { type: 'string' as ParamType, required: true },
+      metrics: {
+        type: 'json' as ParamType,
+        required: true,
+        description: 'Array of metrics to evaluate against',
+        schema: {
+          type: 'array',
+          properties: {},
+          items: {
+            type: 'object',
+            properties: {
+              name: {
+                type: 'string',
+                description: 'Name of the metric',
+              },
+              description: {
+                type: 'string',
+                description: 'Description of what this metric measures',
+              },
+              range: {
+                type: 'object',
+                properties: {
+                  min: {
+                    type: 'number',
+                    description: 'Minimum possible score',
+                  },
+                  max: {
+                    type: 'number',
+                    description: 'Maximum possible score',
+                  },
+                },
+                required: ['min', 'max'],
+              },
+            },
+            required: ['name', 'description', 'range'],
+          },
+        },
+      },
       model: { type: 'string' as ParamType, required: true },
       apiKey: { type: 'string' as ParamType, required: true },
       content: { type: 'string' as ParamType, required: true },
-      history: { type: 'json' as ParamType, required: false },
     },
     outputs: {
       response: {
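Note that the prompt and history inputs are dropped along with the routing behavior; the only new requirement is a metrics array that validates against the schema above, for example (invented values):

// A metrics value accepted by the new input schema - hypothetical.
const metricsInput = [
  { name: 'accuracy', description: 'Factual correctness of the answer', range: { min: 0, max: 10 } },
]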
@@ -174,17 +136,24 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
         content: 'string',
         model: 'string',
         tokens: 'any',
-        evaluation: 'json',
-        selectedPath: 'json',
-        justification: 'string',
-        history: 'json',
        },
+       dependsOn: {
+         subBlockId: 'metrics',
+         condition: {
+           whenEmpty: {
+             content: 'string',
+             model: 'string',
+             tokens: 'any',
+           },
+           whenFilled: 'json',
+         },
+       },
      },
    },
   subBlocks: [
     {
-      id: 'prompt',
-      title: 'Evaluation Criteria',
+      id: 'metrics',
+      title: 'Evaluation Metrics',
       type: 'eval-input',
       layout: 'full',
     },
@@ -218,12 +187,14 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
       layout: 'full',
       hidden: true,
       value: (params: Record<string, any>) => {
-        return generateEvaluatorPrompt(
-          params.prompt || '',
-          params.content || '',
-          undefined,
-          params.history || []
-        )
+        const metrics = params.metrics || []
+        const content = params.content || ''
+        const responseFormat = generateResponseFormat(metrics)
+
+        return JSON.stringify({
+          systemPrompt: generateEvaluatorPrompt(metrics, content),
+          responseFormat,
+        })
       },
     },
   ],
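Put together, a run of the reworked block should surface a response output shaped roughly like this - a sketch based on the EvaluatorResponse interface above, with the model id and scores invented:

// Hypothetical EvaluatorResponse output under the new behavior.
// Metric scores land as dynamic fields per [metricName: string]: any;
// there is no selectedPath/justification routing payload anymore.
const exampleOutput = {
  content: 'Draft answer to evaluate...',
  model: 'gpt-4o', // hypothetical model id
  tokens: { prompt: 512, completion: 24, total: 536 },
  accuracy: 8,
  clarity: 9,
}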