Modify executor to treat evaluator as an 'output' block instead of a 'router' block; the looping logic still needs to be updated, but the forward path works
@@ -3,14 +3,13 @@ import { ToolResponse } from '@/tools/types'
 import { MODEL_TOOLS, ModelType } from '../consts'
 import { BlockConfig, ParamType } from '../types'
 
-interface TargetBlock {
-  id: string
-  type?: string
-  title?: string
-  description?: string
-  category?: string
-  subBlocks?: Record<string, any>
-  currentState?: any
-}
+interface Metric {
+  name: string
+  description: string
+  range: {
+    min: number
+    max: number
+  }
+}
 
 interface EvaluatorResponse extends ToolResponse {
@@ -22,111 +21,39 @@ interface EvaluatorResponse extends ToolResponse {
       completion?: number
       total?: number
     }
     evaluation: {
       score: number
       reasoning: string
       metrics: Record<string, number>
     }
-    selectedPath: {
-      blockId: string
-      blockType: string
-      blockTitle: string
-    }
-    justification: string
-    history: Array<{ response: string; justification: string }>
+    [metricName: string]: any // Allow dynamic metric fields
   }
 }
 
-export const generateEvaluatorPrompt = (
-  evaluationCriteria: string,
-  content: string,
-  targetBlocks?: TargetBlock[],
-  history?: Array<{ response: string; justification: string }>
-): string => {
-  const basePrompt = `You are an objective evaluation agent. Analyze the content against the provided criteria and determine the next step based on the evaluation score.
-
-Evaluation Instructions:
-1. Score the content (0 to 1) using these metrics:
-- Accuracy: How well does it meet requirements?
-- Completeness: Are all aspects addressed?
-- Quality: Is it clear and professional?
-- Relevance: Does it match the criteria?
-
-2. Calculate final score:
-- Average all metrics
-- Round to 2 decimal places${
-    history && history.length > 0
-      ? `
-
-Previous Attempts:
-${history
-  .map(
-    (entry, i) => `
-Attempt ${i + 1}:
-Response: ${entry.response}
-Evaluation: ${entry.justification}
----`
-  )
-  .join('\n')}`
-      : ''
-  }
-
-Content:
-${content}
-
-Criteria:
-${evaluationCriteria}`
-
-  // If no target blocks, just return the evaluation without routing
-  if (!targetBlocks || targetBlocks.length === 0) {
-    return `${basePrompt}
-
-IMPORTANT: When there are no target blocks, you must use exactly "end" as the decision value. Do not use any other word.
-
-Response Format:
-Return a JSON object with the following structure:
-{
-  "decision": "end", // You must use exactly "end" here - this is a required system keyword
-  "justification": "Brief explanation of the pure evaluation of the content. DO NOT include any information about the target blocks."
-}
-
-Remember:
-1. Your response must be ONLY the JSON object - no additional text, formatting, or explanation.
-2. The "decision" field MUST be exactly "end" - this is a required keyword that the system expects.`
-  }
-
-  const targetBlocksInfo = `
-Available Destinations:
-${targetBlocks
-  .map(
-    (block) => `
-ID: ${block.id}
-Type: ${block.type}
-Title: ${block.title}
-Description: ${block.description}`
-  )
-  .join('\n---\n')}
-
-Routing Rules:
-${
-  targetBlocks.length === 1
-    ? `- Route back to the only available block (${targetBlocks[0].id}) to continue the iteration`
-    : `- Score greater than or equal to 0.85: Choose success path block
-- Score less than 0.85: Choose failure path block`
-}`
-
-  return `${basePrompt}${targetBlocksInfo}
-
-Response Format:
-Return a JSON object with the following structure:
-{
-  "decision": "block-id-here",
-  "justification": "Brief explanation of the pure evaluation of the content. DO NOT include any information about the target blocks."
-}
-
-Remember: Your response must be ONLY the JSON object - no additional text, formatting, or explanation.
-If there is only one available destination, return that block's ID in the decision field regardless of the score.`
-}
+export const generateEvaluatorPrompt = (metrics: Metric[], content: string): string => {
+  const metricsDescription = metrics
+    .map(
+      (metric) => `${metric.name} (${metric.range.min}-${metric.range.max}): ${metric.description}`
+    )
+    .join('\n')
+
+  return `You are an objective evaluation agent. Analyze the content against the provided metrics and provide detailed scoring.
+
+Evaluation Instructions:
+- For each metric, provide a numeric score within the specified range
+- Your response must be a valid JSON object with each metric as a number field
+- Do not include explanations in the JSON - only numeric scores
+
+Metrics to evaluate:
+${metricsDescription}
+
+Content to evaluate:
+${content}`
+}
+
+// Simplified response format generator that matches the agent block schema structure
+const generateResponseFormat = (metrics: Metric[]) => ({
+  fields: metrics.map((metric) => ({
+    name: metric.name,
+    type: 'number',
+    description: `${metric.description} (Score between ${metric.range.min}-${metric.range.max})`,
+  })),
+})
 
 export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
   type: 'evaluator',
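To illustrate the new API (the metric names and content below are invented for this example, not part of the commit), a call like this:

const metrics: Metric[] = [
  { name: 'accuracy', description: 'How well the answer meets the requirements', range: { min: 0, max: 1 } },
  { name: 'clarity', description: 'How clear and professional the writing is', range: { min: 0, max: 1 } },
]
const prompt = generateEvaluatorPrompt(metrics, 'Draft answer text...')

would yield a prompt whose "Metrics to evaluate:" section reads:

accuracy (0-1): How well the answer meets the requirements
clarity (0-1): How clear and professional the writing is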
@@ -162,11 +89,46 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
   },
   workflow: {
     inputs: {
-      prompt: { type: 'string' as ParamType, required: true },
+      metrics: {
+        type: 'json' as ParamType,
+        required: true,
+        description: 'Array of metrics to evaluate against',
+        schema: {
+          type: 'array',
+          properties: {},
+          items: {
+            type: 'object',
+            properties: {
+              name: {
+                type: 'string',
+                description: 'Name of the metric',
+              },
+              description: {
+                type: 'string',
+                description: 'Description of what this metric measures',
+              },
+              range: {
+                type: 'object',
+                properties: {
+                  min: {
+                    type: 'number',
+                    description: 'Minimum possible score',
+                  },
+                  max: {
+                    type: 'number',
+                    description: 'Maximum possible score',
+                  },
+                },
+                required: ['min', 'max'],
+              },
+            },
+            required: ['name', 'description', 'range'],
+          },
+        },
+      },
       model: { type: 'string' as ParamType, required: true },
       apiKey: { type: 'string' as ParamType, required: true },
       content: { type: 'string' as ParamType, required: true },
-      history: { type: 'json' as ParamType, required: false },
     },
     outputs: {
       response: {
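For reference, a metrics value that satisfies this schema could look like this (hypothetical data, reusing the example above):

const exampleMetrics = [
  {
    name: 'accuracy',
    description: 'How well the answer meets the requirements',
    range: { min: 0, max: 1 },
  },
]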
@@ -174,17 +136,24 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
         content: 'string',
         model: 'string',
         tokens: 'any',
-        evaluation: 'json',
-        selectedPath: 'json',
-        justification: 'string',
-        history: 'json',
       },
+      dependsOn: {
+        subBlockId: 'metrics',
+        condition: {
+          whenEmpty: {
+            content: 'string',
+            model: 'string',
+            tokens: 'any',
+          },
+          whenFilled: 'json',
+        },
+      },
     },
   },
   subBlocks: [
     {
-      id: 'prompt',
-      title: 'Evaluation Criteria',
+      id: 'metrics',
+      title: 'Evaluation Metrics',
       type: 'eval-input',
       layout: 'full',
     },
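The dependsOn condition appears to make the response type dynamic: while the metrics sub-block is empty, only content/model/tokens are typed; once it is filled, the response is typed as json so each metric can surface as a numeric field (matching the [metricName: string]: any index signature added above). A rough sketch of a filled response, with invented values:

const exampleResponse = {
  content: '{"accuracy": 0.9, "clarity": 0.8}',
  model: 'gpt-4o', // hypothetical model name
  tokens: { prompt: 512, completion: 24, total: 536 },
  accuracy: 0.9, // dynamic metric fields
  clarity: 0.8,
}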
@@ -218,12 +187,14 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
       layout: 'full',
       hidden: true,
       value: (params: Record<string, any>) => {
-        return generateEvaluatorPrompt(
-          params.prompt || '',
-          params.content || '',
-          undefined,
-          params.history || []
-        )
+        const metrics = params.metrics || []
+        const content = params.content || ''
+        const responseFormat = generateResponseFormat(metrics)
+
+        return JSON.stringify({
+          systemPrompt: generateEvaluatorPrompt(metrics, content),
+          responseFormat,
+        })
       },
     },
   ],
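Taken together, the hidden system-prompt sub-block now bundles the prompt and the response format into a single JSON string. A minimal sketch of what value() returns inside this module, using the invented exampleMetrics from above:

const value = JSON.stringify({
  systemPrompt: generateEvaluatorPrompt(exampleMetrics, 'Draft answer text...'),
  responseFormat: generateResponseFormat(exampleMetrics),
})
// responseFormat.fields would contain entries such as:
// { name: 'accuracy', type: 'number', description: 'How well the answer meets the requirements (Score between 0-1)' }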