fix(evaluator): fix evaluator to handle temperature gracefully like router (#1792)

* fix(evaluator): fix evaluator to handle temperature gracefully like router

* apps/sim

* fix eval-input subblock
This commit is contained in:
Waleed
2025-11-01 18:52:18 -07:00
committed by GitHub
parent f9980868a4
commit 7d67ae397d
6 changed files with 231 additions and 50 deletions

View File

@@ -1,10 +1,15 @@
import { useMemo } from 'react'
import { useMemo, useRef, useState } from 'react'
import { Plus, Trash } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { formatDisplayText } from '@/components/ui/formatted-text'
import { Input } from '@/components/ui/input'
import { Label } from '@/components/ui/label'
import { checkTagTrigger, TagDropdown } from '@/components/ui/tag-dropdown'
import { Textarea } from '@/components/ui/textarea'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
import { cn } from '@/lib/utils'
import { useSubBlockValue } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/components/sub-block/hooks/use-sub-block-value'
import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
interface EvalMetric {
id: string
@@ -22,6 +27,7 @@ interface EvalInputProps {
isPreview?: boolean
previewValue?: EvalMetric[] | null
disabled?: boolean
isConnecting?: boolean
}
// Default values
@@ -38,17 +44,24 @@ export function EvalInput({
isPreview = false,
previewValue,
disabled = false,
isConnecting = false,
}: EvalInputProps) {
const [storeValue, setStoreValue] = useSubBlockValue<EvalMetric[]>(blockId, subBlockId)
const accessiblePrefixes = useAccessibleReferencePrefixes(blockId)
const [showTags, setShowTags] = useState(false)
const [cursorPosition, setCursorPosition] = useState(0)
const [activeMetricId, setActiveMetricId] = useState<string | null>(null)
const [activeSourceBlockId, setActiveSourceBlockId] = useState<string | null>(null)
const descriptionInputRefs = useRef<Record<string, HTMLTextAreaElement>>({})
const descriptionOverlayRefs = useRef<Record<string, HTMLDivElement>>({})
const [dragHighlight, setDragHighlight] = useState<Record<string, boolean>>({})
// Use preview value when in preview mode, otherwise use store value
const value = isPreview ? previewValue : storeValue
// State hooks - memoize default metric to prevent key changes
const defaultMetric = useMemo(() => createDefaultMetric(), [])
const metrics: EvalMetric[] = value || [defaultMetric]
// Metric operations
const addMetric = () => {
if (isPreview || disabled) return
@@ -61,7 +74,6 @@ export function EvalInput({
setStoreValue(metrics.filter((metric) => metric.id !== id))
}
// Update handlers
const updateMetric = (id: string, field: keyof EvalMetric, value: any) => {
if (isPreview || disabled) return
setStoreValue(
@@ -86,7 +98,6 @@ export function EvalInput({
)
}
// Validation handlers
const handleRangeBlur = (id: string, field: 'min' | 'max', value: string) => {
const sanitizedValue = value.replace(/[^\d.-]/g, '')
const numValue = Number.parseFloat(sanitizedValue)
@@ -106,7 +117,97 @@ export function EvalInput({
)
}
// Metric header
/**
 * Inserts the tag picked in the tag dropdown into the active metric's description.
 *
 * The text between the last `<` before the saved cursor position and the cursor
 * itself is replaced by `tag`. The dropdown is then closed and, after a short
 * delay, the description textarea is refocused with the caret placed directly
 * after the inserted tag.
 *
 * Does nothing when no metric is active or the active metric is not in `metrics`.
 *
 * @param tag - Text to insert in place of the partially typed `<…` reference.
 */
const handleTagSelect = (tag: string) => {
  if (!activeMetricId) return

  const metric = metrics.find((m) => m.id === activeMetricId)
  if (!metric) return

  const currentValue = metric.description || ''
  const textBeforeCursor = currentValue.slice(0, cursorPosition)
  const lastOpenBracket = textBeforeCursor.lastIndexOf('<')

  // `lastIndexOf` yields -1 when there is no '<' before the cursor. Using -1 as a
  // slice end would silently drop the last character of the description, so fall
  // back to inserting the tag at the cursor instead.
  const insertStart = lastOpenBracket === -1 ? textBeforeCursor.length : lastOpenBracket

  const newValue = currentValue.slice(0, insertStart) + tag + currentValue.slice(cursorPosition)
  updateMetric(activeMetricId, 'description', newValue)
  setShowTags(false)

  // Deferred so the caret is restored after the pending state updates;
  // presumably this lets the textarea re-render with the new value first.
  setTimeout(() => {
    const inputEl = descriptionInputRefs.current[activeMetricId]
    if (inputEl) {
      inputEl.focus()
      const newCursorPos = insertStart + tag.length
      inputEl.setSelectionRange(newCursorPos, newCursorPos)
    }
  }, 10)
}
/**
 * Persists an edited metric description and, when a caret position is supplied,
 * re-evaluates whether the tag dropdown should be shown for that metric.
 *
 * @param metricId - Id of the metric whose description changed.
 * @param newValue - Full new description text.
 * @param selectionStart - Caret offset in `newValue`; when omitted only the
 *   description is updated and the dropdown state is left untouched.
 */
const handleDescriptionChange = (metricId: string, newValue: string, selectionStart?: number) => {
  updateMetric(metricId, 'description', newValue)
  if (selectionStart === undefined) return

  setCursorPosition(selectionStart)
  setActiveMetricId(metricId)

  const trigger = checkTagTrigger(newValue, selectionStart)
  setShowTags(trigger.show)
  if (!trigger.show) return

  // Look at what has been typed after the most recent '<' before the caret:
  // anything in front of the first '.' is treated as the source block.
  const beforeCaret = newValue.slice(0, selectionStart)
  const partialTag = beforeCaret.slice(beforeCaret.lastIndexOf('<') + 1)
  const dotAt = partialTag.indexOf('.')
  setActiveSourceBlockId(dotAt > 0 ? partialTag.slice(0, dotAt) : null)
}
/**
 * Handles a drop on a metric's description textarea.
 *
 * Clears the drag highlight, focuses the textarea, inserts a `<` at the
 * textarea's current selection start (or at the end of the text when that is
 * unavailable), and opens the tag dropdown with the cursor placed right after
 * the inserted `<`. The source block used by the dropdown is taken from the
 * dropped `application/json` payload's `connectionData.sourceBlockId` when present.
 *
 * @param e - The drop event.
 * @param metricId - Id of the metric whose description received the drop.
 */
const handleDrop = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  setDragHighlight((prev) => ({ ...prev, [metricId]: false }))

  const input = descriptionInputRefs.current[metricId]
  if (!input) return
  input.focus()

  const metric = metrics.find((m) => m.id === metricId)
  const currentValue = metric?.description || ''
  const dropPosition = input.selectionStart ?? currentValue.length
  const newValue = `${currentValue.slice(0, dropPosition)}<${currentValue.slice(dropPosition)}`

  updateMetric(metricId, 'description', newValue)
  setActiveMetricId(metricId)
  setCursorPosition(dropPosition + 1)
  setShowTags(true)

  // Resolve the source block from the payload. Defaults to null so that a value
  // left over from an earlier interaction is not reused when this payload does
  // not carry one.
  let sourceBlockId: string | null = null
  try {
    const data = JSON.parse(e.dataTransfer.getData('application/json'))
    const candidate = data?.connectionData?.sourceBlockId
    if (typeof candidate === 'string' && candidate) {
      sourceBlockId = candidate
    }
  } catch {
    // Best effort: a missing or non-JSON payload simply means no source block.
  }
  setActiveSourceBlockId(sourceBlockId)

  // Re-read the ref on the next tick and move the caret just past the inserted '<'.
  setTimeout(() => {
    const el = descriptionInputRefs.current[metricId]
    if (el) {
      el.selectionStart = dropPosition + 1
      el.selectionEnd = dropPosition + 1
    }
  }, 0)
}
/**
 * Drag-over handler for a metric's description textarea: suppresses the default
 * handling, marks the operation as a copy, and switches on the drop highlight
 * for the given metric.
 *
 * @param e - The drag-over event.
 * @param metricId - Id of the metric being dragged over.
 */
const handleDragOver = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  e.dataTransfer.dropEffect = 'copy'
  setDragHighlight((highlights) => {
    return { ...highlights, [metricId]: true }
  })
}
/**
 * Drag-leave handler for a metric's description textarea: suppresses the default
 * handling and switches off the drop highlight for the given metric.
 *
 * @param e - The drag-leave event.
 * @param metricId - Id of the metric the drag just left.
 */
const handleDragLeave = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  setDragHighlight((highlights) => {
    return { ...highlights, [metricId]: false }
  })
}
const renderMetricHeader = (metric: EvalMetric, index: number) => (
<div className='flex h-10 items-center justify-between rounded-t-lg border-b bg-card px-3'>
<span className='font-medium text-sm'>Metric {index + 1}</span>
@@ -146,7 +247,6 @@ export function EvalInput({
</div>
)
// Main render
return (
<div className='space-y-2'>
{metrics.map((metric, index) => (
@@ -172,13 +272,67 @@ export function EvalInput({
<div key={`description-${metric.id}`} className='space-y-1'>
<Label>Description</Label>
<Input
value={metric.description}
onChange={(e) => updateMetric(metric.id, 'description', e.target.value)}
placeholder='How accurate is the response?'
disabled={isPreview || disabled}
className='placeholder:text-muted-foreground/50'
/>
<div className='relative'>
<Textarea
ref={(el) => {
if (el) descriptionInputRefs.current[metric.id] = el
}}
value={metric.description}
onChange={(e) =>
handleDescriptionChange(
metric.id,
e.target.value,
e.target.selectionStart ?? undefined
)
}
placeholder='How accurate is the response?'
disabled={isPreview || disabled}
className={cn(
'min-h-[80px] border border-input bg-white text-transparent caret-foreground placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
(isConnecting || dragHighlight[metric.id]) &&
'ring-2 ring-blue-500 ring-offset-2'
)}
style={{
fontFamily: 'inherit',
lineHeight: 'inherit',
wordBreak: 'break-word',
whiteSpace: 'pre-wrap',
}}
rows={3}
onDrop={(e) => handleDrop(e, metric.id)}
onDragOver={(e) => handleDragOver(e, metric.id)}
onDragLeave={(e) => handleDragLeave(e, metric.id)}
/>
<div
ref={(el) => {
if (el) descriptionOverlayRefs.current[metric.id] = el
}}
className='pointer-events-none absolute inset-0 flex items-start overflow-auto bg-transparent px-3 py-2 text-sm'
style={{
fontFamily: 'inherit',
lineHeight: 'inherit',
}}
>
<div className='w-full whitespace-pre-wrap break-words'>
{formatDisplayText(metric.description || '', {
accessiblePrefixes,
highlightAll: !accessiblePrefixes,
})}
</div>
</div>
{showTags && activeMetricId === metric.id && (
<TagDropdown
visible={showTags}
onSelect={handleTagSelect}
blockId={blockId}
activeSourceBlockId={activeSourceBlockId}
inputValue={metric.description || ''}
cursorPosition={cursorPosition}
onClose={() => setShowTags(false)}
className='absolute top-full left-0 z-50 mt-1'
/>
)}
</div>
</div>
<div key={`range-${metric.id}`} className='grid grid-cols-2 gap-4'>

View File

@@ -339,33 +339,58 @@ export function VariablesInput({
<div className='space-y-1.5'>
<Label className='text-xs'>Value</Label>
{assignment.type === 'object' || assignment.type === 'array' ? (
<Textarea
ref={(el) => {
if (el) valueInputRefs.current[assignment.id] = el
}}
value={assignment.value || ''}
onChange={(e) =>
handleValueInputChange(
assignment.id,
e.target.value,
e.target.selectionStart ?? undefined
)
}
placeholder={
assignment.type === 'object'
? '{\n "key": "value"\n}'
: '[\n 1, 2, 3\n]'
}
disabled={isPreview || disabled}
className={cn(
'min-h-[120px] border border-input bg-white font-mono text-sm placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
dragHighlight[assignment.id] && 'ring-2 ring-blue-500 ring-offset-2',
isConnecting && 'ring-2 ring-blue-500 ring-offset-2'
)}
onDrop={(e) => handleDrop(e, assignment.id)}
onDragOver={(e) => handleDragOver(e, assignment.id)}
onDragLeave={(e) => handleDragLeave(e, assignment.id)}
/>
<div className='relative'>
<Textarea
ref={(el) => {
if (el) valueInputRefs.current[assignment.id] = el
}}
value={assignment.value || ''}
onChange={(e) =>
handleValueInputChange(
assignment.id,
e.target.value,
e.target.selectionStart ?? undefined
)
}
placeholder={
assignment.type === 'object'
? '{\n "key": "value"\n}'
: '[\n 1, 2, 3\n]'
}
disabled={isPreview || disabled}
className={cn(
'min-h-[120px] border border-input bg-white font-mono text-sm text-transparent caret-foreground placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
dragHighlight[assignment.id] && 'ring-2 ring-blue-500 ring-offset-2',
isConnecting && 'ring-2 ring-blue-500 ring-offset-2'
)}
style={{
fontFamily: 'inherit',
lineHeight: 'inherit',
wordBreak: 'break-word',
whiteSpace: 'pre-wrap',
}}
onDrop={(e) => handleDrop(e, assignment.id)}
onDragOver={(e) => handleDragOver(e, assignment.id)}
onDragLeave={(e) => handleDragLeave(e, assignment.id)}
/>
<div
ref={(el) => {
if (el) overlayRefs.current[assignment.id] = el
}}
className='pointer-events-none absolute inset-0 flex items-start overflow-auto bg-transparent px-3 py-2 font-mono text-sm'
style={{
fontFamily: 'inherit',
lineHeight: 'inherit',
}}
>
<div className='w-full whitespace-pre-wrap break-words'>
{formatDisplayText(assignment.value || '', {
accessiblePrefixes,
highlightAll: !accessiblePrefixes,
})}
</div>
</div>
</div>
) : (
<div className='relative'>
<Input

View File

@@ -297,6 +297,7 @@ export const SubBlock = memo(
isPreview={isPreview}
previewValue={previewValue}
disabled={isDisabled}
isConnecting={isConnecting}
/>
)
case 'time-input':

View File

@@ -174,7 +174,7 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
{
id: 'content',
title: 'Content',
type: 'short-input',
type: 'long-input',
layout: 'full',
placeholder: 'Enter the content to evaluate',
required: true,
@@ -252,7 +252,6 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
layout: 'half',
min: 0,
max: 2,
value: () => '0.1',
hidden: true,
},
{

View File

@@ -21,8 +21,11 @@ export class EvaluatorBlockHandler implements BlockHandler {
inputs: Record<string, any>,
context: ExecutionContext
): Promise<BlockOutput> {
const model = inputs.model || 'gpt-4o'
const providerId = getProviderFromModel(model)
const evaluatorConfig = {
model: inputs.model || 'gpt-4o',
apiKey: inputs.apiKey,
}
const providerId = getProviderFromModel(evaluatorConfig.model)
// Process the content to ensure it's in a suitable format
let processedContent = ''
@@ -109,7 +112,7 @@ export class EvaluatorBlockHandler implements BlockHandler {
// Make sure we force JSON output in the request
const providerRequest = {
provider: providerId,
model: model,
model: evaluatorConfig.model,
systemPrompt: systemPromptObj.systemPrompt,
responseFormat: systemPromptObj.responseFormat,
context: JSON.stringify([
@@ -119,8 +122,8 @@ export class EvaluatorBlockHandler implements BlockHandler {
'Please evaluate the content provided in the system prompt. Return ONLY a valid JSON with metric scores.',
},
]),
temperature: inputs.temperature || 0,
apiKey: inputs.apiKey,
temperature: 0.1,
apiKey: evaluatorConfig.apiKey,
workflowId: context.workflowId,
}

View File

@@ -34,7 +34,6 @@ export class RouterBlockHandler implements BlockHandler {
prompt: inputs.prompt,
model: inputs.model || 'gpt-4o',
apiKey: inputs.apiKey,
temperature: inputs.temperature || 0,
}
const providerId = getProviderFromModel(routerConfig.model)