mirror of
https://github.com/simstudioai/sim.git
synced 2026-01-10 07:27:57 -05:00
fix(evaluator): fix evaluator to handle temperature gracefully like router (#1792)
* fix(evaluator): fix evaluator to handle temperature gracefully like router * apps/sim * fix eval-input subblock
This commit is contained in:
@@ -1,10 +1,15 @@
|
||||
import { useMemo } from 'react'
|
||||
import { useMemo, useRef, useState } from 'react'
|
||||
import { Plus, Trash } from 'lucide-react'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { formatDisplayText } from '@/components/ui/formatted-text'
|
||||
import { Input } from '@/components/ui/input'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import { checkTagTrigger, TagDropdown } from '@/components/ui/tag-dropdown'
|
||||
import { Textarea } from '@/components/ui/textarea'
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { useSubBlockValue } from '@/app/workspace/[workspaceId]/w/[workflowId]/components/workflow-block/components/sub-block/hooks/use-sub-block-value'
|
||||
import { useAccessibleReferencePrefixes } from '@/app/workspace/[workspaceId]/w/[workflowId]/hooks/use-accessible-reference-prefixes'
|
||||
|
||||
interface EvalMetric {
|
||||
id: string
|
||||
@@ -22,6 +27,7 @@ interface EvalInputProps {
|
||||
isPreview?: boolean
|
||||
previewValue?: EvalMetric[] | null
|
||||
disabled?: boolean
|
||||
isConnecting?: boolean
|
||||
}
|
||||
|
||||
// Default values
|
||||
@@ -38,17 +44,24 @@ export function EvalInput({
|
||||
isPreview = false,
|
||||
previewValue,
|
||||
disabled = false,
|
||||
isConnecting = false,
|
||||
}: EvalInputProps) {
|
||||
const [storeValue, setStoreValue] = useSubBlockValue<EvalMetric[]>(blockId, subBlockId)
|
||||
const accessiblePrefixes = useAccessibleReferencePrefixes(blockId)
|
||||
|
||||
const [showTags, setShowTags] = useState(false)
|
||||
const [cursorPosition, setCursorPosition] = useState(0)
|
||||
const [activeMetricId, setActiveMetricId] = useState<string | null>(null)
|
||||
const [activeSourceBlockId, setActiveSourceBlockId] = useState<string | null>(null)
|
||||
const descriptionInputRefs = useRef<Record<string, HTMLTextAreaElement>>({})
|
||||
const descriptionOverlayRefs = useRef<Record<string, HTMLDivElement>>({})
|
||||
const [dragHighlight, setDragHighlight] = useState<Record<string, boolean>>({})
|
||||
|
||||
// Use preview value when in preview mode, otherwise use store value
|
||||
const value = isPreview ? previewValue : storeValue
|
||||
|
||||
// State hooks - memoize default metric to prevent key changes
|
||||
const defaultMetric = useMemo(() => createDefaultMetric(), [])
|
||||
const metrics: EvalMetric[] = value || [defaultMetric]
|
||||
|
||||
// Metric operations
|
||||
const addMetric = () => {
|
||||
if (isPreview || disabled) return
|
||||
|
||||
@@ -61,7 +74,6 @@ export function EvalInput({
|
||||
setStoreValue(metrics.filter((metric) => metric.id !== id))
|
||||
}
|
||||
|
||||
// Update handlers
|
||||
const updateMetric = (id: string, field: keyof EvalMetric, value: any) => {
|
||||
if (isPreview || disabled) return
|
||||
setStoreValue(
|
||||
@@ -86,7 +98,6 @@ export function EvalInput({
|
||||
)
|
||||
}
|
||||
|
||||
// Validation handlers
|
||||
const handleRangeBlur = (id: string, field: 'min' | 'max', value: string) => {
|
||||
const sanitizedValue = value.replace(/[^\d.-]/g, '')
|
||||
const numValue = Number.parseFloat(sanitizedValue)
|
||||
@@ -106,7 +117,97 @@ export function EvalInput({
|
||||
)
|
||||
}
|
||||
|
||||
// Metric header
|
||||
/**
 * Inserts the tag picked in the tag dropdown into the active metric's description.
 *
 * The partially typed reference (everything from the last `<` before the caret up
 * to the caret) is replaced by `tag`. If there is no `<` before the caret, the tag
 * is inserted at the caret and no existing text is removed.
 *
 * Does nothing when no metric is active or the active metric no longer exists.
 *
 * @param tag - Text handed over by the dropdown's `onSelect` callback.
 */
const handleTagSelect = (tag: string) => {
  if (!activeMetricId) return

  const metric = metrics.find((m) => m.id === activeMetricId)
  if (!metric) return

  const currentValue = metric.description || ''
  const textBeforeCursor = currentValue.slice(0, cursorPosition)
  const lastOpenBracket = textBeforeCursor.lastIndexOf('<')

  // `lastIndexOf` yields -1 when no `<` precedes the caret. Using -1 as a slice end
  // would silently drop the last character of the description and misplace the
  // caret, so fall back to the caret position (clamped to the text length).
  const insertAt = lastOpenBracket === -1 ? textBeforeCursor.length : lastOpenBracket

  const newValue = currentValue.slice(0, insertAt) + tag + currentValue.slice(cursorPosition)

  updateMetric(activeMetricId, 'description', newValue)
  setShowTags(false)

  // Refocus the textarea and put the caret right after the inserted tag.
  // NOTE(review): the 10 ms delay presumably lets the textarea re-render with the
  // new value before the selection is set — confirm.
  setTimeout(() => {
    const inputEl = descriptionInputRefs.current[activeMetricId]
    if (inputEl) {
      inputEl.focus()
      const newCursorPos = insertAt + tag.length
      inputEl.setSelectionRange(newCursorPos, newCursorPos)
    }
  }, 10)
}
|
||||
|
||||
/**
 * Persists an edited metric description and refreshes the tag-dropdown state.
 *
 * The description is always written through `updateMetric`. When a caret position
 * is supplied, the caret and active metric are recorded and `checkTagTrigger`
 * decides whether the dropdown is shown. While it is shown, the text typed between
 * the last `<` and the caret is inspected: whatever precedes its first `.` becomes
 * the active source block id, or `null` when there is no such prefix.
 *
 * @param metricId - Id of the metric whose description changed.
 * @param newValue - Full new description text.
 * @param selectionStart - Caret offset in `newValue`, if known.
 */
const handleDescriptionChange = (metricId: string, newValue: string, selectionStart?: number) => {
  updateMetric(metricId, 'description', newValue)

  // Without a caret position there is no dropdown state to update.
  if (selectionStart === undefined) return

  setCursorPosition(selectionStart)
  setActiveMetricId(metricId)

  const { show } = checkTagTrigger(newValue, selectionStart)
  setShowTags(show)
  if (!show) return

  // Text typed after the most recent `<`, up to the caret.
  const typedPrefix = newValue.slice(0, selectionStart)
  const partialTag = typedPrefix.slice(typedPrefix.lastIndexOf('<') + 1)

  // A `.` at index 0 leaves no name in front of it, hence the strict `> 0`.
  const separatorAt = partialTag.indexOf('.')
  setActiveSourceBlockId(separatorAt > 0 ? partialTag.slice(0, separatorAt) : null)
}
|
||||
|
||||
/**
 * Handles a drop onto a metric's description textarea.
 *
 * Clears the drag highlight, then inserts a `<` at the textarea's caret, marks the
 * metric as active and opens the tag dropdown. If the drag payload is JSON that
 * carries `connectionData.sourceBlockId`, that id becomes the active source block.
 *
 * In preview or disabled mode only the highlight is cleared. `updateMetric` would
 * reject the edit in those modes anyway, so opening the dropdown for a `<` that was
 * never written would leave the UI out of step with the stored value.
 *
 * @param e - The drop event.
 * @param metricId - Id of the metric whose textarea received the drop.
 */
const handleDrop = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  setDragHighlight((prev) => ({ ...prev, [metricId]: false }))

  // Same read-only guard as addMetric / updateMetric.
  if (isPreview || disabled) return

  const input = descriptionInputRefs.current[metricId]
  if (!input) return
  input.focus()

  const metric = metrics.find((m) => m.id === metricId)
  const currentValue = metric?.description || ''
  // Fall back to appending when the textarea reports no caret position.
  const dropPosition = input.selectionStart ?? currentValue.length
  const caretAfterBracket = dropPosition + 1

  const newValue = `${currentValue.slice(0, dropPosition)}<${currentValue.slice(dropPosition)}`
  updateMetric(metricId, 'description', newValue)
  setActiveMetricId(metricId)
  setCursorPosition(caretAfterBracket)
  setShowTags(true)

  try {
    const data = JSON.parse(e.dataTransfer.getData('application/json'))
    if (data?.connectionData?.sourceBlockId) {
      setActiveSourceBlockId(data.connectionData.sourceBlockId)
    }
  } catch {
    // Best effort: the payload is missing or not valid JSON. The dropdown still
    // opens; the active source block is simply left as it was.
  }

  // Place the caret just after the inserted `<` once the current handler has finished.
  setTimeout(() => {
    const el = descriptionInputRefs.current[metricId]
    if (el) {
      el.selectionStart = caretAfterBracket
      el.selectionEnd = caretAfterBracket
    }
  }, 0)
}
|
||||
|
||||
/**
 * Drag-over handler for a metric's description textarea: cancels the default
 * handling, sets the drop effect to `copy` and turns on the drag highlight for
 * that metric.
 *
 * @param e - The drag-over event.
 * @param metricId - Id of the metric being hovered.
 */
const handleDragOver = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  e.dataTransfer.dropEffect = 'copy'
  setDragHighlight((current) => {
    const next = { ...current }
    next[metricId] = true
    return next
  })
}
|
||||
|
||||
/**
 * Drag-leave handler for a metric's description textarea: cancels the default
 * handling and turns off the drag highlight for that metric.
 *
 * @param e - The drag-leave event.
 * @param metricId - Id of the metric the drag left.
 */
const handleDragLeave = (e: React.DragEvent, metricId: string) => {
  e.preventDefault()
  setDragHighlight((current) => {
    const next = { ...current }
    next[metricId] = false
    return next
  })
}
|
||||
|
||||
const renderMetricHeader = (metric: EvalMetric, index: number) => (
|
||||
<div className='flex h-10 items-center justify-between rounded-t-lg border-b bg-card px-3'>
|
||||
<span className='font-medium text-sm'>Metric {index + 1}</span>
|
||||
@@ -146,7 +247,6 @@ export function EvalInput({
|
||||
</div>
|
||||
)
|
||||
|
||||
// Main render
|
||||
return (
|
||||
<div className='space-y-2'>
|
||||
{metrics.map((metric, index) => (
|
||||
@@ -172,13 +272,67 @@ export function EvalInput({
|
||||
|
||||
<div key={`description-${metric.id}`} className='space-y-1'>
|
||||
<Label>Description</Label>
|
||||
<Input
|
||||
value={metric.description}
|
||||
onChange={(e) => updateMetric(metric.id, 'description', e.target.value)}
|
||||
placeholder='How accurate is the response?'
|
||||
disabled={isPreview || disabled}
|
||||
className='placeholder:text-muted-foreground/50'
|
||||
/>
|
||||
<div className='relative'>
|
||||
<Textarea
|
||||
ref={(el) => {
|
||||
if (el) descriptionInputRefs.current[metric.id] = el
|
||||
}}
|
||||
value={metric.description}
|
||||
onChange={(e) =>
|
||||
handleDescriptionChange(
|
||||
metric.id,
|
||||
e.target.value,
|
||||
e.target.selectionStart ?? undefined
|
||||
)
|
||||
}
|
||||
placeholder='How accurate is the response?'
|
||||
disabled={isPreview || disabled}
|
||||
className={cn(
|
||||
'min-h-[80px] border border-input bg-white text-transparent caret-foreground placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
|
||||
(isConnecting || dragHighlight[metric.id]) &&
|
||||
'ring-2 ring-blue-500 ring-offset-2'
|
||||
)}
|
||||
style={{
|
||||
fontFamily: 'inherit',
|
||||
lineHeight: 'inherit',
|
||||
wordBreak: 'break-word',
|
||||
whiteSpace: 'pre-wrap',
|
||||
}}
|
||||
rows={3}
|
||||
onDrop={(e) => handleDrop(e, metric.id)}
|
||||
onDragOver={(e) => handleDragOver(e, metric.id)}
|
||||
onDragLeave={(e) => handleDragLeave(e, metric.id)}
|
||||
/>
|
||||
<div
|
||||
ref={(el) => {
|
||||
if (el) descriptionOverlayRefs.current[metric.id] = el
|
||||
}}
|
||||
className='pointer-events-none absolute inset-0 flex items-start overflow-auto bg-transparent px-3 py-2 text-sm'
|
||||
style={{
|
||||
fontFamily: 'inherit',
|
||||
lineHeight: 'inherit',
|
||||
}}
|
||||
>
|
||||
<div className='w-full whitespace-pre-wrap break-words'>
|
||||
{formatDisplayText(metric.description || '', {
|
||||
accessiblePrefixes,
|
||||
highlightAll: !accessiblePrefixes,
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
{showTags && activeMetricId === metric.id && (
|
||||
<TagDropdown
|
||||
visible={showTags}
|
||||
onSelect={handleTagSelect}
|
||||
blockId={blockId}
|
||||
activeSourceBlockId={activeSourceBlockId}
|
||||
inputValue={metric.description || ''}
|
||||
cursorPosition={cursorPosition}
|
||||
onClose={() => setShowTags(false)}
|
||||
className='absolute top-full left-0 z-50 mt-1'
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div key={`range-${metric.id}`} className='grid grid-cols-2 gap-4'>
|
||||
|
||||
@@ -339,33 +339,58 @@ export function VariablesInput({
|
||||
<div className='space-y-1.5'>
|
||||
<Label className='text-xs'>Value</Label>
|
||||
{assignment.type === 'object' || assignment.type === 'array' ? (
|
||||
<Textarea
|
||||
ref={(el) => {
|
||||
if (el) valueInputRefs.current[assignment.id] = el
|
||||
}}
|
||||
value={assignment.value || ''}
|
||||
onChange={(e) =>
|
||||
handleValueInputChange(
|
||||
assignment.id,
|
||||
e.target.value,
|
||||
e.target.selectionStart ?? undefined
|
||||
)
|
||||
}
|
||||
placeholder={
|
||||
assignment.type === 'object'
|
||||
? '{\n "key": "value"\n}'
|
||||
: '[\n 1, 2, 3\n]'
|
||||
}
|
||||
disabled={isPreview || disabled}
|
||||
className={cn(
|
||||
'min-h-[120px] border border-input bg-white font-mono text-sm placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
|
||||
dragHighlight[assignment.id] && 'ring-2 ring-blue-500 ring-offset-2',
|
||||
isConnecting && 'ring-2 ring-blue-500 ring-offset-2'
|
||||
)}
|
||||
onDrop={(e) => handleDrop(e, assignment.id)}
|
||||
onDragOver={(e) => handleDragOver(e, assignment.id)}
|
||||
onDragLeave={(e) => handleDragLeave(e, assignment.id)}
|
||||
/>
|
||||
<div className='relative'>
|
||||
<Textarea
|
||||
ref={(el) => {
|
||||
if (el) valueInputRefs.current[assignment.id] = el
|
||||
}}
|
||||
value={assignment.value || ''}
|
||||
onChange={(e) =>
|
||||
handleValueInputChange(
|
||||
assignment.id,
|
||||
e.target.value,
|
||||
e.target.selectionStart ?? undefined
|
||||
)
|
||||
}
|
||||
placeholder={
|
||||
assignment.type === 'object'
|
||||
? '{\n "key": "value"\n}'
|
||||
: '[\n 1, 2, 3\n]'
|
||||
}
|
||||
disabled={isPreview || disabled}
|
||||
className={cn(
|
||||
'min-h-[120px] border border-input bg-white font-mono text-sm text-transparent caret-foreground placeholder:text-muted-foreground/50 dark:border-input/60 dark:bg-background',
|
||||
dragHighlight[assignment.id] && 'ring-2 ring-blue-500 ring-offset-2',
|
||||
isConnecting && 'ring-2 ring-blue-500 ring-offset-2'
|
||||
)}
|
||||
style={{
|
||||
fontFamily: 'inherit',
|
||||
lineHeight: 'inherit',
|
||||
wordBreak: 'break-word',
|
||||
whiteSpace: 'pre-wrap',
|
||||
}}
|
||||
onDrop={(e) => handleDrop(e, assignment.id)}
|
||||
onDragOver={(e) => handleDragOver(e, assignment.id)}
|
||||
onDragLeave={(e) => handleDragLeave(e, assignment.id)}
|
||||
/>
|
||||
<div
|
||||
ref={(el) => {
|
||||
if (el) overlayRefs.current[assignment.id] = el
|
||||
}}
|
||||
className='pointer-events-none absolute inset-0 flex items-start overflow-auto bg-transparent px-3 py-2 font-mono text-sm'
|
||||
style={{
|
||||
fontFamily: 'inherit',
|
||||
lineHeight: 'inherit',
|
||||
}}
|
||||
>
|
||||
<div className='w-full whitespace-pre-wrap break-words'>
|
||||
{formatDisplayText(assignment.value || '', {
|
||||
accessiblePrefixes,
|
||||
highlightAll: !accessiblePrefixes,
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className='relative'>
|
||||
<Input
|
||||
|
||||
@@ -297,6 +297,7 @@ export const SubBlock = memo(
|
||||
isPreview={isPreview}
|
||||
previewValue={previewValue}
|
||||
disabled={isDisabled}
|
||||
isConnecting={isConnecting}
|
||||
/>
|
||||
)
|
||||
case 'time-input':
|
||||
|
||||
@@ -174,7 +174,7 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
|
||||
{
|
||||
id: 'content',
|
||||
title: 'Content',
|
||||
type: 'short-input',
|
||||
type: 'long-input',
|
||||
layout: 'full',
|
||||
placeholder: 'Enter the content to evaluate',
|
||||
required: true,
|
||||
@@ -252,7 +252,6 @@ export const EvaluatorBlock: BlockConfig<EvaluatorResponse> = {
|
||||
layout: 'half',
|
||||
min: 0,
|
||||
max: 2,
|
||||
value: () => '0.1',
|
||||
hidden: true,
|
||||
},
|
||||
{
|
||||
|
||||
@@ -21,8 +21,11 @@ export class EvaluatorBlockHandler implements BlockHandler {
|
||||
inputs: Record<string, any>,
|
||||
context: ExecutionContext
|
||||
): Promise<BlockOutput> {
|
||||
const model = inputs.model || 'gpt-4o'
|
||||
const providerId = getProviderFromModel(model)
|
||||
const evaluatorConfig = {
|
||||
model: inputs.model || 'gpt-4o',
|
||||
apiKey: inputs.apiKey,
|
||||
}
|
||||
const providerId = getProviderFromModel(evaluatorConfig.model)
|
||||
|
||||
// Process the content to ensure it's in a suitable format
|
||||
let processedContent = ''
|
||||
@@ -109,7 +112,7 @@ export class EvaluatorBlockHandler implements BlockHandler {
|
||||
// Make sure we force JSON output in the request
|
||||
const providerRequest = {
|
||||
provider: providerId,
|
||||
model: model,
|
||||
model: evaluatorConfig.model,
|
||||
systemPrompt: systemPromptObj.systemPrompt,
|
||||
responseFormat: systemPromptObj.responseFormat,
|
||||
context: JSON.stringify([
|
||||
@@ -119,8 +122,8 @@ export class EvaluatorBlockHandler implements BlockHandler {
|
||||
'Please evaluate the content provided in the system prompt. Return ONLY a valid JSON with metric scores.',
|
||||
},
|
||||
]),
|
||||
temperature: inputs.temperature || 0,
|
||||
apiKey: inputs.apiKey,
|
||||
temperature: 0.1,
|
||||
apiKey: evaluatorConfig.apiKey,
|
||||
workflowId: context.workflowId,
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,6 @@ export class RouterBlockHandler implements BlockHandler {
|
||||
prompt: inputs.prompt,
|
||||
model: inputs.model || 'gpt-4o',
|
||||
apiKey: inputs.apiKey,
|
||||
temperature: inputs.temperature || 0,
|
||||
}
|
||||
|
||||
const providerId = getProviderFromModel(routerConfig.model)
|
||||
|
||||
Reference in New Issue
Block a user