Mirror of https://github.com/simstudioai/sim.git (synced 2026-01-07 22:24:06 -05:00)
improvement(usage): update usage limit in realtime, standardize token output object across providers (#2553)
* improvement(usage-limit): update usage in real time, fix token output object
* updated tokenBreakdown to tokens, standardized input/output/total token object type across providers
* update remaining references
* ack PR comment
* remove singleton query client instance from hooks, leave only in zustand
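Note: the standardized token object this change converges on across providers uses input/output/total fields. A minimal sketch of the shape and the migration-safe read pattern follows; the TokenUsage name and field names are taken from the diff below, while the helper function and its parameter type are illustrative only.

export interface TokenUsage {
  /** Number of input tokens */
  input: number
  /** Number of output tokens */
  output: number
  /** Total number of tokens */
  total: number
}

/** Reads tokens defensively during the migration, preferring the new field names. */
function readTokens(raw?: Partial<TokenUsage> & { prompt?: number; completion?: number }): TokenUsage {
  const input = raw?.input ?? raw?.prompt ?? 0
  const output = raw?.output ?? raw?.completion ?? 0
  return { input, output, total: raw?.total ?? input + output }
}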
This commit is contained in:
@@ -20,7 +20,7 @@ interface NavProps {
}

export default function Nav({ hideAuthButtons = false, variant = 'landing' }: NavProps = {}) {
const [githubStars, setGithubStars] = useState('18.6k')
const [githubStars, setGithubStars] = useState('24k')
const [isHovered, setIsHovered] = useState(false)
const [isLoginHovered, setIsLoginHovered] = useState(false)
const router = useRouter()

@@ -1,26 +1,42 @@
'use client'

import { type ReactNode, useState } from 'react'
import type { ReactNode } from 'react'
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'

export function QueryProvider({ children }: { children: ReactNode }) {
const [queryClient] = useState(
() =>
new QueryClient({
defaultOptions: {
queries: {
staleTime: 30 * 1000,
gcTime: 5 * 60 * 1000,
refetchOnWindowFocus: false,
retry: 1,
retryOnMount: false,
},
mutations: {
retry: 1,
},
},
})
)
/**
* Singleton QueryClient instance for client-side use.
* Can be imported directly for cache operations outside React components.
*/
function makeQueryClient() {
return new QueryClient({
defaultOptions: {
queries: {
staleTime: 30 * 1000,
gcTime: 5 * 60 * 1000,
refetchOnWindowFocus: false,
retry: 1,
retryOnMount: false,
},
mutations: {
retry: 1,
},
},
})
}

let browserQueryClient: QueryClient | undefined

export function getQueryClient() {
if (typeof window === 'undefined') {
return makeQueryClient()
}
if (!browserQueryClient) {
browserQueryClient = makeQueryClient()
}
return browserQueryClient
}

export function QueryProvider({ children }: { children: ReactNode }) {
const queryClient = getQueryClient()
return <QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
}

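As the JSDoc above notes, the singleton QueryClient can be imported directly for cache operations outside React components. A minimal usage sketch, assuming an import path for the provider module (illustrative, not confirmed by this diff) and the subscriptionKeys helpers that appear later in this diff:

import { getQueryClient } from '@/app/query-provider' // illustrative path
import { subscriptionKeys } from '@/hooks/queries/subscription'

// Invalidate the cached subscription/usage data from non-component code,
// e.g. after a background usage update completes.
getQueryClient().invalidateQueries({ queryKey: subscriptionKeys.user() })
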
@@ -259,15 +259,16 @@ export async function GET(request: NextRequest) {
input: 0,
output: 0,
total: 0,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
})
}
const modelCost = models.get(block.cost.model)
modelCost.input += Number(block.cost.input) || 0
modelCost.output += Number(block.cost.output) || 0
modelCost.total += Number(block.cost.total) || 0
modelCost.tokens.prompt += block.cost.tokens?.prompt || 0
modelCost.tokens.completion += block.cost.tokens?.completion || 0
modelCost.tokens.input += block.cost.tokens?.input || block.cost.tokens?.prompt || 0
modelCost.tokens.output +=
block.cost.tokens?.output || block.cost.tokens?.completion || 0
modelCost.tokens.total += block.cost.tokens?.total || 0
}
}
@@ -279,8 +280,8 @@ export async function GET(request: NextRequest) {
output: totalOutputCost,
tokens: {
total: totalTokens,
prompt: totalPromptTokens,
completion: totalCompletionTokens,
input: totalPromptTokens,
output: totalCompletionTokens,
},
models: Object.fromEntries(models),
}

@@ -165,8 +165,8 @@ export async function POST(request: NextRequest) {
: '',
model: executionData.output?.model,
tokens: executionData.output?.tokens || {
prompt: 0,
completion: 0,
input: 0,
output: 0,
total: 0,
},
// Sanitize any potential Unicode characters in tool calls

@@ -87,8 +87,8 @@ export async function POST(request: NextRequest) {
output: 0,
total: SEARCH_TOOL_COST,
tokens: {
prompt: 0,
completion: 0,
input: 0,
output: 0,
total: 0,
},
model: 'search-exa',

@@ -53,7 +53,7 @@ function buildTestPayload(subscription: typeof workspaceNotificationSubscription
totalDurationMs: 5000,
cost: {
total: 0.00123,
tokens: { prompt: 100, completion: 50, total: 150 },
tokens: { input: 100, output: 50, total: 150 },
},
},
links: {

@@ -117,7 +117,7 @@ export default function ChatClient({ identifier }: { identifier: string }) {
const [error, setError] = useState<string | null>(null)
const messagesEndRef = useRef<HTMLDivElement>(null)
const messagesContainerRef = useRef<HTMLDivElement>(null)
const [starCount, setStarCount] = useState('19.4k')
const [starCount, setStarCount] = useState('24k')
const [conversationId, setConversationId] = useState('')

const [showScrollButton, setShowScrollButton] = useState(false)

@@ -131,8 +131,8 @@ function formatExecutionData(executionData: any) {
: null,
tokens: tokens
? {
prompt: tokens.prompt || 0,
completion: tokens.completion || 0,
input: tokens.input || tokens.prompt || 0,
output: tokens.output || tokens.completion || 0,
total: tokens.total || 0,
}
: null,
@@ -347,12 +347,12 @@ function PinnedLogs({
</h4>
<div className='space-y-[4px] rounded-[4px] border border-[var(--border)] bg-[var(--surface-3)] p-[12px] text-[13px]'>
<div className='flex justify-between text-[var(--text-primary)]'>
<span>Prompt:</span>
<span>{formatted.tokens.prompt}</span>
<span>Input:</span>
<span>{formatted.tokens.input}</span>
</div>
<div className='flex justify-between text-[var(--text-primary)]'>
<span>Completion:</span>
<span>{formatted.tokens.completion}</span>
<span>Output:</span>
<span>{formatted.tokens.output}</span>
</div>
<div className='flex justify-between border-[var(--border)] border-t pt-[4px] font-medium text-[var(--text-primary)]'>
<span>Total:</span>
@@ -498,8 +498,8 @@ export function FrozenCanvas({
total: null,
},
tokens: span.tokens || {
prompt: null,
completion: null,
input: null,
output: null,
total: null,
},
modelUsed: span.model || null,

@@ -344,8 +344,8 @@ export const LogDetails = memo(function LogDetails({
Tokens:
</span>
<span className='font-medium text-[12px] text-[var(--text-secondary)]'>
{log.cost?.tokens?.prompt || 0} in / {log.cost?.tokens?.completion || 0}{' '}
out
{log.cost?.tokens?.input || log.cost?.tokens?.prompt || 0} in /{' '}
{log.cost?.tokens?.output || log.cost?.tokens?.completion || 0} out
</span>
</div>
</div>

@@ -116,7 +116,7 @@ export function CredentialSelector({
setStoreValue('')
}, [invalidSelection, selectedId, effectiveProviderId, setStoreValue])

useCredentialRefreshTriggers(refetchCredentials, effectiveProviderId, provider)
useCredentialRefreshTriggers(refetchCredentials)

const handleOpenChange = useCallback(
(isOpen: boolean) => {
@@ -268,11 +268,7 @@ export function CredentialSelector({
)
}

function useCredentialRefreshTriggers(
refetchCredentials: () => Promise<unknown>,
effectiveProviderId?: string,
provider?: OAuthProvider
) {
function useCredentialRefreshTriggers(refetchCredentials: () => Promise<unknown>) {
useEffect(() => {
const refresh = () => {
void refetchCredentials()
@@ -290,26 +286,12 @@ function useCredentialRefreshTriggers(
}
}

const handleCredentialDisconnected = (event: Event) => {
const customEvent = event as CustomEvent<{ providerId?: string }>
const providerId = customEvent.detail?.providerId

if (
providerId &&
(providerId === effectiveProviderId || (provider && providerId.startsWith(provider)))
) {
refresh()
}
}

document.addEventListener('visibilitychange', handleVisibilityChange)
window.addEventListener('pageshow', handlePageShow)
window.addEventListener('credential-disconnected', handleCredentialDisconnected)

return () => {
document.removeEventListener('visibilitychange', handleVisibilityChange)
window.removeEventListener('pageshow', handlePageShow)
window.removeEventListener('credential-disconnected', handleCredentialDisconnected)
}
}, [refetchCredentials, effectiveProviderId, provider])
}, [refetchCredentials])
}

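For reference, the simplified hook above now refreshes on every credential-disconnected event rather than filtering by provider. A dispatch of that event would look roughly like the snippet below; the dispatch site is not part of this diff, so the detail shape is inferred from the removed listener and the providerId value is a placeholder.

// Hypothetical dispatch site (not shown in this diff): notify open selectors
// that a credential was disconnected so they refetch their credential lists.
window.dispatchEvent(
  new CustomEvent('credential-disconnected', { detail: { providerId: 'example-provider' } })
)
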
@@ -117,7 +117,7 @@ export function ToolCredentialSelector({
|
||||
onChange('')
|
||||
}, [invalidSelection, onChange])
|
||||
|
||||
useCredentialRefreshTriggers(refetchCredentials, effectiveProviderId, provider)
|
||||
useCredentialRefreshTriggers(refetchCredentials)
|
||||
|
||||
const handleOpenChange = useCallback(
|
||||
(isOpen: boolean) => {
|
||||
@@ -238,11 +238,7 @@ export function ToolCredentialSelector({
|
||||
)
|
||||
}
|
||||
|
||||
function useCredentialRefreshTriggers(
|
||||
refetchCredentials: () => Promise<unknown>,
|
||||
effectiveProviderId?: string,
|
||||
provider?: OAuthProvider
|
||||
) {
|
||||
function useCredentialRefreshTriggers(refetchCredentials: () => Promise<unknown>) {
|
||||
useEffect(() => {
|
||||
const refresh = () => {
|
||||
void refetchCredentials()
|
||||
@@ -260,26 +256,12 @@ function useCredentialRefreshTriggers(
|
||||
}
|
||||
}
|
||||
|
||||
const handleCredentialDisconnected = (event: Event) => {
|
||||
const customEvent = event as CustomEvent<{ providerId?: string }>
|
||||
const providerId = customEvent.detail?.providerId
|
||||
|
||||
if (
|
||||
providerId &&
|
||||
(providerId === effectiveProviderId || (provider && providerId.startsWith(provider)))
|
||||
) {
|
||||
refresh()
|
||||
}
|
||||
}
|
||||
|
||||
document.addEventListener('visibilitychange', handleVisibilityChange)
|
||||
window.addEventListener('pageshow', handlePageShow)
|
||||
window.addEventListener('credential-disconnected', handleCredentialDisconnected)
|
||||
|
||||
return () => {
|
||||
document.removeEventListener('visibilitychange', handleVisibilityChange)
|
||||
window.removeEventListener('pageshow', handlePageShow)
|
||||
window.removeEventListener('credential-disconnected', handleCredentialDisconnected)
|
||||
}
|
||||
}, [refetchCredentials, effectiveProviderId, provider])
|
||||
}, [refetchCredentials])
|
||||
}
|
||||
|
||||
@@ -143,22 +143,10 @@ export function useScheduleInfo(
|
||||
setIsLoading(false)
|
||||
}
|
||||
|
||||
const handleScheduleUpdate = (event: CustomEvent) => {
|
||||
if (event.detail?.workflowId === workflowId && event.detail?.blockId === blockId) {
|
||||
logger.debug('Schedule update event received, refetching schedule info')
|
||||
if (blockType === 'schedule') {
|
||||
fetchScheduleInfo(workflowId)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
window.addEventListener('schedule-updated', handleScheduleUpdate as EventListener)
|
||||
|
||||
return () => {
|
||||
setIsLoading(false)
|
||||
window.removeEventListener('schedule-updated', handleScheduleUpdate as EventListener)
|
||||
}
|
||||
}, [blockType, workflowId, blockId, fetchScheduleInfo])
|
||||
}, [blockType, workflowId, fetchScheduleInfo])
|
||||
|
||||
return {
|
||||
scheduleInfo,
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { useCallback, useRef, useState } from 'react'
|
||||
import { useQueryClient } from '@tanstack/react-query'
|
||||
import { createLogger } from '@/lib/logs/console/logger'
|
||||
import type { GenerationType } from '@/blocks/types'
|
||||
import { subscriptionKeys } from '@/hooks/queries/subscription'
|
||||
|
||||
const logger = createLogger('useWand')
|
||||
|
||||
@@ -17,12 +19,10 @@ function buildContextInfo(currentValue?: string, generationType?: string): strin
|
||||
|
||||
let contextInfo = `Current content (${contentLength} characters, ${lineCount} lines):\n${currentValue}`
|
||||
|
||||
// Add type-specific context analysis
|
||||
if (generationType) {
|
||||
switch (generationType) {
|
||||
case 'javascript-function-body':
|
||||
case 'typescript-function-body': {
|
||||
// Analyze code structure
|
||||
const hasFunction = /function\s+\w+/.test(currentValue)
|
||||
const hasArrowFunction = /=>\s*{/.test(currentValue)
|
||||
const hasReturn = /return\s+/.test(currentValue)
|
||||
@@ -32,7 +32,6 @@ function buildContextInfo(currentValue?: string, generationType?: string): strin
|
||||
|
||||
case 'json-schema':
|
||||
case 'json-object':
|
||||
// Analyze JSON structure
|
||||
try {
|
||||
const parsed = JSON.parse(currentValue)
|
||||
const keys = Object.keys(parsed)
|
||||
@@ -77,13 +76,13 @@ export function useWand({
|
||||
onStreamStart,
|
||||
onGenerationComplete,
|
||||
}: UseWandProps) {
|
||||
const queryClient = useQueryClient()
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
const [isPromptVisible, setIsPromptVisible] = useState(false)
|
||||
const [promptInputValue, setPromptInputValue] = useState('')
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
const [isStreaming, setIsStreaming] = useState(false)
|
||||
|
||||
// Conversation history state
|
||||
const [conversationHistory, setConversationHistory] = useState<ChatMessage[]>([])
|
||||
|
||||
const abortControllerRef = useRef<AbortController | null>(null)
|
||||
@@ -143,25 +142,20 @@ export function useWand({
|
||||
|
||||
abortControllerRef.current = new AbortController()
|
||||
|
||||
// Signal the start of streaming to clear previous content
|
||||
if (onStreamStart) {
|
||||
onStreamStart()
|
||||
}
|
||||
|
||||
try {
|
||||
// Build context-aware message
|
||||
const contextInfo = buildContextInfo(currentValue, wandConfig?.generationType)
|
||||
|
||||
// Build the system prompt with context information
|
||||
let systemPrompt = wandConfig?.prompt || ''
|
||||
if (systemPrompt.includes('{context}')) {
|
||||
systemPrompt = systemPrompt.replace('{context}', contextInfo)
|
||||
}
|
||||
|
||||
// User message is just the user's specific request
|
||||
const userMessage = prompt
|
||||
|
||||
// Keep track of the current prompt for history
|
||||
const currentPrompt = prompt
|
||||
|
||||
const response = await fetch('/api/wand', {
|
||||
@@ -172,9 +166,9 @@ export function useWand({
|
||||
},
|
||||
body: JSON.stringify({
|
||||
prompt: userMessage,
|
||||
systemPrompt: systemPrompt, // Send the processed system prompt with context
|
||||
systemPrompt: systemPrompt,
|
||||
stream: true,
|
||||
history: wandConfig?.maintainHistory ? conversationHistory : [], // Include history if enabled
|
||||
history: wandConfig?.maintainHistory ? conversationHistory : [],
|
||||
}),
|
||||
signal: abortControllerRef.current.signal,
|
||||
cache: 'no-store',
|
||||
@@ -256,6 +250,10 @@ export function useWand({
|
||||
prompt,
|
||||
contentLength: accumulatedContent.length,
|
||||
})
|
||||
|
||||
setTimeout(() => {
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
|
||||
}, 1000)
|
||||
} catch (error: any) {
|
||||
if (error.name === 'AbortError') {
|
||||
logger.debug('Wand generation cancelled')
|
||||
@@ -276,6 +274,7 @@ export function useWand({
|
||||
onStreamChunk,
|
||||
onStreamStart,
|
||||
onGenerationComplete,
|
||||
queryClient,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -88,9 +88,9 @@ function extractExecutionResult(error: unknown): ExecutionResult | null {
|
||||
}
|
||||
|
||||
export function useWorkflowExecution() {
|
||||
const queryClient = useQueryClient()
|
||||
const currentWorkflow = useCurrentWorkflow()
|
||||
const { activeWorkflowId, workflows } = useWorkflowRegistry()
|
||||
const queryClient = useQueryClient()
|
||||
const { toggleConsole, addConsole } = useTerminalConsoleStore()
|
||||
const { getAllVariables } = useEnvironmentStore()
|
||||
const { getVariablesByWorkflowId, variables } = useVariablesStore()
|
||||
@@ -563,9 +563,10 @@ export function useWorkflowExecution() {
|
||||
logger.info(`Processed ${processedCount} blocks for streaming tokenization`)
|
||||
}
|
||||
|
||||
// Invalidate subscription query to update usage
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.usage() })
|
||||
// Invalidate subscription queries to update usage
|
||||
setTimeout(() => {
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
|
||||
}, 1000)
|
||||
|
||||
const { encodeSSE } = await import('@/lib/core/utils/sse')
|
||||
controller.enqueue(encodeSSE({ event: 'final', data: result }))
|
||||
@@ -630,9 +631,10 @@ export function useWorkflowExecution() {
|
||||
;(result.metadata as any).source = 'chat'
|
||||
}
|
||||
|
||||
// Invalidate subscription query to update usage
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.usage() })
|
||||
// Invalidate subscription queries to update usage
|
||||
setTimeout(() => {
|
||||
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
|
||||
}, 1000)
|
||||
}
|
||||
return result
|
||||
} catch (error: any) {
|
||||
@@ -654,6 +656,7 @@ export function useWorkflowExecution() {
|
||||
setPendingBlocks,
|
||||
setActiveBlocks,
|
||||
workflows,
|
||||
queryClient,
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -2236,27 +2236,6 @@ const WorkflowContent = React.memo(() => {
|
||||
return () => window.removeEventListener('keydown', handleKeyDown)
|
||||
}, [selectedEdgeInfo, removeEdge, getNodes, removeBlock, effectivePermissions.canEdit])
|
||||
|
||||
/** Handles sub-block value updates from custom events. */
|
||||
useEffect(() => {
|
||||
const handleSubBlockValueUpdate = (event: CustomEvent) => {
|
||||
const { blockId, subBlockId, value } = event.detail
|
||||
if (blockId && subBlockId) {
|
||||
// Use collaborative function to go through queue system
|
||||
// This ensures 5-second timeout and error detection work
|
||||
collaborativeSetSubblockValue(blockId, subBlockId, value)
|
||||
}
|
||||
}
|
||||
|
||||
window.addEventListener('update-subblock-value', handleSubBlockValueUpdate as EventListener)
|
||||
|
||||
return () => {
|
||||
window.removeEventListener(
|
||||
'update-subblock-value',
|
||||
handleSubBlockValueUpdate as EventListener
|
||||
)
|
||||
}
|
||||
}, [collaborativeSetSubblockValue])
|
||||
|
||||
return (
|
||||
<div className='flex h-full w-full flex-col overflow-hidden bg-[var(--bg)]'>
|
||||
<div className='relative h-full w-full flex-1 bg-[var(--bg)]'>
|
||||
|
||||
@@ -273,8 +273,8 @@ export function supportsHandles(blockType: string | undefined): boolean {
|
||||
|
||||
export function getDefaultTokens() {
|
||||
return {
|
||||
prompt: DEFAULTS.TOKENS.PROMPT,
|
||||
completion: DEFAULTS.TOKENS.COMPLETION,
|
||||
input: DEFAULTS.TOKENS.PROMPT,
|
||||
output: DEFAULTS.TOKENS.COMPLETION,
|
||||
total: DEFAULTS.TOKENS.TOTAL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ describe('EdgeManager', () => {
|
||||
const output = {
|
||||
result: { data: 'test' },
|
||||
content: 'Hello world',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
}
|
||||
|
||||
const readyNodes = edgeManager.processOutgoingEdges(sourceNode, output)
|
||||
|
||||
@@ -31,7 +31,7 @@ vi.mock('@/providers/utils', () => ({
|
||||
create: vi.fn().mockResolvedValue({
|
||||
content: 'Mocked response content',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: [],
|
||||
cost: 0.001,
|
||||
timing: { total: 100 },
|
||||
@@ -53,7 +53,7 @@ vi.mock('@/providers', () => ({
|
||||
executeProviderRequest: vi.fn().mockResolvedValue({
|
||||
content: 'Mocked response content',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: [],
|
||||
cost: 0.001,
|
||||
timing: { total: 100 },
|
||||
@@ -134,7 +134,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Mocked response content',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: [],
|
||||
cost: 0.001,
|
||||
timing: { total: 100 },
|
||||
@@ -211,7 +211,7 @@ describe('AgentBlockHandler', () => {
|
||||
const expectedOutput = {
|
||||
content: 'Mocked response content',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 },
|
||||
providerTiming: { total: 100 },
|
||||
cost: 0.001,
|
||||
@@ -253,7 +253,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Using tools to respond',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'auto_tool',
|
||||
@@ -591,7 +591,7 @@ describe('AgentBlockHandler', () => {
|
||||
const expectedOutput = {
|
||||
content: 'Mocked response content',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 }, // Assuming no tool calls in this mock response
|
||||
providerTiming: { total: 100 },
|
||||
cost: 0.001,
|
||||
@@ -672,7 +672,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: '{"result": "Success", "score": 0.95}',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
timing: { total: 100 },
|
||||
toolCalls: [],
|
||||
cost: undefined,
|
||||
@@ -693,7 +693,7 @@ describe('AgentBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
result: 'Success',
|
||||
score: 0.95,
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 },
|
||||
providerTiming: { total: 100 },
|
||||
cost: undefined,
|
||||
@@ -715,7 +715,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
timing: { total: 100 },
|
||||
}),
|
||||
})
|
||||
@@ -733,7 +733,7 @@ describe('AgentBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 },
|
||||
providerTiming: { total: 100 },
|
||||
cost: undefined,
|
||||
@@ -755,7 +755,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
timing: { total: 100 },
|
||||
toolCalls: [],
|
||||
cost: undefined,
|
||||
@@ -776,7 +776,7 @@ describe('AgentBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 },
|
||||
providerTiming: { total: 100 },
|
||||
cost: undefined,
|
||||
@@ -798,7 +798,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
timing: { total: 100 },
|
||||
toolCalls: [],
|
||||
cost: undefined,
|
||||
@@ -819,7 +819,7 @@ describe('AgentBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
content: 'Regular text response',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
toolCalls: { list: [], count: 0 },
|
||||
providerTiming: { total: 100 },
|
||||
cost: undefined,
|
||||
@@ -907,7 +907,7 @@ describe('AgentBlockHandler', () => {
|
||||
output: {
|
||||
content: '',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
},
|
||||
logs: [
|
||||
{
|
||||
@@ -988,7 +988,7 @@ describe('AgentBlockHandler', () => {
|
||||
output: {
|
||||
content: 'Test streaming content',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 5, total: 15 },
|
||||
tokens: { input: 10, output: 5, total: 15 },
|
||||
},
|
||||
logs: [],
|
||||
metadata: {
|
||||
@@ -1414,7 +1414,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'I will use MCP tools to help you.',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 15, completion: 25, total: 40 },
|
||||
tokens: { input: 15, output: 25, total: 40 },
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'mcp-server1-list_files',
|
||||
@@ -1525,7 +1525,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Let me try to use this tool.',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 15, total: 25 },
|
||||
tokens: { input: 10, output: 15, total: 25 },
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'mcp-server1-failing_tool',
|
||||
@@ -1630,7 +1630,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Used MCP tools successfully',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 20, completion: 30, total: 50 },
|
||||
tokens: { input: 20, output: 30, total: 50 },
|
||||
toolCalls: [],
|
||||
timing: { total: 200 },
|
||||
}),
|
||||
@@ -1679,7 +1679,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Using MCP tool',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 10, total: 20 },
|
||||
tokens: { input: 10, output: 10, total: 20 },
|
||||
toolCalls: [{ name: 'mcp-test-tool', arguments: {} }],
|
||||
timing: { total: 50 },
|
||||
}),
|
||||
@@ -1734,7 +1734,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Used MCP tool successfully',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 10, total: 20 },
|
||||
tokens: { input: 10, output: 10, total: 20 },
|
||||
toolCalls: [],
|
||||
timing: { total: 50 },
|
||||
}),
|
||||
@@ -1811,7 +1811,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Tool executed',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 10, total: 20 },
|
||||
tokens: { input: 10, output: 10, total: 20 },
|
||||
toolCalls: [
|
||||
{
|
||||
name: 'search_files',
|
||||
@@ -1901,7 +1901,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Used tools',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 10, total: 20 },
|
||||
tokens: { input: 10, output: 10, total: 20 },
|
||||
toolCalls: [],
|
||||
timing: { total: 50 },
|
||||
}),
|
||||
@@ -2008,7 +2008,7 @@ describe('AgentBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'Used legacy tool',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 10, total: 20 },
|
||||
tokens: { input: 10, output: 10, total: 20 },
|
||||
toolCalls: [],
|
||||
timing: { total: 50 },
|
||||
}),
|
||||
|
||||
@@ -1317,15 +1317,15 @@ export class AgentBlockHandler implements BlockHandler {
|
||||
}
|
||||
|
||||
private createResponseMetadata(result: {
|
||||
tokens?: { prompt?: number; completion?: number; total?: number }
|
||||
tokens?: { input?: number; output?: number; total?: number }
|
||||
toolCalls?: Array<any>
|
||||
timing?: any
|
||||
cost?: any
|
||||
}) {
|
||||
return {
|
||||
tokens: result.tokens || {
|
||||
prompt: DEFAULTS.TOKENS.PROMPT,
|
||||
completion: DEFAULTS.TOKENS.COMPLETION,
|
||||
input: DEFAULTS.TOKENS.PROMPT,
|
||||
output: DEFAULTS.TOKENS.COMPLETION,
|
||||
total: DEFAULTS.TOKENS.TOTAL,
|
||||
},
|
||||
toolCalls: {
|
||||
|
||||
@@ -60,7 +60,7 @@ describe('EvaluatorBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: JSON.stringify({ score1: 5, score2: 8 }),
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 50, completion: 10, total: 60 },
|
||||
tokens: { input: 50, output: 10, total: 60 },
|
||||
cost: 0.002,
|
||||
timing: { total: 200 },
|
||||
}),
|
||||
@@ -121,7 +121,7 @@ describe('EvaluatorBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
content: 'This is the content to evaluate.',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 50, completion: 10, total: 60 },
|
||||
tokens: { input: 50, output: 10, total: 60 },
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
||||
@@ -124,19 +124,18 @@ export class EvaluatorBlockHandler implements BlockHandler {
|
||||
|
||||
const metricScores = this.extractMetricScores(parsedContent, inputs.metrics)
|
||||
|
||||
const costCalculation = calculateCost(
|
||||
result.model,
|
||||
result.tokens?.prompt || DEFAULTS.TOKENS.PROMPT,
|
||||
result.tokens?.completion || DEFAULTS.TOKENS.COMPLETION,
|
||||
false
|
||||
)
|
||||
const inputTokens = result.tokens?.input || result.tokens?.prompt || DEFAULTS.TOKENS.PROMPT
|
||||
const outputTokens =
|
||||
result.tokens?.output || result.tokens?.completion || DEFAULTS.TOKENS.COMPLETION
|
||||
|
||||
const costCalculation = calculateCost(result.model, inputTokens, outputTokens, false)
|
||||
|
||||
return {
|
||||
content: inputs.content,
|
||||
model: result.model,
|
||||
tokens: {
|
||||
prompt: result.tokens?.prompt || DEFAULTS.TOKENS.PROMPT,
|
||||
completion: result.tokens?.completion || DEFAULTS.TOKENS.COMPLETION,
|
||||
input: inputTokens,
|
||||
output: outputTokens,
|
||||
total: result.tokens?.total || DEFAULTS.TOKENS.TOTAL,
|
||||
},
|
||||
cost: {
|
||||
|
||||
@@ -186,8 +186,8 @@ describe('GenericBlockHandler', () => {
|
||||
output: 0,
|
||||
total: 0.00001042,
|
||||
tokens: {
|
||||
prompt: 521,
|
||||
completion: 0,
|
||||
input: 521,
|
||||
output: 0,
|
||||
total: 521,
|
||||
},
|
||||
model: 'text-embedding-3-small',
|
||||
@@ -215,8 +215,8 @@ describe('GenericBlockHandler', () => {
|
||||
total: 0.00001042,
|
||||
},
|
||||
tokens: {
|
||||
prompt: 521,
|
||||
completion: 0,
|
||||
input: 521,
|
||||
output: 0,
|
||||
total: 521,
|
||||
},
|
||||
model: 'text-embedding-3-small',
|
||||
@@ -253,8 +253,8 @@ describe('GenericBlockHandler', () => {
|
||||
output: 0,
|
||||
total: 0.00000521,
|
||||
tokens: {
|
||||
prompt: 260,
|
||||
completion: 0,
|
||||
input: 260,
|
||||
output: 0,
|
||||
total: 260,
|
||||
},
|
||||
model: 'text-embedding-3-small',
|
||||
@@ -286,8 +286,8 @@ describe('GenericBlockHandler', () => {
|
||||
total: 0.00000521,
|
||||
},
|
||||
tokens: {
|
||||
prompt: 260,
|
||||
completion: 0,
|
||||
input: 260,
|
||||
output: 0,
|
||||
total: 260,
|
||||
},
|
||||
model: 'text-embedding-3-small',
|
||||
@@ -340,7 +340,7 @@ describe('GenericBlockHandler', () => {
|
||||
input: 0.001,
|
||||
output: 0.002,
|
||||
total: 0.003,
|
||||
tokens: { prompt: 100, completion: 50, total: 150 },
|
||||
tokens: { input: 100, output: 50, total: 150 },
|
||||
model: 'some-model',
|
||||
},
|
||||
},
|
||||
@@ -357,7 +357,7 @@ describe('GenericBlockHandler', () => {
|
||||
output: 0.002,
|
||||
total: 0.003,
|
||||
},
|
||||
tokens: { prompt: 100, completion: 50, total: 150 },
|
||||
tokens: { input: 100, output: 50, total: 150 },
|
||||
model: 'some-model',
|
||||
})
|
||||
}
|
||||
|
||||
@@ -87,7 +87,7 @@ describe('RouterBlockHandler', () => {
|
||||
Promise.resolve({
|
||||
content: 'target-block-1',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 100, completion: 5, total: 105 },
|
||||
tokens: { input: 100, output: 5, total: 105 },
|
||||
cost: 0.003,
|
||||
timing: { total: 300 },
|
||||
}),
|
||||
@@ -160,7 +160,7 @@ describe('RouterBlockHandler', () => {
|
||||
expect(result).toEqual({
|
||||
prompt: 'Choose the best option.',
|
||||
model: 'mock-model',
|
||||
tokens: { prompt: 100, completion: 5, total: 105 },
|
||||
tokens: { input: 100, output: 5, total: 105 },
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
||||
@@ -82,15 +82,15 @@ export class RouterBlockHandler implements BlockHandler {
|
||||
}
|
||||
|
||||
const tokens = result.tokens || {
|
||||
prompt: DEFAULTS.TOKENS.PROMPT,
|
||||
completion: DEFAULTS.TOKENS.COMPLETION,
|
||||
input: DEFAULTS.TOKENS.PROMPT,
|
||||
output: DEFAULTS.TOKENS.COMPLETION,
|
||||
total: DEFAULTS.TOKENS.TOTAL,
|
||||
}
|
||||
|
||||
const cost = calculateCost(
|
||||
result.model,
|
||||
tokens.prompt || DEFAULTS.TOKENS.PROMPT,
|
||||
tokens.completion || DEFAULTS.TOKENS.COMPLETION,
|
||||
tokens.input || DEFAULTS.TOKENS.PROMPT,
|
||||
tokens.output || DEFAULTS.TOKENS.COMPLETION,
|
||||
false
|
||||
)
|
||||
|
||||
@@ -98,8 +98,8 @@ export class RouterBlockHandler implements BlockHandler {
|
||||
prompt: inputs.prompt,
|
||||
model: result.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt || DEFAULTS.TOKENS.PROMPT,
|
||||
completion: tokens.completion || DEFAULTS.TOKENS.COMPLETION,
|
||||
input: tokens.input || DEFAULTS.TOKENS.PROMPT,
|
||||
output: tokens.output || DEFAULTS.TOKENS.COMPLETION,
|
||||
total: tokens.total || DEFAULTS.TOKENS.TOTAL,
|
||||
},
|
||||
cost: {
|
||||
|
||||
@@ -69,8 +69,8 @@ export interface NormalizedBlockOutput {
|
||||
content?: string
|
||||
model?: string
|
||||
tokens?: {
|
||||
prompt?: number
|
||||
completion?: number
|
||||
input?: number
|
||||
output?: number
|
||||
total?: number
|
||||
}
|
||||
toolCalls?: {
|
||||
|
||||
@@ -154,7 +154,7 @@ export interface LogWorkflowUsageBatchParams {
|
||||
string,
|
||||
{
|
||||
total: number
|
||||
tokens: { prompt: number; completion: number }
|
||||
tokens: { input: number; output: number }
|
||||
}
|
||||
>
|
||||
}
|
||||
@@ -205,8 +205,8 @@ export async function logWorkflowUsageBatch(params: LogWorkflowUsageBatchParams)
|
||||
source: 'workflow',
|
||||
description: modelName,
|
||||
metadata: {
|
||||
inputTokens: modelData.tokens.prompt,
|
||||
outputTokens: modelData.tokens.completion,
|
||||
inputTokens: modelData.tokens.input,
|
||||
outputTokens: modelData.tokens.output,
|
||||
},
|
||||
cost: modelData.total.toString(),
|
||||
workspaceId: params.workspaceId ?? null,
|
||||
|
||||
@@ -27,7 +27,7 @@ interface BlockExecution {
|
||||
input: number
|
||||
output: number
|
||||
model?: string
|
||||
tokens?: { total: number; prompt: number; completion: number }
|
||||
tokens?: { total: number; input: number; output: number }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -59,8 +59,12 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
output: (merged[model].output || 0) + (costs.output || 0),
|
||||
total: (merged[model].total || 0) + (costs.total || 0),
|
||||
tokens: {
|
||||
prompt: (merged[model].tokens?.prompt || 0) + (costs.tokens?.prompt || 0),
|
||||
completion: (merged[model].tokens?.completion || 0) + (costs.tokens?.completion || 0),
|
||||
input:
|
||||
(merged[model].tokens?.input || merged[model].tokens?.prompt || 0) +
|
||||
(costs.tokens?.input || costs.tokens?.prompt || 0),
|
||||
output:
|
||||
(merged[model].tokens?.output || merged[model].tokens?.completion || 0) +
|
||||
(costs.tokens?.output || costs.tokens?.completion || 0),
|
||||
total: (merged[model].tokens?.total || 0) + (costs.tokens?.total || 0),
|
||||
},
|
||||
}
|
||||
@@ -195,7 +199,7 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
input: number
|
||||
output: number
|
||||
total: number
|
||||
tokens: { prompt: number; completion: number; total: number }
|
||||
tokens: { input: number; output: number; total: number }
|
||||
}
|
||||
>
|
||||
}
|
||||
@@ -269,8 +273,12 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
input: (existingCost.input || 0) + costSummary.totalInputCost,
|
||||
output: (existingCost.output || 0) + costSummary.totalOutputCost,
|
||||
tokens: {
|
||||
prompt: (existingCost.tokens?.prompt || 0) + costSummary.totalPromptTokens,
|
||||
completion: (existingCost.tokens?.completion || 0) + costSummary.totalCompletionTokens,
|
||||
input:
|
||||
(existingCost.tokens?.input || existingCost.tokens?.prompt || 0) +
|
||||
costSummary.totalPromptTokens,
|
||||
output:
|
||||
(existingCost.tokens?.output || existingCost.tokens?.completion || 0) +
|
||||
costSummary.totalCompletionTokens,
|
||||
total: (existingCost.tokens?.total || 0) + costSummary.totalTokens,
|
||||
},
|
||||
models: this.mergeCostModels(existingCost.models || {}, costSummary.models),
|
||||
@@ -280,8 +288,8 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
input: costSummary.totalInputCost,
|
||||
output: costSummary.totalOutputCost,
|
||||
tokens: {
|
||||
prompt: costSummary.totalPromptTokens,
|
||||
completion: costSummary.totalCompletionTokens,
|
||||
input: costSummary.totalPromptTokens,
|
||||
output: costSummary.totalCompletionTokens,
|
||||
total: costSummary.totalTokens,
|
||||
},
|
||||
models: costSummary.models,
|
||||
@@ -307,9 +315,9 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
executionData: {
|
||||
traceSpans: redactedTraceSpans,
|
||||
finalOutput: redactedFinalOutput,
|
||||
tokenBreakdown: {
|
||||
prompt: mergedCost.tokens.prompt,
|
||||
completion: mergedCost.tokens.completion,
|
||||
tokens: {
|
||||
input: mergedCost.tokens.input,
|
||||
output: mergedCost.tokens.output,
|
||||
total: mergedCost.tokens.total,
|
||||
},
|
||||
models: mergedCost.models,
|
||||
@@ -508,7 +516,7 @@ export class ExecutionLogger implements IExecutionLoggerService {
|
||||
input: number
|
||||
output: number
|
||||
total: number
|
||||
tokens: { prompt: number; completion: number; total: number }
|
||||
tokens: { input: number; output: number; total: number }
|
||||
}
|
||||
>
|
||||
},
|
||||
|
||||
@@ -83,7 +83,7 @@ export function calculateCostSummary(traceSpans: any[]): {
|
||||
input: number
|
||||
output: number
|
||||
total: number
|
||||
tokens: { prompt: number; completion: number; total: number }
|
||||
tokens: { input: number; output: number; total: number }
|
||||
}
|
||||
>
|
||||
} {
|
||||
@@ -131,7 +131,7 @@ export function calculateCostSummary(traceSpans: any[]): {
|
||||
input: number
|
||||
output: number
|
||||
total: number
|
||||
tokens: { prompt: number; completion: number; total: number }
|
||||
tokens: { input: number; output: number; total: number }
|
||||
}
|
||||
> = {}
|
||||
|
||||
@@ -150,14 +150,14 @@ export function calculateCostSummary(traceSpans: any[]): {
|
||||
input: 0,
|
||||
output: 0,
|
||||
total: 0,
|
||||
tokens: { prompt: 0, completion: 0, total: 0 },
|
||||
tokens: { input: 0, output: 0, total: 0 },
|
||||
}
|
||||
}
|
||||
models[model].input += span.cost.input || 0
|
||||
models[model].output += span.cost.output || 0
|
||||
models[model].total += span.cost.total || 0
|
||||
models[model].tokens.prompt += span.tokens?.input ?? span.tokens?.prompt ?? 0
|
||||
models[model].tokens.completion += span.tokens?.output ?? span.tokens?.completion ?? 0
|
||||
models[model].tokens.input += span.tokens?.input ?? span.tokens?.prompt ?? 0
|
||||
models[model].tokens.output += span.tokens?.output ?? span.tokens?.completion ?? 0
|
||||
models[model].tokens.total += span.tokens?.total || 0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ describe('buildTraceSpans', () => {
|
||||
output: {
|
||||
content: 'Agent response',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
providerTiming: {
|
||||
duration: 8000,
|
||||
startTime: '2024-01-01T10:00:00.000Z',
|
||||
@@ -138,7 +138,7 @@ describe('buildTraceSpans', () => {
|
||||
output: {
|
||||
content: 'Agent response',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 10, completion: 20, total: 30 },
|
||||
tokens: { input: 10, output: 20, total: 30 },
|
||||
providerTiming: {
|
||||
duration: 4000,
|
||||
startTime: '2024-01-01T10:00:00.500Z',
|
||||
@@ -427,8 +427,8 @@ describe('buildTraceSpans', () => {
|
||||
output: {
|
||||
content: 'Based on my research using multiple sources...',
|
||||
model: 'gpt-4o',
|
||||
tokens: { prompt: 50, completion: 200, total: 250 },
|
||||
cost: { total: 0.0025, prompt: 0.001, completion: 0.0015 },
|
||||
tokens: { input: 50, output: 200, total: 250 },
|
||||
cost: { total: 0.0025, input: 0.001, output: 0.0015 },
|
||||
providerTiming: {
|
||||
duration: 15000,
|
||||
startTime: '2024-01-01T10:00:00.000Z',
|
||||
|
||||
@@ -15,8 +15,8 @@ export interface PricingInfo {
|
||||
}
|
||||
|
||||
export interface TokenUsage {
|
||||
prompt: number
|
||||
completion: number
|
||||
input: number
|
||||
output: number
|
||||
total: number
|
||||
}
|
||||
|
||||
@@ -102,6 +102,17 @@ export interface WorkflowExecutionLog {
|
||||
environment?: ExecutionEnvironment
|
||||
trigger?: ExecutionTrigger
|
||||
traceSpans?: TraceSpan[]
|
||||
tokens?: { input?: number; output?: number; total?: number }
|
||||
models?: Record<
|
||||
string,
|
||||
{
|
||||
input?: number
|
||||
output?: number
|
||||
total?: number
|
||||
tokens?: { input?: number; output?: number; total?: number }
|
||||
}
|
||||
>
|
||||
finalOutput?: any
|
||||
errorDetails?: {
|
||||
blockId: string
|
||||
blockName: string
|
||||
@@ -114,14 +125,14 @@ export interface WorkflowExecutionLog {
|
||||
input?: number
|
||||
output?: number
|
||||
total?: number
|
||||
tokens?: { prompt?: number; completion?: number; total?: number }
|
||||
tokens?: { input?: number; output?: number; total?: number }
|
||||
models?: Record<
|
||||
string,
|
||||
{
|
||||
input?: number
|
||||
output?: number
|
||||
total?: number
|
||||
tokens?: { prompt?: number; completion?: number; total?: number }
|
||||
tokens?: { input?: number; output?: number; total?: number }
|
||||
}
|
||||
>
|
||||
}
|
||||
|
||||
@@ -57,8 +57,8 @@ export function calculateStreamingCost(
|
||||
|
||||
// Create token usage object
|
||||
const tokens: TokenUsage = {
|
||||
prompt: totalPromptTokens,
|
||||
completion: completionTokens,
|
||||
input: totalPromptTokens,
|
||||
output: completionTokens,
|
||||
total: totalTokens,
|
||||
}
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ export interface TokenEstimate {
|
||||
}
|
||||
|
||||
export interface TokenUsage {
|
||||
/** Number of prompt/input tokens */
|
||||
prompt: number
|
||||
/** Number of completion/output tokens */
|
||||
completion: number
|
||||
/** Number of input tokens */
|
||||
input: number
|
||||
/** Number of output tokens */
|
||||
output: number
|
||||
/** Total number of tokens */
|
||||
total: number
|
||||
}
|
||||
|
||||
@@ -57,7 +57,7 @@ export function isTokenizableBlockType(blockType?: string): boolean {
|
||||
*/
|
||||
export function hasRealTokenData(tokens?: TokenUsage): boolean {
|
||||
if (!tokens) return false
|
||||
return tokens.total > 0 || tokens.prompt > 0 || tokens.completion > 0
|
||||
return tokens.total > 0 || tokens.input > 0 || tokens.output > 0
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -227,8 +227,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
stream: createReadableStreamFromAnthropicStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: usage.input_tokens,
|
||||
completion: usage.output_tokens,
|
||||
input: usage.input_tokens,
|
||||
output: usage.output_tokens,
|
||||
total: usage.input_tokens + usage.output_tokens,
|
||||
}
|
||||
|
||||
@@ -260,7 +260,7 @@ export const anthropicProvider: ProviderConfig = {
|
||||
output: {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: { prompt: 0, completion: 0, total: 0 },
|
||||
tokens: { input: 0, output: 0, total: 0 },
|
||||
toolCalls: undefined,
|
||||
providerTiming: {
|
||||
startTime: providerStartTimeISO,
|
||||
@@ -320,8 +320,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
}
|
||||
|
||||
const tokens = {
|
||||
prompt: currentResponse.usage?.input_tokens || 0,
|
||||
completion: currentResponse.usage?.output_tokens || 0,
|
||||
input: currentResponse.usage?.input_tokens || 0,
|
||||
output: currentResponse.usage?.output_tokens || 0,
|
||||
total:
|
||||
(currentResponse.usage?.input_tokens || 0) +
|
||||
(currentResponse.usage?.output_tokens || 0),
|
||||
@@ -547,8 +547,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
modelTime += thisModelTime
|
||||
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.input_tokens || 0
|
||||
tokens.completion += currentResponse.usage.output_tokens || 0
|
||||
tokens.input += currentResponse.usage.input_tokens || 0
|
||||
tokens.output += currentResponse.usage.output_tokens || 0
|
||||
tokens.total +=
|
||||
(currentResponse.usage.input_tokens || 0) +
|
||||
(currentResponse.usage.output_tokens || 0)
|
||||
@@ -561,7 +561,7 @@ export const anthropicProvider: ProviderConfig = {
|
||||
throw error
|
||||
}
|
||||
|
||||
const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
|
||||
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)
|
||||
|
||||
const streamingPayload = {
|
||||
...payload,
|
||||
@@ -578,8 +578,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
(streamContent, usage) => {
|
||||
streamingResult.execution.output.content = streamContent
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: tokens.prompt + usage.input_tokens,
|
||||
completion: tokens.completion + usage.output_tokens,
|
||||
input: tokens.input + usage.input_tokens,
|
||||
output: tokens.output + usage.output_tokens,
|
||||
total: tokens.total + usage.input_tokens + usage.output_tokens,
|
||||
}
|
||||
|
||||
@@ -610,8 +610,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt,
|
||||
completion: tokens.completion,
|
||||
input: tokens.input,
|
||||
output: tokens.output,
|
||||
total: tokens.total,
|
||||
},
|
||||
toolCalls:
|
||||
@@ -692,8 +692,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
}
|
||||
|
||||
const tokens = {
|
||||
prompt: currentResponse.usage?.input_tokens || 0,
|
||||
completion: currentResponse.usage?.output_tokens || 0,
|
||||
input: currentResponse.usage?.input_tokens || 0,
|
||||
output: currentResponse.usage?.output_tokens || 0,
|
||||
total:
|
||||
(currentResponse.usage?.input_tokens || 0) + (currentResponse.usage?.output_tokens || 0),
|
||||
}
|
||||
@@ -923,8 +923,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
modelTime += thisModelTime
|
||||
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.input_tokens || 0
|
||||
tokens.completion += currentResponse.usage.output_tokens || 0
|
||||
tokens.input += currentResponse.usage.input_tokens || 0
|
||||
tokens.output += currentResponse.usage.output_tokens || 0
|
||||
tokens.total +=
|
||||
(currentResponse.usage.input_tokens || 0) + (currentResponse.usage.output_tokens || 0)
|
||||
|
||||
@@ -965,8 +965,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
stream: createReadableStreamFromAnthropicStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: tokens.prompt + usage.input_tokens,
|
||||
completion: tokens.completion + usage.output_tokens,
|
||||
input: tokens.input + usage.input_tokens,
|
||||
output: tokens.output + usage.output_tokens,
|
||||
total: tokens.total + usage.input_tokens + usage.output_tokens,
|
||||
}
|
||||
|
||||
@@ -992,8 +992,8 @@ export const anthropicProvider: ProviderConfig = {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt,
|
||||
completion: tokens.completion,
|
||||
input: tokens.input,
|
||||
output: tokens.output,
|
||||
total: tokens.total,
|
||||
},
|
||||
toolCalls:
|
||||
|
||||
@@ -165,8 +165,8 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: usage.prompt_tokens,
|
||||
completion: usage.completion_tokens,
|
||||
input: usage.prompt_tokens,
|
||||
output: usage.completion_tokens,
|
||||
total: usage.total_tokens,
|
||||
}
|
||||
|
||||
@@ -202,7 +202,7 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
output: {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: { prompt: 0, completion: 0, total: 0 },
|
||||
tokens: { input: 0, output: 0, total: 0 },
|
||||
toolCalls: undefined,
|
||||
providerTiming: {
|
||||
startTime: providerStartTimeISO,
|
||||
@@ -242,8 +242,8 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
|
||||
let content = currentResponse.choices[0]?.message?.content || ''
|
||||
const tokens = {
|
||||
prompt: currentResponse.usage?.prompt_tokens || 0,
|
||||
completion: currentResponse.usage?.completion_tokens || 0,
|
||||
input: currentResponse.usage?.prompt_tokens || 0,
|
||||
output: currentResponse.usage?.completion_tokens || 0,
|
||||
total: currentResponse.usage?.total_tokens || 0,
|
||||
}
|
||||
const toolCalls = []
|
||||
@@ -445,8 +445,8 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
}
|
||||
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.completion += currentResponse.usage.completion_tokens || 0
|
||||
tokens.input += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.output += currentResponse.usage.completion_tokens || 0
|
||||
tokens.total += currentResponse.usage.total_tokens || 0
|
||||
}
|
||||
|
||||
@@ -456,7 +456,7 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
if (request.stream) {
|
||||
logger.info('Using streaming for final response after tool processing')
|
||||
|
||||
const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
|
||||
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)
|
||||
|
||||
const streamingParams: ChatCompletionCreateParamsStreaming = {
|
||||
...payload,
|
||||
@@ -471,8 +471,8 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
stream: createReadableStreamFromAzureOpenAIStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: tokens.prompt + usage.prompt_tokens,
|
||||
completion: tokens.completion + usage.completion_tokens,
|
||||
input: tokens.input + usage.prompt_tokens,
|
||||
output: tokens.output + usage.completion_tokens,
|
||||
total: tokens.total + usage.total_tokens,
|
||||
}
|
||||
|
||||
@@ -493,8 +493,8 @@ export const azureOpenAIProvider: ProviderConfig = {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt,
|
||||
completion: tokens.completion,
|
||||
input: tokens.input,
|
||||
output: tokens.output,
|
||||
total: tokens.total,
|
||||
},
|
||||
toolCalls:
|
||||
|
||||
@@ -125,8 +125,8 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: usage.prompt_tokens,
|
||||
completion: usage.completion_tokens,
|
||||
input: usage.prompt_tokens,
|
||||
output: usage.completion_tokens,
|
||||
total: usage.total_tokens,
|
||||
}
|
||||
|
||||
@@ -146,7 +146,7 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
output: {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: { prompt: 0, completion: 0, total: 0 },
|
||||
tokens: { input: 0, output: 0, total: 0 },
|
||||
toolCalls: undefined,
|
||||
providerTiming: {
|
||||
startTime: providerStartTimeISO,
|
||||
@@ -183,8 +183,8 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
|
||||
let content = currentResponse.choices[0]?.message?.content || ''
|
||||
const tokens = {
|
||||
prompt: currentResponse.usage?.prompt_tokens || 0,
|
||||
completion: currentResponse.usage?.completion_tokens || 0,
|
||||
input: currentResponse.usage?.prompt_tokens || 0,
|
||||
output: currentResponse.usage?.completion_tokens || 0,
|
||||
total: currentResponse.usage?.total_tokens || 0,
|
||||
}
|
||||
const toolCalls = []
|
||||
@@ -384,8 +384,8 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
content = finalResponse.choices[0].message.content
|
||||
}
|
||||
if (finalResponse.usage) {
|
||||
tokens.prompt += finalResponse.usage.prompt_tokens || 0
|
||||
tokens.completion += finalResponse.usage.completion_tokens || 0
|
||||
tokens.input += finalResponse.usage.prompt_tokens || 0
|
||||
tokens.output += finalResponse.usage.completion_tokens || 0
|
||||
tokens.total += finalResponse.usage.total_tokens || 0
|
||||
}
|
||||
|
||||
@@ -416,8 +416,8 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
|
||||
modelTime += thisModelTime
|
||||
if (currentResponse.usage) {
|
||||
tokens.prompt += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.completion += currentResponse.usage.completion_tokens || 0
|
||||
tokens.input += currentResponse.usage.prompt_tokens || 0
|
||||
tokens.output += currentResponse.usage.completion_tokens || 0
|
||||
tokens.total += currentResponse.usage.total_tokens || 0
|
||||
}
|
||||
|
||||
@@ -444,14 +444,14 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
|
||||
const streamResponse: any = await client.chat.completions.create(streamingPayload)
|
||||
|
||||
const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
|
||||
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)
|
||||
|
||||
const streamingResult = {
|
||||
stream: createReadableStreamFromCerebrasStream(streamResponse, (content, usage) => {
|
||||
streamingResult.execution.output.content = content
|
||||
streamingResult.execution.output.tokens = {
|
||||
prompt: tokens.prompt + usage.prompt_tokens,
|
||||
completion: tokens.completion + usage.completion_tokens,
|
||||
input: tokens.input + usage.prompt_tokens,
|
||||
output: tokens.output + usage.completion_tokens,
|
||||
total: tokens.total + usage.total_tokens,
|
||||
}
|
||||
|
||||
@@ -472,8 +472,8 @@ export const cerebrasProvider: ProviderConfig = {
|
||||
content: '',
|
||||
model: request.model,
|
||||
tokens: {
|
||||
prompt: tokens.prompt,
|
||||
completion: tokens.completion,
|
||||
input: tokens.input,
|
||||
output: tokens.output,
|
||||
total: tokens.total,
|
||||
},
|
||||
toolCalls:
|
||||
|
||||
@@ -124,8 +124,8 @@ export const deepseekProvider: ProviderConfig = {
(content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -146,7 +146,7 @@ export const deepseekProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -193,8 +193,8 @@ export const deepseekProvider: ProviderConfig = {
}

const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -413,8 +413,8 @@ export const deepseekProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -440,7 +440,7 @@ export const deepseekProvider: ProviderConfig = {

const streamResponse = await deepseek.chat.completions.create(streamingPayload)

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingResult = {
stream: createReadableStreamFromDeepseekStream(
@@ -448,8 +448,8 @@ export const deepseekProvider: ProviderConfig = {
(content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -471,8 +471,8 @@ export const deepseekProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -53,8 +53,8 @@ function createInitialState(
return {
contents,
tokens: {
prompt: initialUsage.promptTokenCount,
completion: initialUsage.candidatesTokenCount,
input: initialUsage.promptTokenCount,
output: initialUsage.candidatesTokenCount,
total: initialUsage.totalTokenCount,
},
cost: initialCost,
@@ -192,8 +192,8 @@ function updateStateWithResponse(
return {
...state,
tokens: {
prompt: state.tokens.prompt + usage.promptTokenCount,
completion: state.tokens.completion + usage.candidatesTokenCount,
input: state.tokens.input + usage.promptTokenCount,
output: state.tokens.output + usage.candidatesTokenCount,
total: state.tokens.total + usage.totalTokenCount,
},
cost: {
@@ -263,7 +263,7 @@ function createStreamingResult(
output: {
content: '',
model: '',
tokens: state?.tokens ?? { prompt: 0, completion: 0, total: 0 },
tokens: state?.tokens ?? { input: 0, output: 0, total: 0 },
toolCalls: state?.toolCalls.length
? { list: state.toolCalls, count: state.toolCalls.length }
: undefined,
@@ -447,8 +447,8 @@ export async function executeGeminiRequest(
(content: string, usage: GeminiUsage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.promptTokenCount,
completion: usage.candidatesTokenCount,
input: usage.promptTokenCount,
output: usage.candidatesTokenCount,
total: usage.totalTokenCount,
}

@@ -592,8 +592,8 @@ export async function executeGeminiRequest(
(streamContent: string, usage: GeminiUsage) => {
streamingResult.execution.output.content = streamContent
streamingResult.execution.output.tokens = {
prompt: accumulatedTokens.prompt + usage.promptTokenCount,
completion: accumulatedTokens.completion + usage.candidatesTokenCount,
input: accumulatedTokens.input + usage.promptTokenCount,
output: accumulatedTokens.output + usage.candidatesTokenCount,
total: accumulatedTokens.total + usage.totalTokenCount,
}

@@ -23,7 +23,7 @@ export interface ParsedFunctionCall {
*/
export interface ExecutionState {
contents: Content[]
tokens: { prompt: number; completion: number; total: number }
tokens: { input: number; output: number; total: number }
cost: { input: number; output: number; total: number; pricing: ModelPricing }
toolCalls: FunctionCallResponse[]
toolResults: Record<string, unknown>[]
@@ -126,8 +126,8 @@ export const groqProvider: ProviderConfig = {
stream: createReadableStreamFromGroqStream(streamResponse as any, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -147,7 +147,7 @@ export const groqProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -189,8 +189,8 @@ export const groqProvider: ProviderConfig = {

let content = currentResponse.choices[0]?.message?.content || ''
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -373,8 +373,8 @@ export const groqProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -396,14 +396,14 @@ export const groqProvider: ProviderConfig = {

const streamResponse = await groq.chat.completions.create(streamingPayload)

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingResult = {
stream: createReadableStreamFromGroqStream(streamResponse as any, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -424,8 +424,8 @@ export const groqProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -85,7 +85,7 @@ export async function executeProviderRequest(
}

if (response.tokens) {
const { prompt: promptTokens = 0, completion: completionTokens = 0 } = response.tokens
const { input: promptTokens = 0, output: completionTokens = 0 } = response.tokens
const useCachedInput = !!request.context && request.context.length > 0

if (shouldBillModelUsage(response.model)) {
@@ -149,8 +149,8 @@ export const mistralProvider: ProviderConfig = {
stream: createReadableStreamFromMistralStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -186,7 +186,7 @@ export const mistralProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -247,8 +247,8 @@ export const mistralProvider: ProviderConfig = {

let content = currentResponse.choices[0]?.message?.content || ''
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -434,8 +434,8 @@ export const mistralProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -445,7 +445,7 @@ export const mistralProvider: ProviderConfig = {
if (request.stream) {
logger.info('Using streaming for final response after tool processing')

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingParams: ChatCompletionCreateParamsStreaming = {
...payload,
@@ -460,8 +460,8 @@ export const mistralProvider: ProviderConfig = {
stream: createReadableStreamFromMistralStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -482,8 +482,8 @@ export const mistralProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -178,8 +178,8 @@ export const ollamaProvider: ProviderConfig = {
}

streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -215,7 +215,7 @@ export const ollamaProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -258,8 +258,8 @@ export const ollamaProvider: ProviderConfig = {
}

const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -429,8 +429,8 @@ export const ollamaProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -440,7 +440,7 @@ export const ollamaProvider: ProviderConfig = {
if (request.stream) {
logger.info('Using streaming for final response after tool processing')

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingParams: ChatCompletionCreateParamsStreaming = {
...payload,
@@ -462,8 +462,8 @@ export const ollamaProvider: ProviderConfig = {
}

streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -484,8 +484,8 @@ export const ollamaProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -144,8 +144,8 @@ export const openaiProvider: ProviderConfig = {
stream: createReadableStreamFromOpenAIStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -181,7 +181,7 @@ export const openaiProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -245,8 +245,8 @@ export const openaiProvider: ProviderConfig = {

let content = currentResponse.choices[0]?.message?.content || ''
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -433,8 +433,8 @@ export const openaiProvider: ProviderConfig = {
modelTime += thisModelTime

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -444,7 +444,7 @@ export const openaiProvider: ProviderConfig = {
if (request.stream) {
logger.info('Using streaming for final response after tool processing')

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingParams: ChatCompletionCreateParamsStreaming = {
...payload,
@@ -459,8 +459,8 @@ export const openaiProvider: ProviderConfig = {
stream: createReadableStreamFromOpenAIStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -481,8 +481,8 @@ export const openaiProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -160,8 +160,8 @@ export const openRouterProvider: ProviderConfig = {
stream: createReadableStreamFromOpenAIStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -193,7 +193,7 @@ export const openRouterProvider: ProviderConfig = {
output: {
content: '',
model: requestedModel,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -233,8 +233,8 @@ export const openRouterProvider: ProviderConfig = {

let content = currentResponse.choices[0]?.message?.content || ''
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = [] as any[]
@@ -420,15 +420,15 @@ export const openRouterProvider: ProviderConfig = {
content = currentResponse.choices[0].message.content
}
if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}
iterationCount++
}

if (request.stream) {
const accumulatedCost = calculateCost(requestedModel, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(requestedModel, tokens.input, tokens.output)

const streamingParams: ChatCompletionCreateParamsStreaming & { provider?: any } = {
model: payload.model,
@@ -459,8 +459,8 @@ export const openRouterProvider: ProviderConfig = {
stream: createReadableStreamFromOpenAIStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -480,7 +480,7 @@ export const openRouterProvider: ProviderConfig = {
output: {
content: '',
model: requestedModel,
tokens: { prompt: tokens.prompt, completion: tokens.completion, total: tokens.total },
tokens: { input: tokens.input, output: tokens.output, total: tokens.total },
toolCalls:
toolCalls.length > 0
? {
@@ -553,8 +553,8 @@ export const openRouterProvider: ProviderConfig = {
content = finalResponse.choices[0].message.content
}
if (finalResponse.usage) {
tokens.prompt += finalResponse.usage.prompt_tokens || 0
tokens.completion += finalResponse.usage.completion_tokens || 0
tokens.input += finalResponse.usage.prompt_tokens || 0
tokens.output += finalResponse.usage.completion_tokens || 0
tokens.total += finalResponse.usage.total_tokens || 0
}
}
@@ -28,8 +28,8 @@ export interface ModelPricing {
export type ModelPricingMap = Record<string, ModelPricing>

export interface TokenInfo {
prompt?: number
completion?: number
input?: number
output?: number
total?: number
}

@@ -74,8 +74,8 @@ export interface ProviderResponse {
content: string
model: string
tokens?: {
prompt?: number
completion?: number
input?: number
output?: number
total?: number
}
toolCalls?: FunctionCallResponse[]
@@ -198,8 +198,8 @@ export const vllmProvider: ProviderConfig = {

streamingResult.execution.output.content = cleanContent
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -235,7 +235,7 @@ export const vllmProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -301,8 +301,8 @@ export const vllmProvider: ProviderConfig = {
}

const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -497,8 +497,8 @@ export const vllmProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -508,7 +508,7 @@ export const vllmProvider: ProviderConfig = {
if (request.stream) {
logger.info('Using streaming for final response after tool processing')

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingParams: ChatCompletionCreateParamsStreaming = {
...payload,
@@ -528,8 +528,8 @@ export const vllmProvider: ProviderConfig = {

streamingResult.execution.output.content = cleanContent
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -550,8 +550,8 @@ export const vllmProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -119,8 +119,8 @@ export const xAIProvider: ProviderConfig = {
stream: createReadableStreamFromXAIStream(streamResponse, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: usage.prompt_tokens,
completion: usage.completion_tokens,
input: usage.prompt_tokens,
output: usage.completion_tokens,
total: usage.total_tokens,
}

@@ -140,7 +140,7 @@ export const xAIProvider: ProviderConfig = {
output: {
content: '',
model: request.model,
tokens: { prompt: 0, completion: 0, total: 0 },
tokens: { input: 0, output: 0, total: 0 },
toolCalls: undefined,
providerTiming: {
startTime: providerStartTimeISO,
@@ -202,8 +202,8 @@ export const xAIProvider: ProviderConfig = {

let content = currentResponse.choices[0]?.message?.content || ''
const tokens = {
prompt: currentResponse.usage?.prompt_tokens || 0,
completion: currentResponse.usage?.completion_tokens || 0,
input: currentResponse.usage?.prompt_tokens || 0,
output: currentResponse.usage?.completion_tokens || 0,
total: currentResponse.usage?.total_tokens || 0,
}
const toolCalls = []
@@ -441,8 +441,8 @@ export const xAIProvider: ProviderConfig = {
}

if (currentResponse.usage) {
tokens.prompt += currentResponse.usage.prompt_tokens || 0
tokens.completion += currentResponse.usage.completion_tokens || 0
tokens.input += currentResponse.usage.prompt_tokens || 0
tokens.output += currentResponse.usage.completion_tokens || 0
tokens.total += currentResponse.usage.total_tokens || 0
}

@@ -479,14 +479,14 @@ export const xAIProvider: ProviderConfig = {

const streamResponse = await xai.chat.completions.create(finalStreamingPayload as any)

const accumulatedCost = calculateCost(request.model, tokens.prompt, tokens.completion)
const accumulatedCost = calculateCost(request.model, tokens.input, tokens.output)

const streamingResult = {
stream: createReadableStreamFromXAIStream(streamResponse as any, (content, usage) => {
streamingResult.execution.output.content = content
streamingResult.execution.output.tokens = {
prompt: tokens.prompt + usage.prompt_tokens,
completion: tokens.completion + usage.completion_tokens,
input: tokens.input + usage.prompt_tokens,
output: tokens.output + usage.completion_tokens,
total: tokens.total + usage.total_tokens,
}

@@ -507,8 +507,8 @@ export const xAIProvider: ProviderConfig = {
content: '',
model: request.model,
tokens: {
prompt: tokens.prompt,
completion: tokens.completion,
input: tokens.input,
output: tokens.output,
total: tokens.total,
},
toolCalls:
@@ -29,6 +29,8 @@ export interface CostMetadata {
output: number
total: number
tokens?: {
input?: number
output?: number
prompt?: number
completion?: number
total?: number
@@ -39,6 +41,8 @@ export interface CostMetadata {
output?: number
total?: number
tokens?: {
input?: number
output?: number
prompt?: number
completion?: number
total?: number
@@ -53,6 +53,8 @@ import { ManageMcpToolClientTool } from '@/lib/copilot/tools/client/workflow/man
import { RunWorkflowClientTool } from '@/lib/copilot/tools/client/workflow/run-workflow'
import { SetGlobalWorkflowVariablesClientTool } from '@/lib/copilot/tools/client/workflow/set-global-workflow-variables'
import { createLogger } from '@/lib/logs/console/logger'
import { getQueryClient } from '@/app/_shell/providers/query-provider'
import { subscriptionKeys } from '@/hooks/queries/subscription'
import type {
ChatContext,
CopilotMessage,
@@ -2663,6 +2665,12 @@ export const useCopilotStore = create<CopilotStore>()(
// Fetch context usage after response completes
logger.info('[Context Usage] Stream completed, fetching usage')
await get().fetchContextUsage()

// Invalidate subscription queries to update usage
setTimeout(() => {
const queryClient = getQueryClient()
queryClient.invalidateQueries({ queryKey: subscriptionKeys.user() })
}, 1000)
} finally {
clearTimeout(timeoutId)
}