v0.6.54: mothership tracing, db pool size increase

This commit is contained in:
Vikhyath Mondreti
2026-04-22 14:01:10 -07:00
committed by GitHub
79 changed files with 8215 additions and 1920 deletions

View File

@@ -5,7 +5,11 @@ import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { recordUsage } from '@/lib/billing/core/usage-log'
import { checkAndBillOverageThreshold } from '@/lib/billing/threshold-billing'
import { BillingRouteOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { checkInternalApiKey } from '@/lib/copilot/request/http'
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
import { isBillingEnabled } from '@/lib/core/config/feature-flags'
import { type AtomicClaimResult, billingIdempotency } from '@/lib/core/idempotency/service'
import { generateRequestId } from '@/lib/core/utils/request'
@@ -28,8 +32,28 @@ const UpdateCostSchema = z.object({
/**
* POST /api/billing/update-cost
* Update user cost with a pre-calculated cost value (internal API key auth required)
*
* Parented under the Go-side `sim.update_cost` span via W3C traceparent
* propagation. Every mothership request that bills should therefore show
* the Go client span AND this Sim server span sharing one trace, with
* the actual usage/overage work nested below.
*/
export const POST = withRouteHandler(async (req: NextRequest) => {
export const POST = withRouteHandler((req: NextRequest) =>
withIncomingGoSpan(
req.headers,
TraceSpan.CopilotBillingUpdateCost,
{
[TraceAttr.HttpMethod]: 'POST',
[TraceAttr.HttpRoute]: '/api/billing/update-cost',
},
async (span) => updateCostInner(req, span)
)
)
async function updateCostInner(
req: NextRequest,
span: import('@opentelemetry/api').Span
): Promise<NextResponse> {
const requestId = generateRequestId()
const startTime = Date.now()
let claim: AtomicClaimResult | null = null
@@ -39,6 +63,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
logger.info(`[${requestId}] Update cost request started`)
if (!isBillingEnabled) {
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.BillingDisabled)
span.setAttribute(TraceAttr.HttpStatusCode, 200)
return NextResponse.json({
success: true,
message: 'Billing disabled, cost update skipped',
@@ -54,6 +80,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
const authResult = checkInternalApiKey(req)
if (!authResult.success) {
logger.warn(`[${requestId}] Authentication failed: ${authResult.error}`)
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.AuthFailed)
span.setAttribute(TraceAttr.HttpStatusCode, 401)
return NextResponse.json(
{
success: false,
@@ -69,8 +97,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
if (!validation.success) {
logger.warn(`[${requestId}] Invalid request body`, {
errors: validation.error.issues,
body,
})
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InvalidBody)
span.setAttribute(TraceAttr.HttpStatusCode, 400)
return NextResponse.json(
{
success: false,
@@ -85,6 +114,17 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
validation.data
const isMcp = source === 'mcp_copilot'
span.setAttributes({
[TraceAttr.UserId]: userId,
[TraceAttr.GenAiRequestModel]: model,
[TraceAttr.BillingSource]: source,
[TraceAttr.BillingCostUsd]: cost,
[TraceAttr.GenAiUsageInputTokens]: inputTokens,
[TraceAttr.GenAiUsageOutputTokens]: outputTokens,
[TraceAttr.BillingIsMcp]: isMcp,
...(idempotencyKey ? { [TraceAttr.BillingIdempotencyKey]: idempotencyKey } : {}),
})
claim = idempotencyKey
? await billingIdempotency.atomicallyClaim('update-cost', idempotencyKey)
: null
@@ -95,6 +135,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
userId,
source,
})
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.DuplicateIdempotencyKey)
span.setAttribute(TraceAttr.HttpStatusCode, 409)
return NextResponse.json(
{
success: false,
@@ -159,6 +201,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
cost,
})
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.Billed)
span.setAttribute(TraceAttr.HttpStatusCode, 200)
span.setAttribute(TraceAttr.BillingDurationMs, duration)
return NextResponse.json({
success: true,
data: {
@@ -193,6 +238,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
)
}
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InternalError)
span.setAttribute(TraceAttr.HttpStatusCode, 500)
span.setAttribute(TraceAttr.BillingDurationMs, duration)
return NextResponse.json(
{
success: false,
@@ -202,4 +250,4 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
{ status: 500 }
)
}
})
}

View File

@@ -2,6 +2,8 @@ import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { env } from '@/lib/core/config/env'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
@@ -33,13 +35,16 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
const { name } = validationResult.data
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/generate`, {
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/generate`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
},
body: JSON.stringify({ userId, name }),
spanName: 'sim → go /api/validate-key/generate',
operation: 'generate_api_key',
attributes: { [TraceAttr.UserId]: userId },
})
if (!res.ok) {

View File

@@ -20,6 +20,22 @@ vi.mock('@/lib/core/config/env', () => createEnvMock({ COPILOT_API_KEY: 'test-ap
import { DELETE, GET } from '@/app/api/copilot/api-keys/route'
// `fetchGo` reads `response.status` and `response.headers.get('content-length')`
// to stamp span attributes, so mock responses need both fields or the call
// path throws before the route handler sees the body.
// Builds the minimal Response-shaped object `fetchGo` needs: it reads
// `status` and `headers.get('content-length')` to stamp span attributes,
// so both fields must exist on every mocked fetch result.
function buildMockResponse(init: {
  ok: boolean
  status?: number
  json: () => Promise<unknown>
}): Record<string, unknown> {
  const { ok, status, json } = init
  // Default the status from `ok` when the caller didn't pin one explicitly.
  const resolvedStatus = status ?? (ok ? 200 : 500)
  return { ok, status: resolvedStatus, headers: new Headers(), json }
}
describe('Copilot API Keys API Route', () => {
beforeEach(() => {
vi.clearAllMocks()
@@ -60,10 +76,12 @@ describe('Copilot API Keys API Route', () => {
},
]
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve(mockApiKeys),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve(mockApiKeys),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -83,10 +101,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve([]),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve([]),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -101,10 +121,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve([]),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve([]),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
await GET(request)
@@ -127,11 +149,13 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: false,
status: 503,
json: () => Promise.resolve({ error: 'Service unavailable' }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: false,
status: 503,
json: () => Promise.resolve({ error: 'Service unavailable' }),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -146,10 +170,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ invalid: 'response' }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve({ invalid: 'response' }),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -189,10 +215,12 @@ describe('Copilot API Keys API Route', () => {
},
]
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve(mockApiKeys),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve(mockApiKeys),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -207,10 +235,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.reject(new Error('Invalid JSON')),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.reject(new Error('Invalid JSON')),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
const response = await GET(request)
@@ -251,10 +281,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ success: true }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve({ success: true }),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
const response = await DELETE(request)
@@ -281,11 +313,13 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: false,
status: 404,
json: () => Promise.resolve({ error: 'Key not found' }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: false,
status: 404,
json: () => Promise.resolve({ error: 'Key not found' }),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=non-existent')
const response = await DELETE(request)
@@ -300,10 +334,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ success: false }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve({ success: false }),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
const response = await DELETE(request)
@@ -333,10 +369,12 @@ describe('Copilot API Keys API Route', () => {
user: { id: 'user-123', email: 'test@example.com' },
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.reject(new Error('Invalid JSON')),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.reject(new Error('Invalid JSON')),
})
)
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
const response = await DELETE(request)

View File

@@ -1,6 +1,8 @@
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { env } from '@/lib/core/config/env'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
@@ -13,13 +15,16 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
const userId = session.user.id
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, {
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
},
body: JSON.stringify({ userId }),
spanName: 'sim → go /api/validate-key/get-api-keys',
operation: 'get_api_keys',
attributes: { [TraceAttr.UserId]: userId },
})
if (!res.ok) {
@@ -67,13 +72,16 @@ export const DELETE = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({ error: 'id is required' }, { status: 400 })
}
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/delete`, {
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/delete`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
},
body: JSON.stringify({ userId, apiKeyId: id }),
spanName: 'sim → go /api/validate-key/delete',
operation: 'delete_api_key',
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ApiKeyId]: id },
})
if (!res.ok) {

View File

@@ -5,7 +5,11 @@ import { eq } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { checkServerSideUsageLimits } from '@/lib/billing/calculations/usage-monitor'
import { CopilotValidateOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { checkInternalApiKey } from '@/lib/copilot/request/http'
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
const logger = createLogger('CopilotApiKeysValidate')
@@ -14,55 +18,87 @@ const ValidateApiKeySchema = z.object({
userId: z.string().min(1, 'userId is required'),
})
export const POST = withRouteHandler(async (req: NextRequest) => {
try {
const auth = checkInternalApiKey(req)
if (!auth.success) {
return new NextResponse(null, { status: 401 })
// Incoming-from-Go: extracts traceparent so this handler's work shows
// up as a child of the Go-side `sim.validate_api_key` span in the same
// trace. If there's no traceparent (manual curl / browser), the helper
// falls back to a new root span.
export const POST = withRouteHandler((req: NextRequest) =>
withIncomingGoSpan(
req.headers,
TraceSpan.CopilotAuthValidateApiKey,
{
[TraceAttr.HttpMethod]: 'POST',
[TraceAttr.HttpRoute]: '/api/copilot/api-keys/validate',
},
async (span) => {
try {
const auth = checkInternalApiKey(req)
if (!auth.success) {
span.setAttribute(
TraceAttr.CopilotValidateOutcome,
CopilotValidateOutcome.InternalAuthFailed
)
span.setAttribute(TraceAttr.HttpStatusCode, 401)
return new NextResponse(null, { status: 401 })
}
const body = await req.json().catch(() => null)
const validationResult = ValidateApiKeySchema.safeParse(body)
if (!validationResult.success) {
logger.warn('Invalid validation request', { errors: validationResult.error.errors })
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InvalidBody)
span.setAttribute(TraceAttr.HttpStatusCode, 400)
return NextResponse.json(
{
error: 'userId is required',
details: validationResult.error.errors,
},
{ status: 400 }
)
}
const { userId } = validationResult.data
span.setAttribute(TraceAttr.UserId, userId)
const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1)
if (!existingUser) {
logger.warn('[API VALIDATION] userId does not exist', { userId })
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UserNotFound)
span.setAttribute(TraceAttr.HttpStatusCode, 403)
return NextResponse.json({ error: 'User not found' }, { status: 403 })
}
logger.info('[API VALIDATION] Validating usage limit', { userId })
const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId)
span.setAttributes({
[TraceAttr.BillingUsageCurrent]: currentUsage,
[TraceAttr.BillingUsageLimit]: limit,
[TraceAttr.BillingUsageExceeded]: isExceeded,
})
logger.info('[API VALIDATION] Usage limit validated', {
userId,
currentUsage,
limit,
isExceeded,
})
if (isExceeded) {
logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit })
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UsageExceeded)
span.setAttribute(TraceAttr.HttpStatusCode, 402)
return new NextResponse(null, { status: 402 })
}
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.Ok)
span.setAttribute(TraceAttr.HttpStatusCode, 200)
return new NextResponse(null, { status: 200 })
} catch (error) {
logger.error('Error validating usage limit', { error })
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InternalError)
span.setAttribute(TraceAttr.HttpStatusCode, 500)
return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 })
}
}
const body = await req.json().catch(() => null)
const validationResult = ValidateApiKeySchema.safeParse(body)
if (!validationResult.success) {
logger.warn('Invalid validation request', { errors: validationResult.error.errors })
return NextResponse.json(
{
error: 'userId is required',
details: validationResult.error.errors,
},
{ status: 400 }
)
}
const { userId } = validationResult.data
const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1)
if (!existingUser) {
logger.warn('[API VALIDATION] userId does not exist', { userId })
return NextResponse.json({ error: 'User not found' }, { status: 403 })
}
logger.info('[API VALIDATION] Validating usage limit', { userId })
const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId)
logger.info('[API VALIDATION] Usage limit validated', {
userId,
currentUsage,
limit,
isExceeded,
})
if (isExceeded) {
logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit })
return new NextResponse(null, { status: 402 })
}
return new NextResponse(null, { status: 200 })
} catch (error) {
logger.error('Error validating usage limit', { error })
return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 })
}
})
)
)

View File

@@ -2,6 +2,8 @@ import { createLogger } from '@sim/logger'
import { type NextRequest, NextResponse } from 'next/server'
import { getSession } from '@/lib/auth'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { env } from '@/lib/core/config/env'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
@@ -31,9 +33,15 @@ export const GET = withRouteHandler(async () => {
const userId = session.user.id
const res = await fetch(
const res = await fetchGo(
`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}`,
{ method: 'GET', headers: copilotHeaders() }
{
method: 'GET',
headers: copilotHeaders(),
spanName: 'sim → go /api/tool-preferences/auto-allowed',
operation: 'list_auto_allowed_tools',
attributes: { [TraceAttr.UserId]: userId },
}
)
if (!res.ok) {
@@ -67,10 +75,13 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({ error: 'toolId must be a string' }, { status: 400 })
}
const res = await fetch(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, {
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, {
method: 'POST',
headers: copilotHeaders(),
body: JSON.stringify({ userId, toolId: body.toolId }),
spanName: 'sim → go /api/tool-preferences/auto-allowed',
operation: 'add_auto_allowed_tool',
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: body.toolId },
})
if (!res.ok) {
@@ -108,9 +119,15 @@ export const DELETE = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({ error: 'toolId query parameter is required' }, { status: 400 })
}
const res = await fetch(
const res = await fetchGo(
`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}&toolId=${encodeURIComponent(toolId)}`,
{ method: 'DELETE', headers: copilotHeaders() }
{
method: 'DELETE',
headers: copilotHeaders(),
spanName: 'sim → go /api/tool-preferences/auto-allowed',
operation: 'remove_auto_allowed_tool',
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: toolId },
}
)
if (!res.ok) {

View File

@@ -1,9 +1,13 @@
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { NextResponse } from 'next/server'
import { type NextRequest, NextResponse } from 'next/server'
import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { CopilotAbortOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
import { withCopilotSpan, withIncomingGoSpan } from '@/lib/copilot/request/otel'
import { abortActiveStream, waitForPendingChatStream } from '@/lib/copilot/request/session'
import { env } from '@/lib/core/config/env'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
@@ -12,81 +16,136 @@ const logger = createLogger('CopilotChatAbortAPI')
const GO_EXPLICIT_ABORT_TIMEOUT_MS = 3000
const STREAM_ABORT_SETTLE_TIMEOUT_MS = 8000
export const POST = withRouteHandler(async (request: Request) => {
const { userId: authenticatedUserId, isAuthenticated } =
await authenticateCopilotRequestSessionOnly()
// POST /api/copilot/chat/abort — fires on user Stop; marks the Go
// side aborted then waits for the prior stream to settle.
export const POST = withRouteHandler((request: NextRequest) =>
withIncomingGoSpan(
request.headers,
TraceSpan.CopilotChatAbortStream,
undefined,
async (rootSpan) => {
const { userId: authenticatedUserId, isAuthenticated } =
await authenticateCopilotRequestSessionOnly()
if (!isAuthenticated || !authenticatedUserId) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
if (!isAuthenticated || !authenticatedUserId) {
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Unauthorized)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
const body = await request.json().catch((err) => {
logger.warn('Abort request body parse failed; continuing with empty object', {
error: toError(err).message,
})
return {}
})
const streamId = typeof body.streamId === 'string' ? body.streamId : ''
let chatId = typeof body.chatId === 'string' ? body.chatId : ''
if (!streamId) {
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
}
if (!chatId) {
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
logger.warn('getLatestRunForStream failed while resolving chatId for abort', {
streamId,
error: toError(err).message,
const body = await request.json().catch((err) => {
logger.warn('Abort request body parse failed; continuing with empty object', {
error: err instanceof Error ? err.message : String(err),
})
return {}
})
return null
})
if (run?.chatId) {
chatId = run.chatId
}
}
const streamId = typeof body.streamId === 'string' ? body.streamId : ''
let chatId = typeof body.chatId === 'string' ? body.chatId : ''
try {
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
if (env.COPILOT_API_KEY) {
headers['x-api-key'] = env.COPILOT_API_KEY
}
const controller = new AbortController()
const timeout = setTimeout(
() => controller.abort('timeout:go_explicit_abort_fetch'),
GO_EXPLICIT_ABORT_TIMEOUT_MS
)
const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
method: 'POST',
headers,
signal: controller.signal,
body: JSON.stringify({
messageId: streamId,
userId: authenticatedUserId,
...(chatId ? { chatId } : {}),
}),
}).finally(() => clearTimeout(timeout))
if (!response.ok) {
throw new Error(`Explicit abort marker request failed: ${response.status}`)
}
} catch (err) {
logger.warn('Explicit abort marker request failed; proceeding with local abort', {
streamId,
error: toError(err).message,
})
}
if (!streamId) {
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.MissingStreamId)
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
}
rootSpan.setAttributes({
[TraceAttr.StreamId]: streamId,
[TraceAttr.UserId]: authenticatedUserId,
})
const aborted = await abortActiveStream(streamId)
if (chatId) {
const settled = await waitForPendingChatStream(chatId, STREAM_ABORT_SETTLE_TIMEOUT_MS, streamId)
if (!settled) {
return NextResponse.json(
{ error: 'Previous response is still shutting down', aborted, settled: false },
{ status: 409 }
)
}
return NextResponse.json({ aborted, settled: true })
}
if (!chatId) {
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
logger.warn('getLatestRunForStream failed while resolving chatId for abort', {
streamId,
error: err instanceof Error ? err.message : String(err),
})
return null
})
if (run?.chatId) {
chatId = run.chatId
}
}
if (chatId) rootSpan.setAttribute(TraceAttr.ChatId, chatId)
return NextResponse.json({ aborted })
})
// Local abort before Go — lets the lifecycle classifier see
// `signal.aborted` with an explicit-stop reason before Go's
// context-canceled error propagates back. Go's endpoint runs
// second for billing-ledger flush; Go's context is already
// cancelled by then.
const aborted = await abortActiveStream(streamId)
rootSpan.setAttribute(TraceAttr.CopilotAbortLocalAborted, aborted)
let goAbortOk = false
try {
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
if (env.COPILOT_API_KEY) {
headers['x-api-key'] = env.COPILOT_API_KEY
}
const controller = new AbortController()
const timeout = setTimeout(
() => controller.abort('timeout:go_explicit_abort_fetch'),
GO_EXPLICIT_ABORT_TIMEOUT_MS
)
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
method: 'POST',
headers,
signal: controller.signal,
body: JSON.stringify({
messageId: streamId,
userId: authenticatedUserId,
...(chatId ? { chatId } : {}),
}),
spanName: 'sim → go /api/streams/explicit-abort',
operation: 'explicit_abort',
attributes: {
[TraceAttr.StreamId]: streamId,
...(chatId ? { [TraceAttr.ChatId]: chatId } : {}),
},
}).finally(() => clearTimeout(timeout))
if (!response.ok) {
throw new Error(`Explicit abort marker request failed: ${response.status}`)
}
goAbortOk = true
} catch (err) {
logger.warn('Explicit abort marker request failed after local abort', {
streamId,
error: err instanceof Error ? err.message : String(err),
})
}
rootSpan.setAttribute(TraceAttr.CopilotAbortGoMarkerOk, goAbortOk)
if (chatId) {
const settled = await withCopilotSpan(
TraceSpan.CopilotChatAbortWaitSettle,
{
[TraceAttr.ChatId]: chatId,
[TraceAttr.StreamId]: streamId,
[TraceAttr.SettleTimeoutMs]: STREAM_ABORT_SETTLE_TIMEOUT_MS,
},
async (settleSpan) => {
const start = Date.now()
const ok = await waitForPendingChatStream(
chatId,
STREAM_ABORT_SETTLE_TIMEOUT_MS,
streamId
)
settleSpan.setAttributes({
[TraceAttr.SettleWaitMs]: Date.now() - start,
[TraceAttr.SettleCompleted]: ok,
})
return ok
}
)
if (!settled) {
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.SettleTimeout)
return NextResponse.json(
{ error: 'Previous response is still shutting down', aborted, settled: false },
{ status: 409 }
)
}
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Settled)
return NextResponse.json({ aborted, settled: true })
}
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.NoChatId)
return NextResponse.json({ aborted })
}
)
)

View File

@@ -7,6 +7,10 @@ import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { getSession } from '@/lib/auth'
import { normalizeMessage, type PersistedMessage } from '@/lib/copilot/chat/persisted-message'
import { CopilotStopOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
import { taskPubSub } from '@/lib/copilot/tasks'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
@@ -55,95 +59,121 @@ const StopSchema = z.object({
streamId: z.string(),
content: z.string(),
contentBlocks: z.array(ContentBlockSchema).optional(),
// Optional for older clients; when present, flows into msg.requestId
// so the UI's copy-request-ID button survives a stopped turn.
requestId: z.string().optional(),
})
/**
* POST /api/copilot/chat/stop
* Persists partial assistant content when the user stops a stream mid-response.
* Clears conversationId so the server-side onComplete won't duplicate the message.
* The chat stream lock is intentionally left alone here; it is released only once
* the aborted server stream actually unwinds.
*/
export const POST = withRouteHandler(async (req: NextRequest) => {
try {
const session = await getSession()
if (!session?.user?.id) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
// POST /api/copilot/chat/stop — persists partial assistant content
// when the user stops mid-stream. Lock release is handled by the
// aborted server stream unwinding, not this handler.
export const POST = withRouteHandler((req: NextRequest) =>
withIncomingGoSpan(req.headers, TraceSpan.CopilotChatStopStream, undefined, async (span) => {
try {
const session = await getSession()
if (!session?.user?.id) {
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.Unauthorized)
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
}
const { chatId, streamId, content, contentBlocks } = StopSchema.parse(await req.json())
const [row] = await db
.select({
workspaceId: copilotChats.workspaceId,
messages: copilotChats.messages,
const { chatId, streamId, content, contentBlocks, requestId } = StopSchema.parse(
await req.json()
)
span.setAttributes({
[TraceAttr.ChatId]: chatId,
[TraceAttr.StreamId]: streamId,
[TraceAttr.UserId]: session.user.id,
[TraceAttr.CopilotStopContentLength]: content.length,
[TraceAttr.CopilotStopBlocksCount]: contentBlocks?.length ?? 0,
...(requestId ? { [TraceAttr.RequestId]: requestId } : {}),
})
.from(copilotChats)
.where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id)))
.limit(1)
if (!row) {
const [row] = await db
.select({
workspaceId: copilotChats.workspaceId,
messages: copilotChats.messages,
})
.from(copilotChats)
.where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id)))
.limit(1)
if (!row) {
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ChatNotFound)
return NextResponse.json({ success: true })
}
const messages: Record<string, unknown>[] = Array.isArray(row.messages) ? row.messages : []
const userIdx = messages.findIndex((message) => message.id === streamId)
const alreadyHasResponse =
userIdx >= 0 &&
userIdx + 1 < messages.length &&
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
const canAppendAssistant =
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
const updateWhere = and(
eq(copilotChats.id, chatId),
eq(copilotChats.userId, session.user.id),
eq(copilotChats.conversationId, streamId)
)
const setClause: Record<string, unknown> = {
conversationId: null,
updatedAt: new Date(),
}
const hasContent = content.trim().length > 0
const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0
const synthesizedStoppedBlocks = hasBlocks
? contentBlocks
: hasContent
? [{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }]
: [{ type: 'stopped' }]
if (canAppendAssistant) {
const normalized = normalizeMessage({
id: generateId(),
role: 'assistant',
content,
timestamp: new Date().toISOString(),
contentBlocks: synthesizedStoppedBlocks,
// Persist so the UI copy-request-id button survives refetch.
...(requestId ? { requestId } : {}),
})
const assistantMessage: PersistedMessage = normalized
setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`
}
span.setAttribute(TraceAttr.CopilotStopAppendedAssistant, canAppendAssistant)
const [updated] = await db
.update(copilotChats)
.set(setClause)
.where(updateWhere)
.returning({ workspaceId: copilotChats.workspaceId })
if (updated?.workspaceId) {
taskPubSub?.publishStatusChanged({
workspaceId: updated.workspaceId,
chatId,
type: 'completed',
})
}
span.setAttribute(
TraceAttr.CopilotStopOutcome,
updated ? CopilotStopOutcome.Persisted : CopilotStopOutcome.NoMatchingRow
)
return NextResponse.json({ success: true })
} catch (error) {
if (error instanceof z.ZodError) {
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ValidationError)
return NextResponse.json(
{ error: 'Invalid request data', details: error.errors },
{ status: 400 }
)
}
logger.error('Error stopping chat stream:', error)
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.InternalError)
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
}
const messages: Record<string, unknown>[] = Array.isArray(row.messages) ? row.messages : []
const userIdx = messages.findIndex((message) => message.id === streamId)
const alreadyHasResponse =
userIdx >= 0 &&
userIdx + 1 < messages.length &&
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
const canAppendAssistant =
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
const updateWhere = and(
eq(copilotChats.id, chatId),
eq(copilotChats.userId, session.user.id),
eq(copilotChats.conversationId, streamId)
)
const setClause: Record<string, unknown> = {
conversationId: null,
updatedAt: new Date(),
}
const hasContent = content.trim().length > 0
const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0
const synthesizedStoppedBlocks = hasBlocks
? contentBlocks
: hasContent
? [{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }]
: [{ type: 'stopped' }]
if (canAppendAssistant) {
const normalized = normalizeMessage({
id: generateId(),
role: 'assistant',
content,
timestamp: new Date().toISOString(),
contentBlocks: synthesizedStoppedBlocks,
})
const assistantMessage: PersistedMessage = normalized
setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`
}
const [updated] = await db
.update(copilotChats)
.set(setClause)
.where(updateWhere)
.returning({ workspaceId: copilotChats.workspaceId })
if (updated?.workspaceId) {
taskPubSub?.publishStatusChanged({
workspaceId: updated.workspaceId,
chatId,
type: 'completed',
})
}
return NextResponse.json({ success: true })
} catch (error) {
if (error instanceof z.ZodError) {
return NextResponse.json({ error: 'Invalid request' }, { status: 400 })
}
logger.error('Error stopping chat stream:', error)
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
}
})
})
)

View File

@@ -160,4 +160,42 @@ describe('copilot chat stream replay route', () => {
expect(body).toContain('"code":"resume_run_unavailable"')
expect(body).toContain(`"type":"${MothershipStreamV1EventType.complete}"`)
})
it('uses the latest live request id for synthetic terminal replay events', async () => {
// First poll sees the run still active (stream stays open); second poll
// sees it cancelled, which forces the route to synthesize a terminal
// `complete` envelope itself rather than replaying one from storage.
getLatestRunForStream
.mockResolvedValueOnce({
status: 'active',
executionId: 'exec-1',
id: 'run-1',
})
.mockResolvedValueOnce({
status: 'cancelled',
executionId: 'exec-1',
id: 'run-1',
})
// One buffered text event carries trace.requestId 'req-live-123'; the
// second read returns nothing, so the only source for the terminal
// event's request id is that live envelope.
readEvents
.mockResolvedValueOnce([
{
stream: { streamId: 'stream-1', cursor: '1' },
seq: 1,
trace: { requestId: 'req-live-123' },
type: MothershipStreamV1EventType.text,
payload: {
channel: 'assistant',
text: 'hello',
},
},
])
.mockResolvedValueOnce([])
const response = await GET(
new NextRequest('http://localhost:3000/api/copilot/chat/stream?streamId=stream-1&after=0')
)
const chunks = await readAllChunks(response)
const terminalChunk = chunks[chunks.length - 1] ?? ''
// The synthesized terminal must be a `complete` event that echoes the
// request id observed in the live stream and the run's cancelled status.
expect(terminalChunk).toContain(`"type":"${MothershipStreamV1EventType.complete}"`)
expect(terminalChunk).toContain('"requestId":"req-live-123"')
expect(terminalChunk).toContain('"status":"cancelled"')
})
})

View File

@@ -1,13 +1,20 @@
import { context as otelContext, trace } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { sleep } from '@sim/utils/helpers'
import { type NextRequest, NextResponse } from 'next/server'
import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository'
import {
MothershipStreamV1CompletionStatus,
MothershipStreamV1EventType,
} from '@/lib/copilot/generated/mothership-stream-v1'
import {
CopilotResumeOutcome,
CopilotTransport,
} from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation'
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
import { getCopilotTracer, markSpanForError } from '@/lib/copilot/request/otel'
import {
checkForReplayGap,
createEvent,
@@ -25,6 +32,25 @@ const logger = createLogger('CopilotChatStreamAPI')
const POLL_INTERVAL_MS = 250
const MAX_STREAM_MS = 60 * 60 * 1000
// Returns the value when it is a non-empty string; otherwise the empty
// string, which callers treat as "no request id available".
function extractCanonicalRequestId(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.length > 0 ? value : ''
}
// Pulls the canonical request id out of a run row's `requestContext` JSON,
// preferring `requestId` over the legacy `simRequestId` key. Returns '' when
// the run is missing, the context is not an object, or neither key holds a
// non-empty string.
function extractRunRequestId(run: { requestContext?: unknown } | null | undefined): string {
  const context = run?.requestContext
  if (typeof context !== 'object' || context === null) {
    return ''
  }
  const record = context as Record<string, unknown>
  for (const key of ['requestId', 'simRequestId']) {
    const candidate = record[key]
    if (typeof candidate === 'string' && candidate.length > 0) {
      return candidate
    }
  }
  return ''
}
// Reads the request id stamped on a stream event envelope's trace block,
// yielding '' when the trace is absent or the id is not a non-empty string.
function extractEnvelopeRequestId(envelope: { trace?: { requestId?: unknown } }): string {
  const requestId = envelope.trace?.requestId
  return typeof requestId === 'string' && requestId.length > 0 ? requestId : ''
}
function isTerminalStatus(
status: string | null | undefined
): status is MothershipStreamV1CompletionStatus {
@@ -42,10 +68,12 @@ function buildResumeTerminalEnvelopes(options: {
message?: string
code: string
reason?: string
requestId?: string
}) {
const baseSeq = Number(options.afterCursor || '0')
const seq = Number.isFinite(baseSeq) ? baseSeq : 0
const envelopes: ReturnType<typeof createEvent>[] = []
const rid = options.requestId ?? ''
if (options.status === MothershipStreamV1CompletionStatus.error) {
envelopes.push(
@@ -53,7 +81,7 @@ function buildResumeTerminalEnvelopes(options: {
streamId: options.streamId,
cursor: String(seq + 1),
seq: seq + 1,
requestId: '',
requestId: rid,
type: MothershipStreamV1EventType.error,
payload: {
message: options.message || 'Stream recovery failed before completion.',
@@ -68,7 +96,7 @@ function buildResumeTerminalEnvelopes(options: {
streamId: options.streamId,
cursor: String(seq + envelopes.length + 1),
seq: seq + envelopes.length + 1,
requestId: '',
requestId: rid,
type: MothershipStreamV1EventType.complete,
payload: {
status: options.status,
@@ -97,10 +125,77 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
}
// Root span for the whole resume/reconnect request. In stream mode the
// work happens inside `ReadableStream.start`, which the Node runtime
// invokes after this function returns and OUTSIDE the AsyncLocalStorage
// scope installed by `startActiveSpan`. We therefore start the span
// manually, capture its context, and re-enter that context inside the
// stream callback so every nested `withCopilotSpan` / `withDbSpan` call
// attaches to this root.
//
// `contextFromRequestHeaders` extracts the W3C `traceparent` the
// client echoed (set via `streamTraceparentRef` on Sim's chat POST
// response), so the resume span becomes a child of the original
// chat's `gen_ai.agent.execute` trace instead of a disconnected
// new root. On reconnects after page reload (client ref was wiped)
// the header is absent and extraction leaves the ambient context
// alone → the resume span becomes its own root. Same as pre-
// linking behavior; no regression.
const incomingContext = contextFromRequestHeaders(request.headers)
const rootSpan = getCopilotTracer().startSpan(
TraceSpan.CopilotResumeRequest,
{
attributes: {
[TraceAttr.CopilotTransport]: batchMode ? CopilotTransport.Batch : CopilotTransport.Stream,
[TraceAttr.StreamId]: streamId,
[TraceAttr.UserId]: authenticatedUserId,
[TraceAttr.CopilotResumeAfterCursor]: afterCursor || '0',
},
},
incomingContext
)
const rootContext = trace.setSpan(incomingContext, rootSpan)
try {
return await otelContext.with(rootContext, () =>
handleResumeRequestBody({
request,
streamId,
afterCursor,
batchMode,
authenticatedUserId,
rootSpan,
rootContext,
})
)
} catch (err) {
markSpanForError(rootSpan, err)
rootSpan.end()
throw err
}
})
async function handleResumeRequestBody({
request,
streamId,
afterCursor,
batchMode,
authenticatedUserId,
rootSpan,
rootContext,
}: {
request: NextRequest
streamId: string
afterCursor: string
batchMode: boolean
authenticatedUserId: string
rootSpan: import('@opentelemetry/api').Span
rootContext: import('@opentelemetry/api').Context
}) {
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
logger.warn('Failed to fetch latest run for stream', {
streamId,
error: toError(err).message,
error: err instanceof Error ? err.message : String(err),
})
return null
})
@@ -112,8 +207,11 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
runStatus: run?.status,
})
if (!run) {
rootSpan.setAttribute(TraceAttr.CopilotResumeOutcome, CopilotResumeOutcome.StreamNotFound)
rootSpan.end()
return NextResponse.json({ error: 'Stream not found' }, { status: 404 })
}
rootSpan.setAttribute(TraceAttr.CopilotRunStatus, run.status)
if (batchMode) {
const afterSeq = afterCursor || '0'
@@ -122,7 +220,7 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
readFilePreviewSessions(streamId).catch((error) => {
logger.warn('Failed to read preview sessions for stream batch', {
streamId,
error: toError(error).message,
error: error instanceof Error ? error.message : String(error),
})
return []
}),
@@ -135,6 +233,12 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
previewSessionCount: previewSessions.length,
runStatus: run.status,
})
rootSpan.setAttributes({
[TraceAttr.CopilotResumeOutcome]: CopilotResumeOutcome.BatchDelivered,
[TraceAttr.CopilotResumeEventCount]: batchEvents.length,
[TraceAttr.CopilotResumePreviewSessionCount]: previewSessions.length,
})
rootSpan.end()
return NextResponse.json({
success: true,
events: batchEvents,
@@ -144,165 +248,203 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
}
const startTime = Date.now()
let totalEventsFlushed = 0
let pollIterations = 0
const stream = new ReadableStream({
async start(controller) {
let cursor = afterCursor || '0'
let controllerClosed = false
let sawTerminalEvent = false
const closeController = () => {
if (controllerClosed) return
controllerClosed = true
try {
controller.close()
} catch {
// Controller already closed by runtime/client
}
}
const enqueueEvent = (payload: unknown) => {
if (controllerClosed) return false
try {
controller.enqueue(encodeSSEEnvelope(payload))
return true
} catch {
controllerClosed = true
return false
}
}
const abortListener = () => {
controllerClosed = true
}
request.signal.addEventListener('abort', abortListener, { once: true })
const flushEvents = async () => {
const events = await readEvents(streamId, cursor)
if (events.length > 0) {
logger.info('[Resume] Flushing events', {
streamId,
afterCursor: cursor,
eventCount: events.length,
})
}
for (const envelope of events) {
cursor = envelope.stream.cursor ?? String(envelope.seq)
if (envelope.type === MothershipStreamV1EventType.complete) {
sawTerminalEvent = true
}
if (!enqueueEvent(envelope)) {
break
}
}
}
const emitTerminalIfMissing = (
status: MothershipStreamV1CompletionStatus,
options?: { message?: string; code: string; reason?: string }
) => {
if (controllerClosed || sawTerminalEvent) {
return
}
for (const envelope of buildResumeTerminalEnvelopes({
streamId,
afterCursor: cursor,
status,
message: options?.message,
code: options?.code ?? 'resume_terminal',
reason: options?.reason,
})) {
cursor = envelope.stream.cursor ?? String(envelope.seq)
if (envelope.type === MothershipStreamV1EventType.complete) {
sawTerminalEvent = true
}
if (!enqueueEvent(envelope)) {
break
}
}
}
try {
const gap = await checkForReplayGap(streamId, afterCursor)
if (gap) {
for (const envelope of gap.envelopes) {
enqueueEvent(envelope)
}
return
}
await flushEvents()
while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) {
const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch(
(err) => {
logger.warn('Failed to poll latest run for stream', {
streamId,
error: toError(err).message,
})
return null
}
)
if (!currentRun) {
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream could not be recovered because its run metadata is unavailable.',
code: 'resume_run_unavailable',
reason: 'run_unavailable',
})
break
}
await flushEvents()
if (controllerClosed) {
break
}
if (isTerminalStatus(currentRun.status)) {
emitTerminalIfMissing(currentRun.status, {
message:
currentRun.status === MothershipStreamV1CompletionStatus.error
? typeof currentRun.error === 'string'
? currentRun.error
: 'The recovered stream ended with an error.'
: undefined,
code: 'resume_terminal_status',
reason: 'terminal_status',
})
break
}
if (request.signal.aborted) {
controllerClosed = true
break
}
await sleep(POLL_INTERVAL_MS)
}
if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) {
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream recovery timed out before completion.',
code: 'resume_timeout',
reason: 'timeout',
})
}
} catch (error) {
if (!controllerClosed && !request.signal.aborted) {
logger.warn('Stream replay failed', {
streamId,
error: toError(error).message,
})
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream replay failed before completion.',
code: 'resume_internal',
reason: 'stream_replay_failed',
})
}
} finally {
request.signal.removeEventListener('abort', abortListener)
closeController()
}
// Re-enter the root OTel context so any `withCopilotSpan` call below
// (inside flushEvents/checkForReplayGap/etc.) parents under
// copilot.resume.request instead of becoming an orphan.
return otelContext.with(rootContext, () => startInner(controller))
},
})
async function startInner(controller: ReadableStreamDefaultController) {
let cursor = afterCursor || '0'
let controllerClosed = false
let sawTerminalEvent = false
let currentRequestId = extractRunRequestId(run)
// Stamp the logical request id + chat id on the resume root as soon
// as we resolve them from the run row, so TraceQL joins work on
// resume legs the same way they do on the original POST.
if (currentRequestId) {
rootSpan.setAttribute(TraceAttr.RequestId, currentRequestId)
rootSpan.setAttribute(TraceAttr.SimRequestId, currentRequestId)
}
if (run?.chatId) {
rootSpan.setAttribute(TraceAttr.ChatId, run.chatId)
}
const closeController = () => {
if (controllerClosed) return
controllerClosed = true
try {
controller.close()
} catch {
// Controller already closed by runtime/client
}
}
const enqueueEvent = (payload: unknown) => {
if (controllerClosed) return false
try {
controller.enqueue(encodeSSEEnvelope(payload))
return true
} catch {
controllerClosed = true
return false
}
}
const abortListener = () => {
controllerClosed = true
}
request.signal.addEventListener('abort', abortListener, { once: true })
const flushEvents = async () => {
const events = await readEvents(streamId, cursor)
if (events.length > 0) {
totalEventsFlushed += events.length
logger.debug('[Resume] Flushing events', {
streamId,
afterCursor: cursor,
eventCount: events.length,
})
}
for (const envelope of events) {
cursor = envelope.stream.cursor ?? String(envelope.seq)
currentRequestId = extractEnvelopeRequestId(envelope) || currentRequestId
if (envelope.type === MothershipStreamV1EventType.complete) {
sawTerminalEvent = true
}
if (!enqueueEvent(envelope)) {
break
}
}
}
const emitTerminalIfMissing = (
status: MothershipStreamV1CompletionStatus,
options?: { message?: string; code: string; reason?: string }
) => {
if (controllerClosed || sawTerminalEvent) {
return
}
for (const envelope of buildResumeTerminalEnvelopes({
streamId,
afterCursor: cursor,
status,
message: options?.message,
code: options?.code ?? 'resume_terminal',
reason: options?.reason,
requestId: currentRequestId,
})) {
cursor = envelope.stream.cursor ?? String(envelope.seq)
if (envelope.type === MothershipStreamV1EventType.complete) {
sawTerminalEvent = true
}
if (!enqueueEvent(envelope)) {
break
}
}
}
try {
const gap = await checkForReplayGap(streamId, afterCursor, currentRequestId)
if (gap) {
for (const envelope of gap.envelopes) {
enqueueEvent(envelope)
}
return
}
await flushEvents()
while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) {
pollIterations += 1
const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch(
(err) => {
logger.warn('Failed to poll latest run for stream', {
streamId,
error: err instanceof Error ? err.message : String(err),
})
return null
}
)
if (!currentRun) {
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream could not be recovered because its run metadata is unavailable.',
code: 'resume_run_unavailable',
reason: 'run_unavailable',
})
break
}
currentRequestId = extractRunRequestId(currentRun) || currentRequestId
await flushEvents()
if (controllerClosed) {
break
}
if (isTerminalStatus(currentRun.status)) {
emitTerminalIfMissing(currentRun.status, {
message:
currentRun.status === MothershipStreamV1CompletionStatus.error
? typeof currentRun.error === 'string'
? currentRun.error
: 'The recovered stream ended with an error.'
: undefined,
code: 'resume_terminal_status',
reason: 'terminal_status',
})
break
}
if (request.signal.aborted) {
controllerClosed = true
break
}
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
}
if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) {
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream recovery timed out before completion.',
code: 'resume_timeout',
reason: 'timeout',
})
}
} catch (error) {
if (!controllerClosed && !request.signal.aborted) {
logger.warn('Stream replay failed', {
streamId,
error: error instanceof Error ? error.message : String(error),
})
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
message: 'The stream replay failed before completion.',
code: 'resume_internal',
reason: 'stream_replay_failed',
})
}
markSpanForError(rootSpan, error)
} finally {
request.signal.removeEventListener('abort', abortListener)
closeController()
rootSpan.setAttributes({
[TraceAttr.CopilotResumeOutcome]: sawTerminalEvent
? CopilotResumeOutcome.TerminalDelivered
: controllerClosed
? CopilotResumeOutcome.ClientDisconnected
: CopilotResumeOutcome.EndedWithoutTerminal,
[TraceAttr.CopilotResumeEventCount]: totalEventsFlushed,
[TraceAttr.CopilotResumePollIterations]: pollIterations,
[TraceAttr.CopilotResumeDurationMs]: Date.now() - startTime,
})
rootSpan.end()
}
}
return new Response(stream, { headers: SSE_RESPONSE_HEADERS })
})
}

View File

@@ -206,7 +206,7 @@ describe('Copilot Confirm API Route', () => {
})
})
it('returns 400 when the durable write fails before publish', async () => {
it('returns 500 when the durable write fails before publish', async () => {
completeAsyncToolCall.mockRejectedValueOnce(new Error('db down'))
const response = await POST(
@@ -216,7 +216,7 @@ describe('Copilot Confirm API Route', () => {
})
)
expect(response.status).toBe(400)
expect(response.status).toBe(500)
expect(publishToolConfirmation).not.toHaveBeenCalled()
})
})

View File

@@ -14,6 +14,9 @@ import {
getRunSegment,
upsertAsyncToolCall,
} from '@/lib/copilot/async-runs/repository'
import { CopilotConfirmOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm'
import {
authenticateCopilotRequestSessionOnly,
@@ -23,6 +26,7 @@ import {
createRequestTracker,
createUnauthorizedResponse,
} from '@/lib/copilot/request/http'
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
const logger = createLogger('CopilotConfirmAPI')
@@ -114,93 +118,112 @@ async function updateToolCallStatus(
}
}
/**
* POST /api/copilot/confirm
* Accept client tool completion or detach confirmations.
*/
export const POST = withRouteHandler(async (req: NextRequest) => {
// POST /api/copilot/confirm — delivery path for client-executed tool
// results. Correlate via `toolCallId` when the awaiting chat stream
// stalls.
export const POST = withRouteHandler((req: NextRequest) => {
const tracker = createRequestTracker()
try {
// Authenticate user using consolidated helper
const { userId: authenticatedUserId, isAuthenticated } =
await authenticateCopilotRequestSessionOnly()
return withIncomingGoSpan(
req.headers,
TraceSpan.CopilotConfirmToolResult,
{ [TraceAttr.RequestId]: tracker.requestId },
async (span) => {
try {
const { userId: authenticatedUserId, isAuthenticated } =
await authenticateCopilotRequestSessionOnly()
if (!isAuthenticated) {
return createUnauthorizedResponse()
if (!isAuthenticated || !authenticatedUserId) {
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Unauthorized)
return createUnauthorizedResponse()
}
const body = await req.json()
const { toolCallId, status, message, data } = ConfirmationSchema.parse(body)
span.setAttributes({
[TraceAttr.ToolCallId]: toolCallId,
[TraceAttr.ToolConfirmationStatus]: status,
[TraceAttr.UserId]: authenticatedUserId,
})
const existing = await getAsyncToolCall(toolCallId).catch((err) => {
logger.warn('Failed to fetch async tool call', {
toolCallId,
error: err instanceof Error ? err.message : String(err),
})
return null
})
if (!existing) {
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ToolCallNotFound)
return createNotFoundResponse('Tool call not found')
}
if (existing.toolName) span.setAttribute(TraceAttr.ToolName, existing.toolName)
if (existing.runId) span.setAttribute(TraceAttr.RunId, existing.runId)
const run = await getRunSegment(existing.runId).catch((err) => {
logger.warn('Failed to fetch run segment', {
runId: existing.runId,
error: err instanceof Error ? err.message : String(err),
})
return null
})
if (!run) {
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.RunNotFound)
return createNotFoundResponse('Tool call run not found')
}
if (run.userId !== authenticatedUserId) {
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Forbidden)
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
const updated = await updateToolCallStatus(existing, status, message, data)
if (!updated) {
logger.error(`[${tracker.requestId}] Failed to update tool call status`, {
userId: authenticatedUserId,
toolCallId,
status,
internalStatus: status,
message,
})
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.UpdateFailed)
// DB write failed — 500, not 400. 400 is a client-shape error.
return createInternalServerErrorResponse('Failed to update tool call status')
}
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Delivered)
return NextResponse.json({
success: true,
message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`,
toolCallId,
status,
})
} catch (error) {
const duration = tracker.getDuration()
if (error instanceof z.ZodError) {
logger.error(`[${tracker.requestId}] Request validation error:`, {
duration,
errors: error.errors,
})
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ValidationError)
return createBadRequestResponse(
`Invalid request data: ${error.errors.map((e) => e.message).join(', ')}`
)
}
logger.error(`[${tracker.requestId}] Unexpected error:`, {
duration,
error: error instanceof Error ? error.message : 'Unknown error',
stack: error instanceof Error ? error.stack : undefined,
})
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.InternalError)
return createInternalServerErrorResponse(
error instanceof Error ? error.message : 'Internal server error'
)
}
}
const body = await req.json()
const { toolCallId, status, message, data } = ConfirmationSchema.parse(body)
const existing = await getAsyncToolCall(toolCallId).catch((err) => {
logger.warn('Failed to fetch async tool call', {
toolCallId,
error: toError(err).message,
})
return null
})
if (!existing) {
return createNotFoundResponse('Tool call not found')
}
const run = await getRunSegment(existing.runId).catch((err) => {
logger.warn('Failed to fetch run segment', {
runId: existing.runId,
error: toError(err).message,
})
return null
})
if (!run) {
return createNotFoundResponse('Tool call run not found')
}
if (run.userId !== authenticatedUserId) {
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
}
// Update the durable tool call status and wake any waiters.
const updated = await updateToolCallStatus(existing, status, message, data)
if (!updated) {
logger.error(`[${tracker.requestId}] Failed to update tool call status`, {
userId: authenticatedUserId,
toolCallId,
status,
internalStatus: status,
message,
})
return createBadRequestResponse('Failed to update tool call status or tool call not found')
}
const duration = tracker.getDuration()
return NextResponse.json({
success: true,
message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`,
toolCallId,
status,
})
} catch (error) {
const duration = tracker.getDuration()
if (error instanceof z.ZodError) {
logger.error(`[${tracker.requestId}] Request validation error:`, {
duration,
errors: error.errors,
})
return createBadRequestResponse(
`Invalid request data: ${error.errors.map((e) => e.message).join(', ')}`
)
}
logger.error(`[${tracker.requestId}] Unexpected error:`, {
duration,
error: error instanceof Error ? error.message : 'Unknown error',
stack: error instanceof Error ? error.stack : undefined,
})
return createInternalServerErrorResponse(
error instanceof Error ? error.message : 'Internal server error'
)
}
)
})

View File

@@ -2,6 +2,7 @@ import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { type NextRequest, NextResponse } from 'next/server'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
interface AvailableModel {
@@ -45,10 +46,12 @@ export const GET = withRouteHandler(async (_req: NextRequest) => {
}
try {
const response = await fetch(`${SIM_AGENT_API_URL}/api/get-available-models`, {
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/get-available-models`, {
method: 'GET',
headers,
cache: 'no-store',
spanName: 'sim → go /api/get-available-models',
operation: 'get_available_models',
})
const payload = await response.json().catch(() => ({}))

View File

@@ -22,6 +22,22 @@ vi.mock('@/lib/core/config/env', () => createEnvMock({ COPILOT_API_KEY: 'test-ap
import { POST } from '@/app/api/copilot/stats/route'
// `fetchGo` reads `response.status` and `response.headers.get('content-length')`
// to stamp span attributes, so mock responses need both fields or the call
// path throws before the route handler sees the body.
// Builds a fetch-Response-shaped mock. `fetchGo` reads `status` and
// `headers.get('content-length')` to stamp span attributes, so both fields
// must be present or the call path throws before the route sees the body.
// `status` defaults to 200 for ok responses and 500 otherwise.
function buildMockResponse(init: {
  ok: boolean
  status?: number
  json: () => Promise<unknown>
}): Record<string, unknown> {
  const fallbackStatus = init.ok ? 200 : 500
  return {
    ok: init.ok,
    json: init.json,
    headers: new Headers(),
    status: init.status ?? fallbackStatus,
  }
}
describe('Copilot Stats API Route', () => {
beforeEach(() => {
vi.clearAllMocks()
@@ -58,10 +74,12 @@ describe('Copilot Stats API Route', () => {
isAuthenticated: true,
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ success: true }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve({ success: true }),
})
)
const req = createMockRequest('POST', {
messageId: 'message-123',
@@ -152,10 +170,12 @@ describe('Copilot Stats API Route', () => {
isAuthenticated: true,
})
mockFetch.mockResolvedValueOnce({
ok: false,
json: () => Promise.resolve({ error: 'Invalid message ID' }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: false,
json: () => Promise.resolve({ error: 'Invalid message ID' }),
})
)
const req = createMockRequest('POST', {
messageId: 'invalid-message',
@@ -176,10 +196,12 @@ describe('Copilot Stats API Route', () => {
isAuthenticated: true,
})
mockFetch.mockResolvedValueOnce({
ok: false,
json: () => Promise.resolve({ message: 'Rate limit exceeded' }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: false,
json: () => Promise.resolve({ message: 'Rate limit exceeded' }),
})
)
const req = createMockRequest('POST', {
messageId: 'message-123',
@@ -200,10 +222,12 @@ describe('Copilot Stats API Route', () => {
isAuthenticated: true,
})
mockFetch.mockResolvedValueOnce({
ok: false,
json: () => Promise.reject(new Error('Not JSON')),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: false,
json: () => Promise.reject(new Error('Not JSON')),
})
)
const req = createMockRequest('POST', {
messageId: 'message-123',
@@ -266,10 +290,12 @@ describe('Copilot Stats API Route', () => {
isAuthenticated: true,
})
mockFetch.mockResolvedValueOnce({
ok: true,
json: () => Promise.resolve({ success: true }),
})
mockFetch.mockResolvedValueOnce(
buildMockResponse({
ok: true,
json: () => Promise.resolve({ success: true }),
})
)
const req = createMockRequest('POST', {
messageId: 'message-456',

View File

@@ -1,6 +1,7 @@
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import {
authenticateCopilotRequestSessionOnly,
createBadRequestResponse,
@@ -40,13 +41,15 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
diffAccepted,
}
const agentRes = await fetch(`${SIM_AGENT_API_URL}/api/stats`, {
const agentRes = await fetchGo(`${SIM_AGENT_API_URL}/api/stats`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
},
body: JSON.stringify(payload),
spanName: 'sim → go /api/stats',
operation: 'stats_ingest',
})
// Prefer not to block clients; still relay status

View File

@@ -21,6 +21,7 @@ import { validateOAuthAccessToken } from '@/lib/auth/oauth-token'
import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription'
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
import { ORCHESTRATION_TIMEOUT_MS, SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { createRequestId } from '@/lib/copilot/request/http'
import { runHeadlessCopilotLifecycle } from '@/lib/copilot/request/lifecycle/headless'
import { orchestrateSubagentStream } from '@/lib/copilot/request/subagent'
import { ensureHandlersRegistered, executeTool } from '@/lib/copilot/tool-executor'
@@ -61,7 +62,8 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
return { success: false, error: 'Server configuration error' }
}
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key`, {
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -69,6 +71,8 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
},
body: JSON.stringify({ targetApiKey: apiKey }),
signal: AbortSignal.timeout(10_000),
spanName: 'sim → go /api/validate-key (mcp)',
operation: 'mcp_validate_key',
})
if (!res.ok) {
@@ -89,7 +93,10 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
}
}
return { success: false, error: String(upstream ?? 'Copilot API key validation failed') }
return {
success: false,
error: String(upstream ?? 'Copilot API key validation failed'),
}
}
const data = (await res.json()) as { ok?: boolean; userId?: string }
@@ -696,7 +703,11 @@ async function handleBuildToolCall(
resolvedWorkflowName = authorization.workflow?.name || undefined
resolvedWorkspaceId = authorization.workflow?.workspaceId || undefined
return authorization.allowed
? { status: 'resolved' as const, workflowId, workflowName: resolvedWorkflowName }
? {
status: 'resolved' as const,
workflowId,
workflowName: resolvedWorkflowName,
}
: {
status: 'not_found' as const,
message: 'workflowId is required for build. Call create_workflow first.',
@@ -815,6 +826,7 @@ async function handleSubagentToolCall(
(args.message as string) ||
(args.error as string) ||
JSON.stringify(args)
const simRequestId = createRequestId()
const context = (args.context as Record<string, unknown>) || {}
if (args.plan && !context.plan) {
@@ -836,6 +848,7 @@ async function handleSubagentToolCall(
userId,
workflowId: args.workflowId as string | undefined,
workspaceId: args.workspaceId as string | undefined,
simRequestId,
abortSignal,
}
)

View File

@@ -140,35 +140,60 @@ export const MessageActions = memo(function MessageActions({
}
}
if (!content) return null
const hasContent = Boolean(content)
const canSubmitFeedback = Boolean(chatId && userQuery)
if (!hasContent && !canSubmitFeedback) return null
return (
<>
<div className='flex items-center gap-0.5'>
<button
type='button'
aria-label='Copy message'
onClick={copyToClipboard}
className={BUTTON_CLASS}
>
{copied ? <Check className={ICON_CLASS} /> : <Copy className={ICON_CLASS} />}
</button>
<button
type='button'
aria-label='Like'
onClick={() => handleFeedbackClick('up')}
className={BUTTON_CLASS}
>
<ThumbsUp className={ICON_CLASS} />
</button>
<button
type='button'
aria-label='Dislike'
onClick={() => handleFeedbackClick('down')}
className={BUTTON_CLASS}
>
<ThumbsDown className={ICON_CLASS} />
</button>
{hasContent && (
<Tooltip.Root>
<Tooltip.Trigger asChild>
<button
type='button'
aria-label='Copy message'
onClick={copyToClipboard}
className={BUTTON_CLASS}
>
{copied ? <Check className={ICON_CLASS} /> : <Copy className={ICON_CLASS} />}
</button>
</Tooltip.Trigger>
<Tooltip.Content side='top'>
{copied ? 'Copied message' : 'Copy message'}
</Tooltip.Content>
</Tooltip.Root>
)}
{canSubmitFeedback && (
<>
<Tooltip.Root>
<Tooltip.Trigger asChild>
<button
type='button'
aria-label='Like'
onClick={() => handleFeedbackClick('up')}
className={BUTTON_CLASS}
>
<ThumbsUp className={ICON_CLASS} />
</button>
</Tooltip.Trigger>
<Tooltip.Content side='top'>Good response</Tooltip.Content>
</Tooltip.Root>
<Tooltip.Root>
<Tooltip.Trigger asChild>
<button
type='button'
aria-label='Dislike'
onClick={() => handleFeedbackClick('down')}
className={BUTTON_CLASS}
>
<ThumbsDown className={ICON_CLASS} />
</button>
</Tooltip.Trigger>
<Tooltip.Content side='top'>Bad response</Tooltip.Content>
</Tooltip.Root>
</>
)}
</div>
<Modal open={pendingFeedback !== null} onOpenChange={handleModalClose}>

View File

@@ -28,6 +28,7 @@ import {
MothershipStreamV1SpanPayloadKind,
MothershipStreamV1ToolOutcome,
MothershipStreamV1ToolPhase,
MothershipStreamV1ToolStatus,
} from '@/lib/copilot/generated/mothership-stream-v1'
import {
CrawlWebsite,
@@ -88,6 +89,7 @@ import {
markRunToolManuallyStopped,
reportManualRunToolStop,
} from '@/lib/copilot/tools/client/run-tool-execution'
import { setCurrentChatTraceparent } from '@/lib/copilot/tools/client/trace-context'
import { isWorkflowToolName } from '@/lib/copilot/tools/workflow-tools'
import { getNextWorkflowColor } from '@/lib/workflows/colors'
import { getQueryClient } from '@/app/_shell/providers/get-query-client'
@@ -1273,6 +1275,14 @@ export function useChat(
const activeTurnRef = useRef<ActiveTurn | null>(null)
const pendingUserMsgRef = useRef<PersistedMessage | null>(null)
const streamIdRef = useRef<string | undefined>(undefined)
// W3C traceparent from the chat POST response; echoed on
// abort/stop/confirm/replay so side-channel calls join the same
// trace instead of becoming disconnected roots.
const streamTraceparentRef = useRef<string | undefined>(undefined)
// The `request.id` from the active stream's trace events. Forwarded
// to /chat/stop so the persisted aborted message carries it (keeps
// the copy-request-ID button functional after refetch).
const streamRequestIdRef = useRef<string | undefined>(undefined)
const locallyTerminalStreamIdRef = useRef<string | undefined>(undefined)
const lastCursorRef = useRef('0')
const sendingRef = useRef(false)
@@ -1311,6 +1321,9 @@ export function useChat(
activeTurnRef.current = null
pendingUserMsgRef.current = null
streamIdRef.current = undefined
streamRequestIdRef.current = undefined
streamTraceparentRef.current = undefined
setCurrentChatTraceparent(undefined)
lastCursorRef.current = '0'
resetStreamingBuffers()
}, [resetStreamingBuffers])
@@ -1810,8 +1823,10 @@ export function useChat(
try {
const pendingLines: string[] = []
readLoop: while (true) {
while (true) {
if (pendingLines.length === 0) {
// Don't read another chunk after `complete` has drained.
if (sawCompleteEvent) break
const { done, value } = await reader.read()
if (done) break
if (isStale()) continue
@@ -1851,6 +1866,7 @@ export function useChat(
if (parsed.trace?.requestId && parsed.trace.requestId !== streamRequestId) {
streamRequestId = parsed.trace.requestId
streamRequestIdRef.current = streamRequestId
flush()
}
if (parsed.stream?.streamId) {
@@ -2245,7 +2261,9 @@ export function useChat(
}
const name = payload.toolName
const isPartial = payload.partial === true
const isPartial =
payload.partial === true ||
payload.status === MothershipStreamV1ToolStatus.generating
if (name === ToolSearchToolRegex.id || isToolHiddenInUi(name)) {
break
}
@@ -2467,9 +2485,12 @@ export function useChat(
}
case MothershipStreamV1EventType.complete: {
sawCompleteEvent = true
// `complete` is terminal for this stream, even if the transport takes a moment
// longer to close.
break readLoop
// `complete` is the end-of-turn marker; drain whatever
// else arrived in the same TCP chunk (trailing text,
// followups, run metadata) before stopping. Do NOT
// await another read — events after `complete` would
// be a server bug.
continue
}
}
}
@@ -2530,7 +2551,12 @@ export function useChat(
): Promise<StreamBatchResponse> => {
const response = await fetch(
`/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(afterCursor)}&batch=true`,
{ signal }
{
signal,
...(streamTraceparentRef.current
? { headers: { traceparent: streamTraceparentRef.current } }
: {}),
}
)
if (!response.ok) {
throw new Error(`Stream resume batch failed: ${response.status}`)
@@ -2601,7 +2627,12 @@ export function useChat(
const sseRes = await fetch(
`/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(latestCursor)}`,
{ signal: activeAbort.signal }
{
signal: activeAbort.signal,
...(streamTraceparentRef.current
? { headers: { traceparent: streamTraceparentRef.current } }
: {}),
}
)
if (!sseRes.ok || !sseRes.body) {
throw new Error(RECONNECT_TAIL_ERROR)
@@ -2842,12 +2873,18 @@ export function useChat(
streamId?: string
content?: string
blocks?: ContentBlock[]
// `stopGeneration` must snapshot these BEFORE clearActiveTurn()
// nulls the refs, or the fetch sees undefined.
requestId?: string
traceparent?: string
}) => {
const chatId = overrides?.chatId ?? chatIdRef.current
const streamId = overrides?.streamId ?? streamIdRef.current
if (!chatId || !streamId) return
const content = overrides?.content ?? streamingContentRef.current
const requestId = overrides?.requestId ?? streamRequestIdRef.current
const traceparent = overrides?.traceparent ?? streamTraceparentRef.current
const sourceBlocks = overrides?.blocks ?? streamingBlocksRef.current
const storedBlocks = sourceBlocks.map((block) => {
@@ -2880,12 +2917,16 @@ export function useChat(
try {
const res = await fetch(stopPathRef.current, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: {
'Content-Type': 'application/json',
...(traceparent ? { traceparent } : {}),
},
body: JSON.stringify({
chatId,
streamId,
content,
...(storedBlocks.length > 0 && { contentBlocks: storedBlocks }),
...(requestId ? { requestId } : {}),
}),
})
if (!res.ok) {
@@ -2924,9 +2965,36 @@ export function useChat(
const messagesRef = useRef(messages)
messagesRef.current = messages
/**
* Notify downstream consumers that a turn has ended and, if a
* follow-up message is queued, kick the dispatcher. Safe to call
* from both the normal-completion path (`finalize`) and the
* abort/stop path (`stopGeneration`), which previously short-
* circuited without notifying — queued messages then sat until the
* user manually re-sent. Idempotent w.r.t. `onStreamEnd` (one call
* per terminal transition); the dispatcher itself de-dupes.
*/
const notifyTurnEnded = useCallback(
(options: { error: boolean; skipQueueDispatch?: boolean }) => {
const hasQueuedFollowUp = !options.error && messageQueueRef.current.length > 0
if (!options.error) {
const cid = chatIdRef.current
if (cid && onStreamEndRef.current) {
onStreamEndRef.current(cid, messagesRef.current)
}
}
if (!options.error && !options.skipQueueDispatch && hasQueuedFollowUp) {
void enqueueQueueDispatchRef.current({ type: 'send_head' })
}
return hasQueuedFollowUp
},
[]
)
const finalize = useCallback(
(options?: { error?: boolean }) => {
const hasQueuedFollowUp = !options?.error && messageQueueRef.current.length > 0
const isError = !!options?.error
const hasQueuedFollowUp = !isError && messageQueueRef.current.length > 0
reconcileTerminalPreviewSessions()
locallyTerminalStreamIdRef.current =
streamIdRef.current ?? activeTurnRef.current?.userMessageId ?? undefined
@@ -2934,23 +3002,15 @@ export function useChat(
setTransportIdle()
abortControllerRef.current = null
invalidateChatQueries({ includeDetail: !hasQueuedFollowUp })
if (!options?.error) {
const cid = chatIdRef.current
if (cid && onStreamEndRef.current) {
onStreamEndRef.current(cid, messagesRef.current)
}
}
if (options?.error) {
return
}
if (hasQueuedFollowUp) {
void enqueueQueueDispatchRef.current({ type: 'send_head' })
}
notifyTurnEnded({ error: isError })
},
[clearActiveTurn, invalidateChatQueries, reconcileTerminalPreviewSessions, setTransportIdle]
[
clearActiveTurn,
invalidateChatQueries,
notifyTurnEnded,
reconcileTerminalPreviewSessions,
setTransportIdle,
]
)
finalizeRef.current = finalize
@@ -3162,6 +3222,14 @@ export function useChat(
signal: abortController.signal,
})
// Capture for propagation on side-channel calls + non-React
// tool-completion callbacks (via trace-context singleton).
const traceparent = response.headers.get('traceparent')
if (traceparent) {
streamTraceparentRef.current = traceparent
setCurrentChatTraceparent(traceparent)
}
if (!response.ok) {
const errorData = await response.json().catch(() => ({}))
if (response.status === 409) {
@@ -3403,6 +3471,12 @@ export function useChat(
...(block.options ? { options: [...block.options] } : {}),
...(block.toolCall ? { toolCall: { ...block.toolCall } } : {}),
}))
// Snapshot BEFORE clearActiveTurn() nulls the refs. Both
// persistPartialResponse and the abort/stop fetches run inside
// stopBarrier below, after several awaits — the refs are long
// gone by the time the fetches serialize their headers.
const stopRequestIdSnapshot = streamRequestIdRef.current
const stopTraceparentSnapshot = streamTraceparentRef.current
locallyTerminalStreamIdRef.current = sid
streamGenRef.current++
@@ -3462,7 +3536,10 @@ export function useChat(
? (async () => {
const res = await fetch('/api/mothership/chat/abort', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: {
'Content-Type': 'application/json',
...(stopTraceparentSnapshot ? { traceparent: stopTraceparentSnapshot } : {}),
},
body: JSON.stringify({
streamId: sid,
...(resolvedChatId ? { chatId: resolvedChatId } : {}),
@@ -3485,6 +3562,8 @@ export function useChat(
streamId: sid,
content: stopContentSnapshot,
blocks: stopBlocksSnapshot,
requestId: stopRequestIdSnapshot,
traceparent: stopTraceparentSnapshot,
})
}
@@ -3498,6 +3577,8 @@ export function useChat(
pendingStopPromiseRef.current = stopBarrier
try {
await stopBarrier
// Dispatch queued follow-ups after Stop resolves.
notifyTurnEnded({ error: false })
} catch (err) {
setError(err instanceof Error ? err.message : 'Failed to stop the previous response')
throw err
@@ -3509,6 +3590,7 @@ export function useChat(
}, [
cancelActiveWorkflowExecutions,
invalidateChatQueries,
notifyTurnEnded,
persistPartialResponse,
queryClient,
resetEphemeralPreviewState,

View File

@@ -1,20 +1,32 @@
/**
* Sim OpenTelemetry - Server-side Instrumentation
*/
// Sim OTel bootstrap. Filter by `mothership.origin` or span-name
// prefix (`sim-mothership:` / `go-mothership:`) to separate the two
// halves of a mothership trace in the OTLP backend.
import type { Attributes, Context, Link, SpanKind } from '@opentelemetry/api'
import { DiagConsoleLogger, DiagLogLevel, diag } from '@opentelemetry/api'
import type { Sampler, SamplingResult } from '@opentelemetry/sdk-trace-base'
import { DiagConsoleLogger, DiagLogLevel, diag, TraceFlags, trace } from '@opentelemetry/api'
import type {
ReadableSpan,
Sampler,
SamplingResult,
Span,
SpanProcessor,
} from '@opentelemetry/sdk-trace-base'
import { createLogger } from '@sim/logger'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { env } from './lib/core/config/env'
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR)
const logger = createLogger('OTelInstrumentation')
const MOTHERSHIP_ORIGIN = 'sim-mothership' as const
const SPAN_NAME_PREFIX = `${MOTHERSHIP_ORIGIN}: `
const SERVICE_INSTANCE_SLUG = 'sim' as const
const DEFAULT_TELEMETRY_CONFIG = {
endpoint: env.TELEMETRY_ENDPOINT || 'https://telemetry.simstudio.ai/v1/traces',
serviceName: 'sim-studio',
serviceName: 'mothership',
serviceVersion: '0.1.0',
serverSide: { enabled: true },
batchSettings: {
@@ -25,29 +37,95 @@ const DEFAULT_TELEMETRY_CONFIG = {
},
}
/**
* Span name prefixes we want to KEEP
*/
const ALLOWED_SPAN_PREFIXES = [
'platform.', // Our platform events
'gen_ai.', // GenAI semantic convention spans
'workflow.', // Workflow execution spans
'block.', // Block execution spans
'http.client.', // Our API block HTTP calls
'function.', // Function block execution
'router.', // Router block evaluation
'condition.', // Condition block evaluation
'loop.', // Loop block execution
'parallel.', // Parallel block execution
]
// Allowlist of span-name prefixes exported from this process.
// Non-mothership code (workflow executor, block runtime, framework
// noise) is dropped. Broaden carefully — `http.` etc. would reopen
// the firehose.
const ALLOWED_SPAN_PREFIXES = ['gen_ai.', 'copilot.', 'sim →', 'sim.', 'tool.execute']

/** True when `spanName` begins with one of the exported mothership prefixes. */
function isBusinessSpan(spanName: string): boolean {
  for (const prefix of ALLOWED_SPAN_PREFIXES) {
    if (spanName.startsWith(prefix)) return true
  }
  return false
}
// Parse `OTEL_EXPORTER_OTLP_HEADERS`: `key1=value1,key2=value2`
// (URL-encoded values, whitespace tolerated).
function parseOtlpHeadersEnv(raw: string): Record<string, string> {
  const headers: Record<string, string> = {}
  if (!raw) return headers
  for (const segment of raw.split(',')) {
    const entry = segment.trim()
    if (!entry) continue
    const sep = entry.indexOf('=')
    // Reject entries with no '=' or with nothing before it.
    if (sep <= 0) continue
    const key = entry.slice(0, sep).trim()
    if (!key) continue
    const encoded = entry.slice(sep + 1).trim()
    let value: string
    try {
      value = decodeURIComponent(encoded)
    } catch {
      // value wasn't URL-encoded; keep as-is.
      value = encoded
    }
    headers[key] = value
  }
  return headers
}
// Append `/v1/traces` to the OTLP base URL unless already present.
// The HTTP exporter doesn't auto-suffix the signal path even though
// the spec says the env var is a base URL.
function normalizeOtlpTracesUrl(url: string): string {
  if (!url) return url
  try {
    // Strip trailing slashes BEFORE the suffix check. The previous
    // version checked `u.pathname.endsWith('/v1/traces')` on the raw
    // URL, so a base like `…/v1/traces/` failed the check and got the
    // signal path appended a second time (`…/v1/traces/v1/traces`).
    const base = url.replace(/\/+$/, '')
    const u = new URL(base)
    if (u.pathname.endsWith('/v1/traces')) return base
    return `${base}/v1/traces`
  } catch {
    // Not an absolute URL; hand it to the exporter untouched.
    return url
  }
}
// Sampling ratio from env (mirrors Go's `samplerFromEnv`); fallback
// is 100% everywhere. Retention caps cost, not sampling.
function resolveSamplingRatio(_isLocalEndpoint: boolean): number {
  const configured =
    process.env.TELEMETRY_SAMPLING_RATIO || process.env.OTEL_TRACES_SAMPLER_ARG || ''
  if (!configured) return 1.0
  const ratio = Number.parseFloat(configured)
  // Non-numeric config falls back to sample-everything.
  if (!Number.isFinite(ratio)) return 1.0
  // Clamp into [0, 1]; out-of-range values mean all-or-nothing.
  return Math.min(1, Math.max(0, ratio))
}
// Tags allowed spans with `mothership.origin` and prepends
// `sim-mothership:` to the span name so backends can visually split
// the two halves even when service.name is shared.
class MothershipOriginSpanProcessor implements SpanProcessor {
  onStart(span: Span): void {
    const spanName = span.name
    // Non-business spans pass through untouched.
    if (!isBusinessSpan(spanName)) return
    span.setAttribute(TraceAttr.MothershipOrigin, MOTHERSHIP_ORIGIN)
    // Idempotent: never double-prefix an already-renamed span.
    if (spanName.startsWith(SPAN_NAME_PREFIX)) return
    span.updateName(`${SPAN_NAME_PREFIX}${spanName}`)
  }

  onEnd(_span: ReadableSpan): void {}

  shutdown(): Promise<void> {
    // No buffered state; nothing to flush on shutdown.
    return Promise.resolve()
  }

  forceFlush(): Promise<void> {
    return Promise.resolve()
  }
}
async function initializeOpenTelemetry() {
try {
if (env.NEXT_TELEMETRY_DISABLED === '1') {
if (env.NEXT_TELEMETRY_DISABLED === '1' || process.env.NEXT_TELEMETRY_DISABLED === '1') {
logger.info('OpenTelemetry disabled via NEXT_TELEMETRY_DISABLED=1')
return
}
@@ -59,11 +137,29 @@ async function initializeOpenTelemetry() {
telemetryConfig = DEFAULT_TELEMETRY_CONFIG
}
// Prefer the OTel spec env var, fall back to legacy TELEMETRY_ENDPOINT.
const resolvedEndpoint =
process.env.OTEL_EXPORTER_OTLP_ENDPOINT ||
process.env.TELEMETRY_ENDPOINT ||
env.TELEMETRY_ENDPOINT ||
telemetryConfig.endpoint
telemetryConfig = {
...telemetryConfig,
endpoint: resolvedEndpoint,
serviceName: 'mothership',
}
if (telemetryConfig.serverSide?.enabled === false) {
logger.info('Server-side OpenTelemetry disabled in config')
return
}
logger.info('OpenTelemetry init', {
endpoint: telemetryConfig.endpoint,
serviceName: telemetryConfig.serviceName,
origin: MOTHERSHIP_ORIGIN,
})
const { NodeSDK } = await import('@opentelemetry/sdk-node')
const { defaultResource, resourceFromAttributes } = await import('@opentelemetry/resources')
const { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, ATTR_DEPLOYMENT_ENVIRONMENT } = await import(
@@ -71,11 +167,14 @@ async function initializeOpenTelemetry() {
)
const { OTLPTraceExporter } = await import('@opentelemetry/exporter-trace-otlp-http')
const { BatchSpanProcessor } = await import('@opentelemetry/sdk-trace-node')
const { ParentBasedSampler, TraceIdRatioBasedSampler, SamplingDecision } = await import(
const { TraceIdRatioBasedSampler, SamplingDecision } = await import(
'@opentelemetry/sdk-trace-base'
)
const createBusinessSpanSampler = (baseSampler: Sampler): Sampler => ({
// Drops Next framework spans, inherits SAMPLED from business
// parents, and re-samples business roots fresh (don't delegate to
// ParentBased — its unsampled-parent path is AlwaysOff by default).
const createBusinessSpanSampler = (rootRatioSampler: Sampler): Sampler => ({
shouldSample(
context: Context,
traceId: string,
@@ -88,25 +187,60 @@ async function initializeOpenTelemetry() {
return { decision: SamplingDecision.NOT_RECORD }
}
const parentSpanContext = trace.getSpanContext(context)
const parentIsSampled =
!!parentSpanContext &&
(parentSpanContext.traceFlags & TraceFlags.SAMPLED) === TraceFlags.SAMPLED
if (parentIsSampled) {
return { decision: SamplingDecision.RECORD_AND_SAMPLED }
}
if (isBusinessSpan(spanName)) {
return baseSampler.shouldSample(context, traceId, spanName, spanKind, attributes, links)
return rootRatioSampler.shouldSample(
context,
traceId,
spanName,
spanKind,
attributes,
links
)
}
return { decision: SamplingDecision.NOT_RECORD }
},
toString(): string {
return `BusinessSpanSampler{baseSampler=${baseSampler.toString()}}`
return `BusinessSpanSampler{rootSampler=${rootRatioSampler.toString()}}`
},
})
const otlpHeaders = parseOtlpHeadersEnv(process.env.OTEL_EXPORTER_OTLP_HEADERS || '')
const exporterUrl = normalizeOtlpTracesUrl(telemetryConfig.endpoint)
const exporter = new OTLPTraceExporter({
url: telemetryConfig.endpoint,
headers: {},
url: exporterUrl,
headers: otlpHeaders,
timeoutMillis: Math.min(telemetryConfig.batchSettings.exportTimeoutMillis, 10000),
keepAlive: false,
})
// Surface export failures (BatchSpanProcessor swallows them otherwise).
const origExport = exporter.export.bind(exporter)
exporter.export = (spans, resultCallback) => {
origExport(spans, (result) => {
if (result?.code !== 0) {
// eslint-disable-next-line no-console
console.error('[OTEL] exporter export failed', {
endpoint: telemetryConfig.endpoint,
resultCode: result?.code,
error: result?.error?.message,
spanCount: spans.length,
})
}
resultCallback(result)
})
}
const batchProcessor = new BatchSpanProcessor(exporter, {
maxQueueSize: telemetryConfig.batchSettings.maxQueueSize,
maxExportBatchSize: telemetryConfig.batchSettings.maxExportBatchSize,
@@ -114,28 +248,48 @@ async function initializeOpenTelemetry() {
exportTimeoutMillis: telemetryConfig.batchSettings.exportTimeoutMillis,
})
// Unique instance id per origin keeps Jaeger's clock-skew adjuster
// from grouping Sim+Go spans together (they'd see multi-second
// drift as intra-service and emit spurious warnings).
const serviceInstanceId = `${telemetryConfig.serviceName}-${SERVICE_INSTANCE_SLUG}`
const resource = defaultResource().merge(
resourceFromAttributes({
[ATTR_SERVICE_NAME]: telemetryConfig.serviceName,
[ATTR_SERVICE_VERSION]: telemetryConfig.serviceVersion,
[ATTR_DEPLOYMENT_ENVIRONMENT]: env.NODE_ENV || 'development',
'service.namespace': 'sim-ai-platform',
// OTEL_ → DEPLOYMENT_ENVIRONMENT → NODE_ENV; matches Go's
// `resourceEnvFromEnv()` so both halves tag the same value.
[ATTR_DEPLOYMENT_ENVIRONMENT]:
process.env.OTEL_DEPLOYMENT_ENVIRONMENT ||
process.env.DEPLOYMENT_ENVIRONMENT ||
env.NODE_ENV ||
'development',
'service.namespace': 'mothership',
'service.instance.id': serviceInstanceId,
'mothership.origin': MOTHERSHIP_ORIGIN,
'telemetry.sdk.name': 'opentelemetry',
'telemetry.sdk.language': 'nodejs',
'telemetry.sdk.version': '1.0.0',
})
)
const baseSampler = new ParentBasedSampler({
root: new TraceIdRatioBasedSampler(0.1),
const isLocalEndpoint = /localhost|127\.0\.0\.1/i.test(telemetryConfig.endpoint)
const samplingRatio = resolveSamplingRatio(isLocalEndpoint)
const rootRatioSampler = new TraceIdRatioBasedSampler(samplingRatio)
const sampler = createBusinessSpanSampler(rootRatioSampler)
logger.info('OpenTelemetry sampler configured', {
samplingRatio,
endpoint: telemetryConfig.endpoint,
origin: MOTHERSHIP_ORIGIN,
})
const sampler = createBusinessSpanSampler(baseSampler)
// Origin-prefix must run before batch so the rename/attr is captured.
const spanProcessors: SpanProcessor[] = [new MothershipOriginSpanProcessor(), batchProcessor]
const sdk = new NodeSDK({
resource,
spanProcessor: batchProcessor,
spanProcessors,
sampler,
traceExporter: exporter,
})
sdk.start()
@@ -152,7 +306,11 @@ async function initializeOpenTelemetry() {
process.on('SIGTERM', shutdownOtel)
process.on('SIGINT', shutdownOtel)
logger.info('OpenTelemetry instrumentation initialized with business span filtering')
logger.info('OpenTelemetry instrumentation initialized', {
serviceName: telemetryConfig.serviceName,
origin: MOTHERSHIP_ORIGIN,
samplingRatio,
})
} catch (error) {
logger.error('Failed to initialize OpenTelemetry instrumentation', error)
}

View File

@@ -1,3 +1,4 @@
import { trace } from '@opentelemetry/api'
import { db } from '@sim/db'
import {
type CopilotAsyncToolStatus,
@@ -8,6 +9,9 @@ import {
} from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { and, desc, eq, inArray, isNull } from 'drizzle-orm'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { markSpanForError } from '@/lib/copilot/request/otel'
import {
ASYNC_TOOL_STATUS,
type AsyncCompletionData,
@@ -16,6 +20,38 @@ import {
} from './lifecycle'
const logger = createLogger('CopilotAsyncRunsRepo')
// Resolve the tracer lazily per-call to avoid capturing the NoOp tracer
// before NodeSDK installs the global TracerProvider (Next.js 16/Turbopack
// can evaluate modules before instrumentation-node.ts finishes).
const getAsyncRunsTracer = () => {
  return trace.getTracer('sim-copilot-async-runs', '1.0.0')
}
// Wrap an async DB op in a client-kind span with canonical `db.*` attrs.
// Cancellation is routed through `markSpanForError` so aborts record the
// exception event but don't paint spans red.
async function withDbSpan<T>(
  name: string,
  op: string,
  table: string,
  attrs: Record<string, string | number | boolean | undefined>,
  fn: () => Promise<T>
): Promise<T> {
  // Canonical db.* attributes first; caller-supplied attrs layered on
  // top (undefined values dropped, matching the span attribute contract).
  const attributes: Record<string, string | number | boolean> = {
    [TraceAttr.DbSystem]: 'postgresql',
    [TraceAttr.DbOperation]: op,
    [TraceAttr.DbSqlTable]: table,
  }
  for (const [key, value] of Object.entries(attrs)) {
    if (value !== undefined) attributes[key] = value
  }
  const span = getAsyncRunsTracer().startSpan(name, { attributes })
  try {
    return await fn()
  } catch (error) {
    markSpanForError(span, error)
    throw error
  } finally {
    span.end()
  }
}
export interface CreateRunSegmentInput {
id?: string
@@ -34,26 +70,43 @@ export interface CreateRunSegmentInput {
}
export async function createRunSegment(input: CreateRunSegmentInput) {
const [run] = await db
.insert(copilotRuns)
.values({
...(input.id ? { id: input.id } : {}),
executionId: input.executionId,
parentRunId: input.parentRunId ?? null,
chatId: input.chatId,
userId: input.userId,
workflowId: input.workflowId ?? null,
workspaceId: input.workspaceId ?? null,
streamId: input.streamId,
agent: input.agent ?? null,
model: input.model ?? null,
provider: input.provider ?? null,
requestContext: input.requestContext ?? {},
status: input.status ?? 'active',
})
.returning()
return run
return withDbSpan(
TraceSpan.CopilotAsyncRunsCreateRunSegment,
'INSERT',
'copilot_runs',
{
[TraceAttr.CopilotExecutionId]: input.executionId,
[TraceAttr.ChatId]: input.chatId,
[TraceAttr.StreamId]: input.streamId,
[TraceAttr.UserId]: input.userId,
[TraceAttr.CopilotRunParentId]: input.parentRunId ?? undefined,
[TraceAttr.CopilotRunAgent]: input.agent ?? undefined,
[TraceAttr.CopilotRunModel]: input.model ?? undefined,
[TraceAttr.CopilotRunProvider]: input.provider ?? undefined,
[TraceAttr.CopilotRunStatus]: input.status ?? 'active',
},
async () => {
const [run] = await db
.insert(copilotRuns)
.values({
...(input.id ? { id: input.id } : {}),
executionId: input.executionId,
parentRunId: input.parentRunId ?? null,
chatId: input.chatId,
userId: input.userId,
workflowId: input.workflowId ?? null,
workspaceId: input.workspaceId ?? null,
streamId: input.streamId,
agent: input.agent ?? null,
model: input.model ?? null,
provider: input.provider ?? null,
requestContext: input.requestContext ?? {},
status: input.status ?? 'active',
})
.returning()
return run
}
)
}
export async function updateRunStatus(
@@ -65,32 +118,53 @@ export async function updateRunStatus(
requestContext?: Record<string, unknown>
} = {}
) {
const [run] = await db
.update(copilotRuns)
.set({
status,
completedAt: updates.completedAt,
error: updates.error,
requestContext: updates.requestContext,
updatedAt: new Date(),
})
.where(eq(copilotRuns.id, runId))
.returning()
return run ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsUpdateRunStatus,
'UPDATE',
'copilot_runs',
{
[TraceAttr.RunId]: runId,
[TraceAttr.CopilotRunStatus]: status,
[TraceAttr.CopilotRunHasError]: !!updates.error,
[TraceAttr.CopilotRunHasCompletedAt]: !!updates.completedAt,
},
async () => {
const [run] = await db
.update(copilotRuns)
.set({
status,
completedAt: updates.completedAt,
error: updates.error,
requestContext: updates.requestContext,
updatedAt: new Date(),
})
.where(eq(copilotRuns.id, runId))
.returning()
return run ?? null
}
)
}
export async function getLatestRunForExecution(executionId: string) {
const [run] = await db
.select()
.from(copilotRuns)
.where(eq(copilotRuns.executionId, executionId))
.orderBy(desc(copilotRuns.startedAt))
.limit(1)
return run ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsGetLatestForExecution,
'SELECT',
'copilot_runs',
{ [TraceAttr.CopilotExecutionId]: executionId },
async () => {
const [run] = await db
.select()
.from(copilotRuns)
.where(eq(copilotRuns.executionId, executionId))
.orderBy(desc(copilotRuns.startedAt))
.limit(1)
return run ?? null
}
)
}
// Un-instrumented: called from a 4 Hz resume poll; per-call spans
// swamped traces. Use Prom histograms if latency visibility is needed.
export async function getLatestRunForStream(streamId: string, userId?: string) {
const conditions = userId
? and(eq(copilotRuns.streamId, streamId), eq(copilotRuns.userId, userId))
@@ -101,13 +175,20 @@ export async function getLatestRunForStream(streamId: string, userId?: string) {
.where(conditions)
.orderBy(desc(copilotRuns.startedAt))
.limit(1)
return run ?? null
}
export async function getRunSegment(runId: string) {
const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1)
return run ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsGetRunSegment,
'SELECT',
'copilot_runs',
{ [TraceAttr.RunId]: runId },
async () => {
const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1)
return run ?? null
}
)
}
export async function createRunCheckpoint(input: {
@@ -117,18 +198,29 @@ export async function createRunCheckpoint(input: {
agentState: Record<string, unknown>
providerRequest: Record<string, unknown>
}) {
const [checkpoint] = await db
.insert(copilotRunCheckpoints)
.values({
runId: input.runId,
pendingToolCallId: input.pendingToolCallId,
conversationSnapshot: input.conversationSnapshot,
agentState: input.agentState,
providerRequest: input.providerRequest,
})
.returning()
return withDbSpan(
TraceSpan.CopilotAsyncRunsCreateRunCheckpoint,
'INSERT',
'copilot_run_checkpoints',
{
[TraceAttr.RunId]: input.runId,
[TraceAttr.CopilotCheckpointPendingToolCallId]: input.pendingToolCallId,
},
async () => {
const [checkpoint] = await db
.insert(copilotRunCheckpoints)
.values({
runId: input.runId,
pendingToolCallId: input.pendingToolCallId,
conversationSnapshot: input.conversationSnapshot,
agentState: input.agentState,
providerRequest: input.providerRequest,
})
.returning()
return checkpoint
return checkpoint
}
)
}
export async function upsertAsyncToolCall(input: {
@@ -139,67 +231,87 @@ export async function upsertAsyncToolCall(input: {
args?: Record<string, unknown>
status?: CopilotAsyncToolStatus
}) {
const existing = await getAsyncToolCall(input.toolCallId)
const incomingStatus = input.status ?? 'pending'
if (
existing &&
(isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) &&
!isTerminalAsyncStatus(incomingStatus) &&
!isDeliveredAsyncStatus(incomingStatus)
) {
logger.info('Ignoring async tool upsert that would downgrade terminal state', {
toolCallId: input.toolCallId,
existingStatus: existing.status,
incomingStatus,
})
return existing
}
const effectiveRunId = input.runId ?? existing?.runId ?? null
if (!effectiveRunId) {
logger.warn('upsertAsyncToolCall missing runId and no existing row', {
toolCallId: input.toolCallId,
toolName: input.toolName,
status: input.status ?? 'pending',
})
return null
}
return withDbSpan(
TraceSpan.CopilotAsyncRunsUpsertAsyncToolCall,
'UPSERT',
'copilot_async_tool_calls',
{
[TraceAttr.ToolCallId]: input.toolCallId,
[TraceAttr.ToolName]: input.toolName,
[TraceAttr.CopilotAsyncToolStatus]: input.status ?? 'pending',
[TraceAttr.RunId]: input.runId ?? undefined,
},
async () => {
const existing = await getAsyncToolCall(input.toolCallId)
const incomingStatus = input.status ?? 'pending'
if (
existing &&
(isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) &&
!isTerminalAsyncStatus(incomingStatus) &&
!isDeliveredAsyncStatus(incomingStatus)
) {
logger.info('Ignoring async tool upsert that would downgrade terminal state', {
toolCallId: input.toolCallId,
existingStatus: existing.status,
incomingStatus,
})
return existing
}
const effectiveRunId = input.runId ?? existing?.runId ?? null
if (!effectiveRunId) {
logger.warn('upsertAsyncToolCall missing runId and no existing row', {
toolCallId: input.toolCallId,
toolName: input.toolName,
status: input.status ?? 'pending',
})
return null
}
const now = new Date()
const [row] = await db
.insert(copilotAsyncToolCalls)
.values({
runId: effectiveRunId,
checkpointId: input.checkpointId ?? null,
toolCallId: input.toolCallId,
toolName: input.toolName,
args: input.args ?? {},
status: incomingStatus,
updatedAt: now,
})
.onConflictDoUpdate({
target: copilotAsyncToolCalls.toolCallId,
set: {
runId: effectiveRunId,
checkpointId: input.checkpointId ?? null,
toolName: input.toolName,
args: input.args ?? {},
status: incomingStatus,
updatedAt: now,
},
})
.returning()
const now = new Date()
const [row] = await db
.insert(copilotAsyncToolCalls)
.values({
runId: effectiveRunId,
checkpointId: input.checkpointId ?? null,
toolCallId: input.toolCallId,
toolName: input.toolName,
args: input.args ?? {},
status: incomingStatus,
updatedAt: now,
})
.onConflictDoUpdate({
target: copilotAsyncToolCalls.toolCallId,
set: {
runId: effectiveRunId,
checkpointId: input.checkpointId ?? null,
toolName: input.toolName,
args: input.args ?? {},
status: incomingStatus,
updatedAt: now,
},
})
.returning()
return row
return row
}
)
}
export async function getAsyncToolCall(toolCallId: string) {
const [row] = await db
.select()
.from(copilotAsyncToolCalls)
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
.limit(1)
return row ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsGetAsyncToolCall,
'SELECT',
'copilot_async_tool_calls',
{ [TraceAttr.ToolCallId]: toolCallId },
async () => {
const [row] = await db
.select()
.from(copilotAsyncToolCalls)
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
.limit(1)
return row ?? null
}
)
}
export async function markAsyncToolStatus(
@@ -213,28 +325,41 @@ export async function markAsyncToolStatus(
completedAt?: Date | null
} = {}
) {
const claimedAt =
updates.claimedAt !== undefined
? updates.claimedAt
: status === 'running' && updates.claimedBy
? new Date()
: undefined
return withDbSpan(
TraceSpan.CopilotAsyncRunsMarkAsyncToolStatus,
'UPDATE',
'copilot_async_tool_calls',
{
[TraceAttr.ToolCallId]: toolCallId,
[TraceAttr.CopilotAsyncToolStatus]: status,
[TraceAttr.CopilotAsyncToolHasError]: !!updates.error,
[TraceAttr.CopilotAsyncToolClaimedBy]: updates.claimedBy ?? undefined,
},
async () => {
const claimedAt =
updates.claimedAt !== undefined
? updates.claimedAt
: status === 'running' && updates.claimedBy
? new Date()
: undefined
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
status,
claimedBy: updates.claimedBy,
claimedAt,
result: updates.result,
error: updates.error,
completedAt: updates.completedAt,
updatedAt: new Date(),
})
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
.returning()
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
status,
claimedBy: updates.claimedBy,
claimedAt,
result: updates.result,
error: updates.error,
completedAt: updates.completedAt,
updatedAt: new Date(),
})
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
.returning()
return row ?? null
return row ?? null
}
)
}
export async function markAsyncToolRunning(toolCallId: string, claimedBy: string) {
@@ -278,57 +403,91 @@ export async function markAsyncToolDelivered(toolCallId: string) {
}
export async function listAsyncToolCallsForRun(runId: string) {
return db
.select()
.from(copilotAsyncToolCalls)
.where(eq(copilotAsyncToolCalls.runId, runId))
.orderBy(desc(copilotAsyncToolCalls.createdAt))
return withDbSpan(
TraceSpan.CopilotAsyncRunsListForRun,
'SELECT',
'copilot_async_tool_calls',
{ [TraceAttr.RunId]: runId },
async () =>
db
.select()
.from(copilotAsyncToolCalls)
.where(eq(copilotAsyncToolCalls.runId, runId))
.orderBy(desc(copilotAsyncToolCalls.createdAt))
)
}
export async function getAsyncToolCalls(toolCallIds: string[]) {
if (toolCallIds.length === 0) return []
return db
.select()
.from(copilotAsyncToolCalls)
.where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds))
return withDbSpan(
TraceSpan.CopilotAsyncRunsGetMany,
'SELECT',
'copilot_async_tool_calls',
{ [TraceAttr.CopilotAsyncToolIdsCount]: toolCallIds.length },
async () =>
db
.select()
.from(copilotAsyncToolCalls)
.where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds))
)
}
export async function claimCompletedAsyncToolCall(toolCallId: string, workerId: string) {
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
claimedBy: workerId,
claimedAt: new Date(),
updatedAt: new Date(),
})
.where(
and(
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
isNull(copilotAsyncToolCalls.claimedBy)
)
)
.returning()
return row ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsClaimCompleted,
'UPDATE',
'copilot_async_tool_calls',
{
[TraceAttr.ToolCallId]: toolCallId,
[TraceAttr.CopilotAsyncToolWorkerId]: workerId,
},
async () => {
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
claimedBy: workerId,
claimedAt: new Date(),
updatedAt: new Date(),
})
.where(
and(
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
isNull(copilotAsyncToolCalls.claimedBy)
)
)
.returning()
return row ?? null
}
)
}
export async function releaseCompletedAsyncToolClaim(toolCallId: string, workerId: string) {
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
claimedBy: null,
claimedAt: null,
updatedAt: new Date(),
})
.where(
and(
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
eq(copilotAsyncToolCalls.claimedBy, workerId)
)
)
.returning()
return row ?? null
return withDbSpan(
TraceSpan.CopilotAsyncRunsReleaseClaim,
'UPDATE',
'copilot_async_tool_calls',
{
[TraceAttr.ToolCallId]: toolCallId,
[TraceAttr.CopilotAsyncToolWorkerId]: workerId,
},
async () => {
const [row] = await db
.update(copilotAsyncToolCalls)
.set({
claimedBy: null,
claimedAt: null,
updatedAt: new Date(),
})
.where(
and(
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
eq(copilotAsyncToolCalls.claimedBy, workerId)
)
)
.returning()
return row ?? null
}
)
}

View File

@@ -1,7 +1,7 @@
import { type Context as OtelContext, context as otelContextApi } from '@opentelemetry/api'
import { db } from '@sim/db'
import { copilotChats } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { eq, sql } from 'drizzle-orm'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
@@ -20,11 +20,14 @@ import { finalizeAssistantTurn } from '@/lib/copilot/chat/terminal-state'
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
import { COPILOT_REQUEST_MODES } from '@/lib/copilot/constants'
import {
createBadRequestResponse,
createRequestTracker,
createUnauthorizedResponse,
} from '@/lib/copilot/request/http'
CopilotChatPersistOutcome,
CopilotTransport,
} from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { createBadRequestResponse, createUnauthorizedResponse } from '@/lib/copilot/request/http'
import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/request/lifecycle/start'
import { startCopilotOtelRoot, withCopilotSpan } from '@/lib/copilot/request/otel'
import {
acquirePendingChatStream,
getPendingChatStreamId,
@@ -258,6 +261,15 @@ async function persistUserMessage(params: {
contexts?: UnifiedChatRequest['contexts']
workspaceId?: string
notifyWorkspaceStatus: boolean
/**
* Root context for the mothership request. When present the persist
* span is created explicitly under it, which avoids relying on
* AsyncLocalStorage propagation — some upstream awaits (Next.js
* framework frames, Turbopack-instrumented I/O) can swap the active
* store out from under us in dev, which would otherwise leave this
* span parented to the about-to-be-dropped Next.js HTTP span.
*/
parentOtelContext?: OtelContext
}): Promise<unknown[] | undefined> {
const {
chatId,
@@ -267,31 +279,60 @@ async function persistUserMessage(params: {
contexts,
workspaceId,
notifyWorkspaceStatus,
parentOtelContext,
} = params
if (!chatId) return undefined
const userMsg = buildPersistedUserMessage({
id: userMessageId,
content: message,
fileAttachments,
contexts,
})
return withCopilotSpan(
TraceSpan.CopilotChatPersistUserMessage,
{
[TraceAttr.DbSystem]: 'postgresql',
[TraceAttr.DbSqlTable]: 'copilot_chats',
[TraceAttr.ChatId]: chatId,
[TraceAttr.ChatUserMessageId]: userMessageId,
[TraceAttr.ChatMessageBytes]: message.length,
[TraceAttr.ChatFileAttachmentCount]: fileAttachments?.length ?? 0,
[TraceAttr.ChatContextCount]: contexts?.length ?? 0,
...(workspaceId ? { [TraceAttr.WorkspaceId]: workspaceId } : {}),
},
async (span) => {
const userMsg = buildPersistedUserMessage({
id: userMessageId,
content: message,
fileAttachments,
contexts,
})
const [updated] = await db
.update(copilotChats)
.set({
messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`,
conversationId: userMessageId,
updatedAt: new Date(),
})
.where(eq(copilotChats.id, chatId))
.returning({ messages: copilotChats.messages })
const [updated] = await db
.update(copilotChats)
.set({
messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`,
conversationId: userMessageId,
updatedAt: new Date(),
})
.where(eq(copilotChats.id, chatId))
.returning({ messages: copilotChats.messages })
if (notifyWorkspaceStatus && updated && workspaceId) {
taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'started' })
}
const messagesAfter = Array.isArray(updated?.messages) ? updated.messages : undefined
span.setAttributes({
[TraceAttr.ChatPersistOutcome]: updated
? CopilotChatPersistOutcome.Appended
: CopilotChatPersistOutcome.ChatNotFound,
[TraceAttr.ChatMessagesAfter]: messagesAfter?.length ?? 0,
})
return Array.isArray(updated?.messages) ? updated.messages : undefined
if (notifyWorkspaceStatus && updated && workspaceId) {
taskPubSub?.publishStatusChanged({
workspaceId,
chatId,
type: 'started',
})
}
return messagesAfter
},
parentOtelContext
)
}
async function buildInitialExecutionContext(params: {
@@ -336,12 +377,42 @@ function buildOnComplete(params: {
requestId: string
workspaceId?: string
notifyWorkspaceStatus: boolean
/**
* Root agent span for this request. When present, the final
* assistant message + invoked tool calls are recorded as
* `gen_ai.output.messages` on it before persistence runs. Keeps
* the Honeycomb Gen AI view complete across both the Sim root
* span and the Go-side `llm.stream` spans.
*/
otelRoot?: {
setOutputMessages: (output: {
assistantText?: string
toolCalls?: Array<{ id: string; name: string; arguments?: Record<string, unknown> }>
}) => void
}
}) {
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus } = params
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus, otelRoot } = params
return async (result: OrchestratorResult) => {
if (otelRoot && result.success) {
otelRoot.setOutputMessages({
assistantText: result.content,
toolCalls: result.toolCalls?.map((tc) => ({
id: tc.id,
name: tc.name,
arguments: tc.params,
})),
})
}
if (!chatId) return
// On cancel, /chat/stop is the sole DB writer — it persists
// partial content AND clears conversationId in one UPDATE. If we
// finalize here first the filter misses and content vanishes.
// Real errors still finalize so the stream marker clears.
if (result.cancelled) return
try {
await finalizeAssistantTurn({
chatId,
@@ -529,10 +600,23 @@ async function resolveBranch(params: {
}
export async function handleUnifiedChatPost(req: NextRequest) {
const tracker = createRequestTracker(false)
let actualChatId: string | undefined
let userMessageId = ''
let chatStreamLockAcquired = false
// Started once we've parsed the body (need userMessageId to stamp as
// streamId). Every subsequent span (persistUserMessage,
// createRunSegment, the whole SSE stream, etc.) nests under this
// root via AsyncLocalStorage / explicit propagation, and the stream's
// terminal code path calls finish() when the request actually ends.
// Errors thrown from the handler before the stream starts are
// finished here in the catch below.
let otelRoot: ReturnType<typeof startCopilotOtelRoot> | undefined
// Canonical logical ID; assigned from otelRoot.requestId (the OTel
// trace ID) as soon as startCopilotOtelRoot runs. Empty only in the
// narrow pre-otelRoot window where errors don't correlate anyway.
let requestId = ''
const executionId = crypto.randomUUID()
const runId = crypto.randomUUID()
try {
const session = await getSession()
@@ -540,212 +624,359 @@ export async function handleUnifiedChatPost(req: NextRequest) {
return createUnauthorizedResponse()
}
const authenticatedUserId = session.user.id
const authenticatedUserEmail = session.user.email
const body = ChatMessageSchema.parse(await req.json())
const normalizedContexts = normalizeContexts(body.contexts)
const normalizedContexts = normalizeContexts(body.contexts) ?? []
userMessageId = body.userMessageId || crypto.randomUUID()
const branch = await resolveBranch({
authenticatedUserId,
workflowId: body.workflowId,
workflowName: body.workflowName,
workspaceId: body.workspaceId,
model: body.model,
mode: body.mode,
provider: body.provider,
})
if (branch instanceof NextResponse) {
return branch
}
let currentChat: ChatLoadResult['chat'] = null
let conversationHistory: unknown[] = []
let chatIsNew = false
actualChatId = body.chatId
if (body.chatId || body.createNewChat) {
const chatResult = await resolveOrCreateChat({
chatId: body.chatId,
userId: authenticatedUserId,
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
workspaceId: branch.workspaceId,
model: branch.titleModel,
type: branch.kind === 'workflow' ? 'copilot' : 'mothership',
})
currentChat = chatResult.chat
actualChatId = chatResult.chatId || body.chatId
chatIsNew = chatResult.isNew
conversationHistory = Array.isArray(chatResult.conversationHistory)
? chatResult.conversationHistory
: []
if (body.chatId && !currentChat) {
return NextResponse.json({ error: 'Chat not found' }, { status: 404 })
}
}
if (chatIsNew && actualChatId && body.resourceAttachments?.length) {
await persistChatResources(
actualChatId,
body.resourceAttachments.map((r) => ({
type: r.type,
id: r.id,
title: r.title ?? GENERIC_RESOURCE_TITLE[r.type],
}))
)
}
if (actualChatId) {
chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId)
if (!chatStreamLockAcquired) {
const activeStreamId = await getPendingChatStreamId(actualChatId)
return NextResponse.json(
{
error: 'A response is already in progress for this chat.',
...(activeStreamId ? { activeStreamId } : {}),
},
{ status: 409 }
)
}
}
const workspaceId = branch.workspaceId
const userPermissionPromise = workspaceId
? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => {
logger.warn('Failed to load user permissions', {
error: toError(error).message,
workspaceId,
})
return null
})
: Promise.resolve(null)
const workspaceContextPromise =
branch.kind === 'workspace'
? generateWorkspaceContext(branch.workspaceId, authenticatedUserId)
: Promise.resolve(undefined)
const agentContextsPromise = resolveAgentContexts({
contexts: normalizedContexts,
resourceAttachments: body.resourceAttachments,
userId: authenticatedUserId,
message: body.message,
workspaceId,
chatId: actualChatId,
requestId: tracker.requestId,
})
const persistedMessagesPromise = persistUserMessage({
chatId: actualChatId,
userMessageId,
message: body.message,
fileAttachments: body.fileAttachments,
contexts: normalizedContexts,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
})
const executionContextPromise = branch.buildExecutionContext({
userId: authenticatedUserId,
chatId: actualChatId,
userTimezone: body.userTimezone,
messageId: userMessageId,
})
const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] =
await Promise.all([
agentContextsPromise,
userPermissionPromise,
workspaceContextPromise,
persistedMessagesPromise,
executionContextPromise,
])
if (persistedMessages) {
conversationHistory = persistedMessages.filter((message) => {
const record = message as Record<string, unknown>
return record.id !== userMessageId
})
}
const requestPayload =
branch.kind === 'workflow'
? await branch.buildPayload({
message: body.message,
userId: authenticatedUserId,
userMessageId,
chatId: actualChatId,
contexts: agentContexts,
fileAttachments: body.fileAttachments,
userPermission: userPermission ?? undefined,
userTimezone: body.userTimezone,
workflowId: branch.workflowId,
workflowName: branch.workflowName,
workspaceId: branch.workspaceId,
mode: branch.mode,
provider: branch.provider,
commands: body.commands,
prefetch: body.prefetch,
implicitFeedback: body.implicitFeedback,
})
: await branch.buildPayload({
message: body.message,
userId: authenticatedUserId,
userMessageId,
chatId: actualChatId,
contexts: agentContexts,
fileAttachments: body.fileAttachments,
userPermission: userPermission ?? undefined,
userTimezone: body.userTimezone,
workspaceContext,
})
const executionId = crypto.randomUUID()
const runId = crypto.randomUUID()
const stream = createSSEStream({
requestPayload,
userId: authenticatedUserId,
otelRoot = startCopilotOtelRoot({
streamId: userMessageId,
executionId,
runId,
chatId: actualChatId,
currentChat,
isNewChat: conversationHistory.length === 0,
message: body.message,
titleModel: branch.titleModel,
...(branch.titleProvider ? { titleProvider: branch.titleProvider } : {}),
requestId: tracker.requestId,
workspaceId,
orchestrateOptions: {
userId: authenticatedUserId,
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}),
transport: CopilotTransport.Stream,
userMessagePreview: body.message,
})
if (otelRoot.requestId) {
requestId = otelRoot.requestId
}
// Identity stamp — Go already stamps `user.id` on spans from the
// validated API-key path, but Sim is the only side of the wire
// that knows the human-facing email. Stamping both on the Sim
// root (so they show up on `rootAttrs` in Tempo search) saves
// the "turn user.id into a real person" round-trip to the DB
// for every ad-hoc investigation.
otelRoot.span.setAttribute(TraceAttr.UserId, authenticatedUserId)
if (authenticatedUserEmail) {
otelRoot.span.setAttribute(TraceAttr.UserEmail, authenticatedUserEmail)
}
// `setInputMessages` is internally gated on
// OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT; safe to call.
otelRoot.setInputMessages({ userMessage: body.message })
// Wrap the rest of the handler so nested spans attach to the
// root via AsyncLocalStorage (otherwise they orphan into new traces).
const activeOtelRoot = otelRoot
return await otelContextApi.with(activeOtelRoot.context, async () => {
const branch = await withCopilotSpan(
TraceSpan.CopilotChatResolveBranch,
{
[TraceAttr.WorkflowId]: body.workflowId ?? '',
[TraceAttr.WorkspaceId]: body.workspaceId ?? '',
},
() =>
resolveBranch({
authenticatedUserId,
workflowId: body.workflowId,
workflowName: body.workflowName,
workspaceId: body.workspaceId,
model: body.model,
mode: body.mode,
provider: body.provider,
}),
activeOtelRoot.context
)
if (branch instanceof NextResponse) {
// Non-actionable 4xx (400 bad-request from resolveBranch): stamp
// outcome=error for dashboards but leave span status UNSET so
// error alerts don't fire on normal validation rejections.
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, branch.status)
activeOtelRoot.finish('error')
return branch
}
let currentChat: ChatLoadResult['chat'] = null
let conversationHistory: unknown[] = []
let chatIsNew = false
actualChatId = body.chatId
if (body.chatId || body.createNewChat) {
const chatResult = await withCopilotSpan(
TraceSpan.CopilotChatResolveOrCreateChat,
{
[TraceAttr.ChatPreexisting]: !!body.chatId,
[TraceAttr.CopilotChatIsNew]: !!body.createNewChat,
},
() =>
resolveOrCreateChat({
chatId: body.chatId,
userId: authenticatedUserId,
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
workspaceId: branch.workspaceId,
model: branch.titleModel,
type: branch.kind === 'workflow' ? 'copilot' : 'mothership',
}),
activeOtelRoot.context
)
currentChat = chatResult.chat
actualChatId = chatResult.chatId || body.chatId
chatIsNew = chatResult.isNew
conversationHistory = Array.isArray(chatResult.conversationHistory)
? chatResult.conversationHistory
: []
if (body.chatId && !currentChat) {
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, 404)
activeOtelRoot.finish('error')
return NextResponse.json({ error: 'Chat not found' }, { status: 404 })
}
}
if (chatIsNew && actualChatId && body.resourceAttachments?.length) {
await persistChatResources(
actualChatId,
body.resourceAttachments.map((r) => ({
type: r.type,
id: r.id,
title: r.title ?? GENERIC_RESOURCE_TITLE[r.type],
}))
)
}
let pendingStreamWaitMs = 0
if (actualChatId) {
const lockStart = Date.now()
chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId)
pendingStreamWaitMs = Date.now() - lockStart
if (!chatStreamLockAcquired) {
const activeStreamId = await getPendingChatStreamId(actualChatId)
// 409 is in the actionable set (see `isActionableErrorStatus`);
// pass a synthesized Error so the span escalates to ERROR status
// and surfaces on pending-stream-collision dashboards.
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, 409)
activeOtelRoot.finish(
'error',
new Error('A response is already in progress for this chat.')
)
return NextResponse.json(
{
error: 'A response is already in progress for this chat.',
...(activeStreamId ? { activeStreamId } : {}),
},
{ status: 409 }
)
}
}
// Stamp request-shape metadata on the root `gen_ai.agent.execute`
// span now that `branch`, attachment counts, and the pending-stream
// wait are all known. This turns dashboard slicing by
// `copilot.surface` / `copilot.mode` / `copilot.interrupted_prior_stream`
// into a simple TraceQL filter.
activeOtelRoot.setRequestShape({
branchKind: branch.kind,
mode: body.mode,
model: body.model,
provider: body.provider,
createNewChat: body.createNewChat,
prefetch: body.prefetch,
fileAttachmentsCount: body.fileAttachments?.length ?? 0,
resourceAttachmentsCount: body.resourceAttachments?.length ?? 0,
contextsCount: normalizedContexts.length,
commandsCount: body.commands?.length ?? 0,
pendingStreamWaitMs,
})
const workspaceId = branch.workspaceId
const userPermissionPromise = workspaceId
? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => {
logger.warn('Failed to load user permissions', {
error: error instanceof Error ? error.message : String(error),
workspaceId,
})
return null
})
: Promise.resolve(null)
// Wrap the pre-LLM prep work in spans so the trace waterfall shows
// where time is going between "request received" and "llm.stream
// opens". Previously these ran bare under the root and inflated the
// apparent "gap" before the model call. Each promise is its own
// span; they run concurrently under Promise.all below.
const workspaceContextPromise =
branch.kind === 'workspace'
? withCopilotSpan(
TraceSpan.CopilotChatBuildWorkspaceContext,
{ [TraceAttr.WorkspaceId]: branch.workspaceId },
() => generateWorkspaceContext(branch.workspaceId, authenticatedUserId),
activeOtelRoot.context
)
: Promise.resolve(undefined)
const agentContextsPromise = withCopilotSpan(
TraceSpan.CopilotChatResolveAgentContexts,
{
[TraceAttr.CopilotContextsCount]: normalizedContexts.length,
[TraceAttr.CopilotResourceAttachmentsCount]: body.resourceAttachments?.length ?? 0,
},
() =>
resolveAgentContexts({
contexts: normalizedContexts,
resourceAttachments: body.resourceAttachments,
userId: authenticatedUserId,
message: body.message,
workspaceId,
chatId: actualChatId,
requestId,
}),
activeOtelRoot.context
)
const persistedMessagesPromise = persistUserMessage({
chatId: actualChatId,
userMessageId,
message: body.message,
fileAttachments: body.fileAttachments,
contexts: normalizedContexts,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
parentOtelContext: activeOtelRoot.context,
})
const executionContextPromise = withCopilotSpan(
TraceSpan.CopilotChatBuildExecutionContext,
{ [TraceAttr.CopilotBranchKind]: branch.kind },
() =>
branch.buildExecutionContext({
userId: authenticatedUserId,
chatId: actualChatId,
userTimezone: body.userTimezone,
messageId: userMessageId,
}),
activeOtelRoot.context
)
const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] =
await Promise.all([
agentContextsPromise,
userPermissionPromise,
workspaceContextPromise,
persistedMessagesPromise,
executionContextPromise,
])
if (persistedMessages) {
conversationHistory = persistedMessages.filter((message) => {
const record = message as Record<string, unknown>
return record.id !== userMessageId
})
}
// buildPayload is the last synchronous step before the outbound
// Sim → Go HTTP call. It runs per-tool schema generation (subscription
// lookup + registry iteration, cached 30s) and file upload tracking
// per attachment. Wrapping it so we can see how much of the
// "before llm.stream" gap lives here vs elsewhere.
const requestPayload = await withCopilotSpan(
TraceSpan.CopilotChatBuildPayload,
{
[TraceAttr.CopilotBranchKind]: branch.kind,
[TraceAttr.CopilotFileAttachmentsCount]: body.fileAttachments?.length ?? 0,
[TraceAttr.CopilotContextsCount]: normalizedContexts.length,
},
() =>
branch.kind === 'workflow'
? branch.buildPayload({
message: body.message,
userId: authenticatedUserId,
userMessageId,
chatId: actualChatId,
contexts: agentContexts,
fileAttachments: body.fileAttachments,
userPermission: userPermission ?? undefined,
userTimezone: body.userTimezone,
workflowId: branch.workflowId,
workflowName: branch.workflowName,
workspaceId: branch.workspaceId,
mode: branch.mode,
provider: branch.provider,
commands: body.commands,
prefetch: body.prefetch,
implicitFeedback: body.implicitFeedback,
})
: branch.buildPayload({
message: body.message,
userId: authenticatedUserId,
userMessageId,
chatId: actualChatId,
contexts: agentContexts,
fileAttachments: body.fileAttachments,
userPermission: userPermission ?? undefined,
userTimezone: body.userTimezone,
workspaceContext,
}),
activeOtelRoot.context
)
if (actualChatId) {
activeOtelRoot.span.setAttribute(TraceAttr.ChatId, actualChatId)
}
if (workspaceId) {
activeOtelRoot.span.setAttribute(TraceAttr.WorkspaceId, workspaceId)
}
const stream = createSSEStream({
requestPayload,
userId: authenticatedUserId,
streamId: userMessageId,
executionId,
runId,
goRoute: branch.goRoute,
autoExecuteTools: true,
interactive: true,
executionContext,
onComplete: buildOnComplete({
chatId: actualChatId,
currentChat,
isNewChat: conversationHistory.length === 0,
message: body.message,
titleModel: branch.titleModel,
...(branch.titleProvider ? { titleProvider: branch.titleProvider } : {}),
requestId,
workspaceId,
otelRoot: activeOtelRoot,
orchestrateOptions: {
userId: authenticatedUserId,
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}),
chatId: actualChatId,
userMessageId,
requestId: tracker.requestId,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
}),
onError: buildOnError({
chatId: actualChatId,
userMessageId,
requestId: tracker.requestId,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
}),
},
})
executionId,
runId,
goRoute: branch.goRoute,
autoExecuteTools: true,
interactive: true,
executionContext,
onComplete: buildOnComplete({
chatId: actualChatId,
userMessageId,
requestId,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
otelRoot,
}),
onError: buildOnError({
chatId: actualChatId,
userMessageId,
requestId,
workspaceId,
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
}),
},
})
return new Response(stream, { headers: SSE_RESPONSE_HEADERS })
// Expose the root gen_ai.agent.execute span's trace identity to
// the browser so subsequent HTTP calls (stop, abort, confirm,
// SSE reconnect) can echo it back as `traceparent` — making
// all side-channel work on this request appear as child spans
// of this same trace in Tempo instead of disconnected roots.
// W3C traceparent format: `00-<trace-id>-<parent-id>-<flags>`.
const rootCtx = activeOtelRoot.span.spanContext()
const rootTraceparent = `00-${rootCtx.traceId}-${rootCtx.spanId}-${
(rootCtx.traceFlags & 0x1) === 0x1 ? '01' : '00'
}`
return new Response(stream, {
headers: {
...SSE_RESPONSE_HEADERS,
traceparent: rootTraceparent,
},
})
}) // end otelContextApi.with
} catch (error) {
if (chatStreamLockAcquired && actualChatId && userMessageId) {
await releasePendingChatStream(actualChatId, userMessageId)
}
otelRoot?.finish('error', error)
if (error instanceof z.ZodError) {
return NextResponse.json(
@@ -754,13 +985,15 @@ export async function handleUnifiedChatPost(req: NextRequest) {
)
}
logger.error(`[${tracker.requestId}] Error handling unified chat request`, {
logger.error(`[${requestId}] Error handling unified chat request`, {
error: error instanceof Error ? error.message : 'Unknown error',
stack: error instanceof Error ? error.stack : undefined,
})
return NextResponse.json(
{ error: error instanceof Error ? error.message : 'Internal server error' },
{
error: error instanceof Error ? error.message : 'Internal server error',
},
{ status: 500 }
)
}

View File

@@ -2,6 +2,10 @@ import { db } from '@sim/db'
import { copilotChats } from '@sim/db/schema'
import { and, eq, sql } from 'drizzle-orm'
import type { PersistedMessage } from '@/lib/copilot/chat/persisted-message'
import { CopilotChatFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
interface FinalizeAssistantTurnParams {
chatId: string
@@ -19,39 +23,65 @@ export async function finalizeAssistantTurn({
userMessageId,
assistantMessage,
}: FinalizeAssistantTurnParams): Promise<void> {
const [row] = await db
.select({ messages: copilotChats.messages })
.from(copilotChats)
.where(eq(copilotChats.id, chatId))
.limit(1)
return withCopilotSpan(
TraceSpan.CopilotChatFinalizeAssistantTurn,
{
[TraceAttr.DbSystem]: 'postgresql',
[TraceAttr.DbSqlTable]: 'copilot_chats',
[TraceAttr.ChatId]: chatId,
[TraceAttr.ChatUserMessageId]: userMessageId,
[TraceAttr.ChatHasAssistantMessage]: !!assistantMessage,
},
async (span) => {
const [row] = await db
.select({ messages: copilotChats.messages })
.from(copilotChats)
.where(eq(copilotChats.id, chatId))
.limit(1)
const messages: Record<string, unknown>[] = Array.isArray(row?.messages) ? row.messages : []
const userIdx = messages.findIndex((message) => message.id === userMessageId)
const alreadyHasResponse =
userIdx >= 0 &&
userIdx + 1 < messages.length &&
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
const canAppendAssistant = userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
const updateWhere = and(
eq(copilotChats.id, chatId),
eq(copilotChats.conversationId, userMessageId)
const messages: Record<string, unknown>[] = Array.isArray(row?.messages) ? row.messages : []
span.setAttribute(TraceAttr.ChatExistingMessageCount, messages.length)
const userIdx = messages.findIndex((message) => message.id === userMessageId)
const alreadyHasResponse =
userIdx >= 0 &&
userIdx + 1 < messages.length &&
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
const canAppendAssistant =
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
const updateWhere = and(
eq(copilotChats.id, chatId),
eq(copilotChats.conversationId, userMessageId)
)
const baseUpdate = {
conversationId: null,
updatedAt: new Date(),
}
if (assistantMessage && canAppendAssistant) {
await db
.update(copilotChats)
.set({
...baseUpdate,
messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`,
})
.where(updateWhere)
span.setAttribute(
TraceAttr.ChatFinalizeOutcome,
CopilotChatFinalizeOutcome.AppendedAssistant
)
return
}
await db.update(copilotChats).set(baseUpdate).where(updateWhere)
span.setAttribute(
TraceAttr.ChatFinalizeOutcome,
assistantMessage
? alreadyHasResponse
? 'assistant_already_persisted'
: 'stale_user_message'
: 'cleared_stream_marker_only'
)
}
)
const baseUpdate = {
conversationId: null,
updatedAt: new Date(),
}
if (assistantMessage && canAppendAssistant) {
await db
.update(copilotChats)
.set({
...baseUpdate,
messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`,
})
.where(updateWhere)
return
}
await db.update(copilotChats).set(baseUpdate).where(updateWhere)
}

View File

@@ -34,9 +34,6 @@ export const STREAM_STORAGE_KEY = 'copilot_active_stream'
/** POST — send a chat message through the unified mothership chat surface. */
export const MOTHERSHIP_CHAT_API_PATH = '/api/mothership/chat'
/** Backwards-compatible alias while remaining callers migrate. */
export const COPILOT_CHAT_API_PATH = MOTHERSHIP_CHAT_API_PATH
/** POST — confirm or reject a tool call. */
export const COPILOT_CONFIRM_API_PATH = '/api/copilot/confirm'

View File

@@ -1316,6 +1316,11 @@ export const MOTHERSHIP_STREAM_V1_SCHEMA: JsonSchema = {
MothershipStreamV1Trace: {
additionalProperties: false,
properties: {
goTraceId: {
description:
'OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity.',
type: 'string',
},
requestId: {
type: 'string',
},

View File

@@ -66,6 +66,10 @@ export interface MothershipStreamV1StreamRef {
streamId: string
}
export interface MothershipStreamV1Trace {
/**
* OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity.
*/
goTraceId?: string
requestId: string
spanId?: string
}

View File

@@ -34,6 +34,7 @@ export interface RequestTraceV1SimReport {
startMs: number
streamId?: string
usage?: RequestTraceV1UsageSummary
userMessage?: string
}
/**
* This interface was referenced by `RequestTraceV1SimReport`'s JSON-Schema
@@ -112,6 +113,7 @@ export interface RequestTraceV1SimReport1 {
startMs: number
streamId?: string
usage?: RequestTraceV1UsageSummary
userMessage?: string
}
export const RequestTraceV1Outcome = {

View File

@@ -0,0 +1,372 @@
// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json
// Regenerate with: bun run trace-attribute-values-contract:generate
//
// Canonical closed-set value vocabularies for mothership OTel
// attributes. Call sites should reference e.g.
// `CopilotRequestCancelReason.ExplicitStop` rather than the raw
// string literal, so typos become compile errors and the Go contract
// remains the single source of truth.
export const AbortBackend = {
InProcess: 'in_process',
Redis: 'redis',
} as const
export type AbortBackendKey = keyof typeof AbortBackend
export type AbortBackendValue = (typeof AbortBackend)[AbortBackendKey]
export const AbortRedisResult = {
Error: 'error',
Ok: 'ok',
Slow: 'slow',
} as const
export type AbortRedisResultKey = keyof typeof AbortRedisResult
export type AbortRedisResultValue = (typeof AbortRedisResult)[AbortRedisResultKey]
export const AuthKeyMatch = {
Enterprise: 'enterprise',
None: 'none',
User: 'user',
} as const
export type AuthKeyMatchKey = keyof typeof AuthKeyMatch
export type AuthKeyMatchValue = (typeof AuthKeyMatch)[AuthKeyMatchKey]
export const BillingAnalyticsOutcome = {
Duplicate: 'duplicate',
RetriesExhausted: 'retries_exhausted',
Success: 'success',
Unknown: 'unknown',
} as const
export type BillingAnalyticsOutcomeKey = keyof typeof BillingAnalyticsOutcome
export type BillingAnalyticsOutcomeValue =
(typeof BillingAnalyticsOutcome)[BillingAnalyticsOutcomeKey]
export const BillingFlushOutcome = {
CheckpointAlreadyClaimed: 'checkpoint_already_claimed',
CheckpointLoadFailed: 'checkpoint_load_failed',
Flushed: 'flushed',
NoCheckpoint: 'no_checkpoint',
NoSnapshot: 'no_snapshot',
SkippedUnconfigured: 'skipped_unconfigured',
} as const
export type BillingFlushOutcomeKey = keyof typeof BillingFlushOutcome
export type BillingFlushOutcomeValue = (typeof BillingFlushOutcome)[BillingFlushOutcomeKey]
export const BillingRouteOutcome = {
AuthFailed: 'auth_failed',
Billed: 'billed',
BillingDisabled: 'billing_disabled',
DuplicateIdempotencyKey: 'duplicate_idempotency_key',
InternalError: 'internal_error',
InvalidBody: 'invalid_body',
} as const
export type BillingRouteOutcomeKey = keyof typeof BillingRouteOutcome
export type BillingRouteOutcomeValue = (typeof BillingRouteOutcome)[BillingRouteOutcomeKey]
export const CopilotAbortOutcome = {
BadRequest: 'bad_request',
FallbackPersistFailed: 'fallback_persist_failed',
MissingMessageId: 'missing_message_id',
MissingStreamId: 'missing_stream_id',
NoChatId: 'no_chat_id',
Ok: 'ok',
SettleTimeout: 'settle_timeout',
Settled: 'settled',
Unauthorized: 'unauthorized',
} as const
export type CopilotAbortOutcomeKey = keyof typeof CopilotAbortOutcome
export type CopilotAbortOutcomeValue = (typeof CopilotAbortOutcome)[CopilotAbortOutcomeKey]
export const CopilotBranchKind = {
Workflow: 'workflow',
Workspace: 'workspace',
} as const
export type CopilotBranchKindKey = keyof typeof CopilotBranchKind
export type CopilotBranchKindValue = (typeof CopilotBranchKind)[CopilotBranchKindKey]
export const CopilotChatFinalizeOutcome = {
AppendedAssistant: 'appended_assistant',
AssistantAlreadyPersisted: 'assistant_already_persisted',
ClearedStreamMarkerOnly: 'cleared_stream_marker_only',
StaleUserMessage: 'stale_user_message',
} as const
export type CopilotChatFinalizeOutcomeKey = keyof typeof CopilotChatFinalizeOutcome
export type CopilotChatFinalizeOutcomeValue =
(typeof CopilotChatFinalizeOutcome)[CopilotChatFinalizeOutcomeKey]
export const CopilotChatPersistOutcome = {
Appended: 'appended',
ChatNotFound: 'chat_not_found',
} as const
export type CopilotChatPersistOutcomeKey = keyof typeof CopilotChatPersistOutcome
export type CopilotChatPersistOutcomeValue =
(typeof CopilotChatPersistOutcome)[CopilotChatPersistOutcomeKey]
export const CopilotConfirmOutcome = {
Delivered: 'delivered',
Forbidden: 'forbidden',
InternalError: 'internal_error',
RunNotFound: 'run_not_found',
ToolCallNotFound: 'tool_call_not_found',
Unauthorized: 'unauthorized',
UpdateFailed: 'update_failed',
ValidationError: 'validation_error',
} as const
export type CopilotConfirmOutcomeKey = keyof typeof CopilotConfirmOutcome
export type CopilotConfirmOutcomeValue = (typeof CopilotConfirmOutcome)[CopilotConfirmOutcomeKey]
export const CopilotFinalizeOutcome = {
Aborted: 'aborted',
Error: 'error',
Success: 'success',
} as const
export type CopilotFinalizeOutcomeKey = keyof typeof CopilotFinalizeOutcome
export type CopilotFinalizeOutcomeValue = (typeof CopilotFinalizeOutcome)[CopilotFinalizeOutcomeKey]
export const CopilotLeg = {
SimToGo: 'sim_to_go',
} as const
export type CopilotLegKey = keyof typeof CopilotLeg
export type CopilotLegValue = (typeof CopilotLeg)[CopilotLegKey]
export const CopilotOutputFileOutcome = {
Failed: 'failed',
Uploaded: 'uploaded',
} as const
export type CopilotOutputFileOutcomeKey = keyof typeof CopilotOutputFileOutcome
export type CopilotOutputFileOutcomeValue =
(typeof CopilotOutputFileOutcome)[CopilotOutputFileOutcomeKey]
export const CopilotRecoveryOutcome = {
GapDetected: 'gap_detected',
InRange: 'in_range',
} as const
export type CopilotRecoveryOutcomeKey = keyof typeof CopilotRecoveryOutcome
export type CopilotRecoveryOutcomeValue = (typeof CopilotRecoveryOutcome)[CopilotRecoveryOutcomeKey]
export const CopilotRequestCancelReason = {
ClientDisconnect: 'client_disconnect',
ExplicitStop: 'explicit_stop',
Timeout: 'timeout',
Unknown: 'unknown',
} as const
export type CopilotRequestCancelReasonKey = keyof typeof CopilotRequestCancelReason
export type CopilotRequestCancelReasonValue =
(typeof CopilotRequestCancelReason)[CopilotRequestCancelReasonKey]
export const CopilotResourcesOp = {
Delete: 'delete',
None: 'none',
Upsert: 'upsert',
} as const
export type CopilotResourcesOpKey = keyof typeof CopilotResourcesOp
export type CopilotResourcesOpValue = (typeof CopilotResourcesOp)[CopilotResourcesOpKey]
export const CopilotResumeOutcome = {
BatchDelivered: 'batch_delivered',
ClientDisconnected: 'client_disconnected',
EndedWithoutTerminal: 'ended_without_terminal',
StreamNotFound: 'stream_not_found',
TerminalDelivered: 'terminal_delivered',
} as const
export type CopilotResumeOutcomeKey = keyof typeof CopilotResumeOutcome
export type CopilotResumeOutcomeValue = (typeof CopilotResumeOutcome)[CopilotResumeOutcomeKey]
export const CopilotSseCloseReason = {
Aborted: 'aborted',
BackendError: 'backend_error',
BillingLimit: 'billing_limit',
ClosedNoTerminal: 'closed_no_terminal',
Error: 'error',
Terminal: 'terminal',
Timeout: 'timeout',
} as const
export type CopilotSseCloseReasonKey = keyof typeof CopilotSseCloseReason
export type CopilotSseCloseReasonValue = (typeof CopilotSseCloseReason)[CopilotSseCloseReasonKey]
export const CopilotStopOutcome = {
ChatNotFound: 'chat_not_found',
InternalError: 'internal_error',
NoMatchingRow: 'no_matching_row',
Persisted: 'persisted',
Unauthorized: 'unauthorized',
ValidationError: 'validation_error',
} as const
export type CopilotStopOutcomeKey = keyof typeof CopilotStopOutcome
export type CopilotStopOutcomeValue = (typeof CopilotStopOutcome)[CopilotStopOutcomeKey]
export const CopilotSurface = {
Copilot: 'copilot',
Mothership: 'mothership',
} as const
export type CopilotSurfaceKey = keyof typeof CopilotSurface
export type CopilotSurfaceValue = (typeof CopilotSurface)[CopilotSurfaceKey]
export const CopilotTableOutcome = {
EmptyContent: 'empty_content',
EmptyRows: 'empty_rows',
Failed: 'failed',
Imported: 'imported',
InvalidJsonShape: 'invalid_json_shape',
InvalidShape: 'invalid_shape',
RowLimitExceeded: 'row_limit_exceeded',
TableNotFound: 'table_not_found',
Wrote: 'wrote',
} as const
export type CopilotTableOutcomeKey = keyof typeof CopilotTableOutcome
export type CopilotTableOutcomeValue = (typeof CopilotTableOutcome)[CopilotTableOutcomeKey]
export const CopilotTableSourceFormat = {
Csv: 'csv',
Json: 'json',
} as const
export type CopilotTableSourceFormatKey = keyof typeof CopilotTableSourceFormat
export type CopilotTableSourceFormatValue =
(typeof CopilotTableSourceFormat)[CopilotTableSourceFormatKey]
export const CopilotTransport = {
Batch: 'batch',
Headless: 'headless',
Stream: 'stream',
} as const
export type CopilotTransportKey = keyof typeof CopilotTransport
export type CopilotTransportValue = (typeof CopilotTransport)[CopilotTransportKey]
export const CopilotValidateOutcome = {
InternalAuthFailed: 'internal_auth_failed',
InternalError: 'internal_error',
InvalidBody: 'invalid_body',
Ok: 'ok',
UsageExceeded: 'usage_exceeded',
UserNotFound: 'user_not_found',
} as const
export type CopilotValidateOutcomeKey = keyof typeof CopilotValidateOutcome
export type CopilotValidateOutcomeValue = (typeof CopilotValidateOutcome)[CopilotValidateOutcomeKey]
export const CopilotVfsOutcome = {
PassthroughFitsBudget: 'passthrough_fits_budget',
PassthroughNoMetadata: 'passthrough_no_metadata',
PassthroughNoSharp: 'passthrough_no_sharp',
RejectedNoMetadata: 'rejected_no_metadata',
RejectedNoSharp: 'rejected_no_sharp',
RejectedTooLargeAfterResize: 'rejected_too_large_after_resize',
Resized: 'resized',
} as const
export type CopilotVfsOutcomeKey = keyof typeof CopilotVfsOutcome
export type CopilotVfsOutcomeValue = (typeof CopilotVfsOutcome)[CopilotVfsOutcomeKey]
export const CopilotVfsReadOutcome = {
BinaryPlaceholder: 'binary_placeholder',
DocumentParsed: 'document_parsed',
DocumentTooLarge: 'document_too_large',
ImagePrepared: 'image_prepared',
ImageTooLarge: 'image_too_large',
ParseFailed: 'parse_failed',
ReadFailed: 'read_failed',
TextRead: 'text_read',
TextTooLarge: 'text_too_large',
} as const
export type CopilotVfsReadOutcomeKey = keyof typeof CopilotVfsReadOutcome
export type CopilotVfsReadOutcomeValue = (typeof CopilotVfsReadOutcome)[CopilotVfsReadOutcomeKey]
export const CopilotVfsReadPath = {
Binary: 'binary',
Image: 'image',
ParseableDocument: 'parseable_document',
Text: 'text',
} as const
export type CopilotVfsReadPathKey = keyof typeof CopilotVfsReadPath
export type CopilotVfsReadPathValue = (typeof CopilotVfsReadPath)[CopilotVfsReadPathKey]
export const LlmErrorStage = {
BuildRequest: 'build_request',
Decode: 'decode',
HttpBuild: 'http_build',
HttpStatus: 'http_status',
Invoke: 'invoke',
MarshalRequest: 'marshal_request',
StreamClose: 'stream_close',
} as const
export type LlmErrorStageKey = keyof typeof LlmErrorStage
export type LlmErrorStageValue = (typeof LlmErrorStage)[LlmErrorStageKey]
export const RateLimitOutcome = {
Allowed: 'allowed',
IncrError: 'incr_error',
Limited: 'limited',
} as const
export type RateLimitOutcomeKey = keyof typeof RateLimitOutcome
export type RateLimitOutcomeValue = (typeof RateLimitOutcome)[RateLimitOutcomeKey]
export const ToolAsyncWaiterResolution = {
ContextCancelled: 'context_cancelled',
Poll: 'poll',
Pubsub: 'pubsub',
StoredAfterClose: 'stored_after_close',
StoredBeforeSubscribe: 'stored_before_subscribe',
StoredPostSubscribe: 'stored_post_subscribe',
SubscriptionClosed: 'subscription_closed',
Unknown: 'unknown',
} as const
export type ToolAsyncWaiterResolutionKey = keyof typeof ToolAsyncWaiterResolution
export type ToolAsyncWaiterResolutionValue =
(typeof ToolAsyncWaiterResolution)[ToolAsyncWaiterResolutionKey]
export const ToolErrorKind = {
Dispatch: 'dispatch',
NotFound: 'not_found',
} as const
export type ToolErrorKindKey = keyof typeof ToolErrorKind
export type ToolErrorKindValue = (typeof ToolErrorKind)[ToolErrorKindKey]
export const ToolExecutor = {
Client: 'client',
Go: 'go',
Sim: 'sim',
} as const
export type ToolExecutorKey = keyof typeof ToolExecutor
export type ToolExecutorValue = (typeof ToolExecutor)[ToolExecutorKey]
export const ToolStoreStatus = {
Cancelled: 'cancelled',
Completed: 'completed',
Failed: 'failed',
Pending: 'pending',
} as const
export type ToolStoreStatusKey = keyof typeof ToolStoreStatus
export type ToolStoreStatusValue = (typeof ToolStoreStatus)[ToolStoreStatusKey]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,50 @@
// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-events-v1.schema.json
// Regenerate with: bun run trace-events-contract:generate
//
// Canonical mothership OTel span event names. Call sites should
// reference `TraceEvent.<Identifier>` (e.g.
// `TraceEvent.RequestCancelled`) rather than raw string literals,
// so the Go-side contract is the single source of truth and typos
// become compile errors.
export const TraceEvent = {
BedrockInvokeRetryWithoutImages: 'bedrock.invoke.retry_without_images',
CopilotOutputFileError: 'copilot.output_file.error',
CopilotSseFirstEvent: 'copilot.sse.first_event',
CopilotSseIdleGapExceeded: 'copilot.sse.idle_gap_exceeded',
CopilotSseTerminalEventReceived: 'copilot.sse.terminal_event_received',
CopilotTableError: 'copilot.table.error',
CopilotVfsParseFailed: 'copilot.vfs.parse_failed',
CopilotVfsResizeAttempt: 'copilot.vfs.resize_attempt',
CopilotVfsResizeAttemptFailed: 'copilot.vfs.resize_attempt_failed',
LlmInvokeSent: 'llm.invoke.sent',
LlmStreamFirstChunk: 'llm.stream.first_chunk',
LlmStreamOpened: 'llm.stream.opened',
PgNotifyFailed: 'pg_notify_failed',
RedisSubscribed: 'redis.subscribed',
RequestCancelled: 'request.cancelled',
} as const
export type TraceEventKey = keyof typeof TraceEvent
export type TraceEventValue = (typeof TraceEvent)[TraceEventKey]
/** Readonly sorted list of every canonical event name. */
export const TraceEventValues: readonly TraceEventValue[] = [
'bedrock.invoke.retry_without_images',
'copilot.output_file.error',
'copilot.sse.first_event',
'copilot.sse.idle_gap_exceeded',
'copilot.sse.terminal_event_received',
'copilot.table.error',
'copilot.vfs.parse_failed',
'copilot.vfs.resize_attempt',
'copilot.vfs.resize_attempt_failed',
'llm.invoke.sent',
'llm.stream.first_chunk',
'llm.stream.opened',
'pg_notify_failed',
'redis.subscribed',
'request.cancelled',
] as const

View File

@@ -0,0 +1,153 @@
// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-spans-v1.schema.json
// Regenerate with: bun run trace-spans-contract:generate
//
// Canonical mothership OTel span names. Call sites should reference
// `TraceSpan.<Identifier>` (e.g. `TraceSpan.CopilotVfsReadFile`)
// rather than raw string literals, so the Go-side contract is the
// single source of truth and typos become compile errors.
export const TraceSpan = {
AnthropicCountTokens: 'anthropic.count_tokens',
AsyncToolStoreSet: 'async_tool_store.set',
AuthRateLimitRecord: 'auth.rate_limit.record',
AuthValidateKey: 'auth.validate_key',
ChatContinueWithToolResults: 'chat.continue_with_tool_results',
ChatExplicitAbortConsume: 'chat.explicit_abort.consume',
ChatExplicitAbortFlushPausedBilling: 'chat.explicit_abort.flush_paused_billing',
ChatExplicitAbortHandle: 'chat.explicit_abort.handle',
ChatExplicitAbortMark: 'chat.explicit_abort.mark',
ChatExplicitAbortPeek: 'chat.explicit_abort.peek',
ChatGateAcquire: 'chat.gate.acquire',
ChatPersistAfterDone: 'chat.persist_after_done',
ChatSetup: 'chat.setup',
ContextReduce: 'context.reduce',
ContextSummarizeChunk: 'context.summarize_chunk',
CopilotAnalyticsFlush: 'copilot.analytics.flush',
CopilotAnalyticsSaveRequest: 'copilot.analytics.save_request',
CopilotAnalyticsUpdateBilling: 'copilot.analytics.update_billing',
CopilotAsyncRunsClaimCompleted: 'copilot.async_runs.claim_completed',
CopilotAsyncRunsCreateRunCheckpoint: 'copilot.async_runs.create_run_checkpoint',
CopilotAsyncRunsCreateRunSegment: 'copilot.async_runs.create_run_segment',
CopilotAsyncRunsGetAsyncToolCall: 'copilot.async_runs.get_async_tool_call',
CopilotAsyncRunsGetLatestForExecution: 'copilot.async_runs.get_latest_for_execution',
CopilotAsyncRunsGetLatestForStream: 'copilot.async_runs.get_latest_for_stream',
CopilotAsyncRunsGetMany: 'copilot.async_runs.get_many',
CopilotAsyncRunsGetRunSegment: 'copilot.async_runs.get_run_segment',
CopilotAsyncRunsListForRun: 'copilot.async_runs.list_for_run',
CopilotAsyncRunsMarkAsyncToolStatus: 'copilot.async_runs.mark_async_tool_status',
CopilotAsyncRunsReleaseClaim: 'copilot.async_runs.release_claim',
CopilotAsyncRunsUpdateRunStatus: 'copilot.async_runs.update_run_status',
CopilotAsyncRunsUpsertAsyncToolCall: 'copilot.async_runs.upsert_async_tool_call',
CopilotAuthValidateApiKey: 'copilot.auth.validate_api_key',
CopilotBillingUpdateCost: 'copilot.billing.update_cost',
CopilotChatAbortActiveStream: 'copilot.chat.abort_active_stream',
CopilotChatAbortStream: 'copilot.chat.abort_stream',
CopilotChatAbortWaitSettle: 'copilot.chat.abort_wait_settle',
CopilotChatAcquirePendingStreamLock: 'copilot.chat.acquire_pending_stream_lock',
CopilotChatBuildExecutionContext: 'copilot.chat.build_execution_context',
CopilotChatBuildPayload: 'copilot.chat.build_payload',
CopilotChatBuildWorkspaceContext: 'copilot.chat.build_workspace_context',
CopilotChatFinalizeAssistantTurn: 'copilot.chat.finalize_assistant_turn',
CopilotChatPersistUserMessage: 'copilot.chat.persist_user_message',
CopilotChatResolveAgentContexts: 'copilot.chat.resolve_agent_contexts',
CopilotChatResolveBranch: 'copilot.chat.resolve_branch',
CopilotChatResolveOrCreateChat: 'copilot.chat.resolve_or_create_chat',
CopilotChatStopStream: 'copilot.chat.stop_stream',
CopilotConfirmToolResult: 'copilot.confirm.tool_result',
CopilotFinalizeStream: 'copilot.finalize_stream',
CopilotRecoveryCheckReplayGap: 'copilot.recovery.check_replay_gap',
CopilotResumeRequest: 'copilot.resume.request',
CopilotSseReadLoop: 'copilot.sse.read_loop',
CopilotSubagentExecute: 'copilot.subagent.execute',
CopilotToolWaitForClientResult: 'copilot.tool.wait_for_client_result',
CopilotToolsHandleResourceSideEffects: 'copilot.tools.handle_resource_side_effects',
CopilotToolsWriteCsvToTable: 'copilot.tools.write_csv_to_table',
CopilotToolsWriteOutputFile: 'copilot.tools.write_output_file',
CopilotToolsWriteOutputTable: 'copilot.tools.write_output_table',
CopilotVfsPrepareImage: 'copilot.vfs.prepare_image',
CopilotVfsReadFile: 'copilot.vfs.read_file',
GenAiAgentExecute: 'gen_ai.agent.execute',
LlmStream: 'llm.stream',
ProviderRouterCountTokens: 'provider.router.count_tokens',
ProviderRouterRoute: 'provider.router.route',
SimUpdateCost: 'sim.update_cost',
SimValidateApiKey: 'sim.validate_api_key',
ToolAsyncWaiterWait: 'tool.async_waiter.wait',
ToolExecute: 'tool.execute',
} as const
export type TraceSpanKey = keyof typeof TraceSpan
export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey]
/** Readonly sorted list of every canonical span name. */
export const TraceSpanValues: readonly TraceSpanValue[] = [
'anthropic.count_tokens',
'async_tool_store.set',
'auth.rate_limit.record',
'auth.validate_key',
'chat.continue_with_tool_results',
'chat.explicit_abort.consume',
'chat.explicit_abort.flush_paused_billing',
'chat.explicit_abort.handle',
'chat.explicit_abort.mark',
'chat.explicit_abort.peek',
'chat.gate.acquire',
'chat.persist_after_done',
'chat.setup',
'context.reduce',
'context.summarize_chunk',
'copilot.analytics.flush',
'copilot.analytics.save_request',
'copilot.analytics.update_billing',
'copilot.async_runs.claim_completed',
'copilot.async_runs.create_run_checkpoint',
'copilot.async_runs.create_run_segment',
'copilot.async_runs.get_async_tool_call',
'copilot.async_runs.get_latest_for_execution',
'copilot.async_runs.get_latest_for_stream',
'copilot.async_runs.get_many',
'copilot.async_runs.get_run_segment',
'copilot.async_runs.list_for_run',
'copilot.async_runs.mark_async_tool_status',
'copilot.async_runs.release_claim',
'copilot.async_runs.update_run_status',
'copilot.async_runs.upsert_async_tool_call',
'copilot.auth.validate_api_key',
'copilot.billing.update_cost',
'copilot.chat.abort_active_stream',
'copilot.chat.abort_stream',
'copilot.chat.abort_wait_settle',
'copilot.chat.acquire_pending_stream_lock',
'copilot.chat.build_execution_context',
'copilot.chat.build_payload',
'copilot.chat.build_workspace_context',
'copilot.chat.finalize_assistant_turn',
'copilot.chat.persist_user_message',
'copilot.chat.resolve_agent_contexts',
'copilot.chat.resolve_branch',
'copilot.chat.resolve_or_create_chat',
'copilot.chat.stop_stream',
'copilot.confirm.tool_result',
'copilot.finalize_stream',
'copilot.recovery.check_replay_gap',
'copilot.resume.request',
'copilot.sse.read_loop',
'copilot.subagent.execute',
'copilot.tool.wait_for_client_result',
'copilot.tools.handle_resource_side_effects',
'copilot.tools.write_csv_to_table',
'copilot.tools.write_output_file',
'copilot.tools.write_output_table',
'copilot.vfs.prepare_image',
'copilot.vfs.read_file',
'gen_ai.agent.execute',
'llm.stream',
'provider.router.count_tokens',
'provider.router.route',
'sim.update_cost',
'sim.validate_api_key',
'tool.async_waiter.wait',
'tool.execute',
] as const

View File

@@ -0,0 +1,79 @@
import { trace } from '@opentelemetry/api'
import {
BasicTracerProvider,
InMemorySpanExporter,
SimpleSpanProcessor,
} from '@opentelemetry/sdk-trace-base'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
describe('fetchGo', () => {
  // In-memory exporter behind a SimpleSpanProcessor (synchronous export) so
  // finished spans are inspectable immediately after each call — no batching.
  const exporter = new InMemorySpanExporter()
  const provider = new BasicTracerProvider({
    spanProcessors: [new SimpleSpanProcessor(exporter)],
  })
  beforeEach(() => {
    // Fresh span buffer per test; re-register the provider in case another
    // suite swapped the global tracer provider between runs.
    exporter.reset()
    trace.setGlobalTracerProvider(provider)
    vi.restoreAllMocks()
  })
  it('emits a client span with http.* attrs and injects traceparent', async () => {
    // The mock asserts from INSIDE the fetch call that a well-formed W3C
    // traceparent header (version-traceid-spanid-flags) was injected.
    const fetchMock = vi.fn().mockImplementation(async (_url: string, init: RequestInit) => {
      const headers = init.headers as Record<string, string>
      expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
      return new Response('ok', {
        status: 200,
        headers: { 'content-length': '2' },
      })
    })
    vi.stubGlobal('fetch', fetchMock)
    const res = await fetchGo('https://backend.example.com/api/copilot', {
      method: 'POST',
      body: 'payload',
      operation: 'stream',
      attributes: { 'copilot.leg': 'sim_to_go' },
    })
    expect(res.status).toBe(200)
    const spans = exporter.getFinishedSpans()
    expect(spans).toHaveLength(1)
    const attrs = spans[0].attributes
    // Span name derives from the URL path; attributes cover method, full URL,
    // target path, status, the copilot operation/leg, and headers latency.
    expect(spans[0].name).toBe('sim → go /api/copilot')
    expect(attrs['http.method']).toBe('POST')
    expect(attrs['http.url']).toBe('https://backend.example.com/api/copilot')
    expect(attrs['http.target']).toBe('/api/copilot')
    expect(attrs['http.status_code']).toBe(200)
    expect(attrs['copilot.operation']).toBe('stream')
    expect(attrs['copilot.leg']).toBe('sim_to_go')
    expect(typeof attrs['http.response.headers_ms']).toBe('number')
  })
  it('marks span as error on non-2xx response', async () => {
    vi.stubGlobal('fetch', vi.fn().mockResolvedValue(new Response('nope', { status: 500 })))
    const res = await fetchGo('https://backend.example.com/api/tools/resume', {
      method: 'POST',
    })
    // The response is returned as-is; only the span status reflects failure.
    expect(res.status).toBe(500)
    const spans = exporter.getFinishedSpans()
    expect(spans).toHaveLength(1)
    // 2 === SpanStatusCode.ERROR in @opentelemetry/api.
    expect(spans[0].status.code).toBe(2)
  })
  it('records exceptions when fetch throws', async () => {
    vi.stubGlobal('fetch', vi.fn().mockRejectedValue(new Error('network boom')))
    // fetchGo must rethrow the network error after recording it on the span.
    await expect(
      fetchGo('https://backend.example.com/api/traces', { method: 'POST' })
    ).rejects.toThrow('network boom')
    const spans = exporter.getFinishedSpans()
    expect(spans).toHaveLength(1)
    expect(spans[0].status.code).toBe(2)
    // recordException adds a standard 'exception' span event.
    expect(spans[0].events.some((e) => e.name === 'exception')).toBe(true)
  })
})

View File

@@ -0,0 +1,112 @@
import { type Context, context, SpanStatusCode, trace } from '@opentelemetry/api'
import { CopilotLeg } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { traceHeaders } from '@/lib/copilot/request/go/propagation'
import { isActionableErrorStatus, markSpanForError } from '@/lib/copilot/request/otel'
// Lazy tracer resolution: module-level `trace.getTracer()` can be evaluated
// before `instrumentation-node.ts` installs the TracerProvider under
// Next.js 16 + Turbopack dev, freezing a NoOp tracer and silently dropping
// every outbound Sim → Go span. Resolving per-call avoids the race.
const getTracer = () => trace.getTracer('sim-copilot-http', '1.0.0')
interface OutboundFetchOptions extends RequestInit {
  /** Parent OTel context for the client span; defaults to `context.active()`. */
  otelContext?: Context
  /** Override for the default `sim → go <pathname>` span name. */
  spanName?: string
  /** Logical copilot operation (e.g. 'stream'); recorded as the copilot.operation attribute when set. */
  operation?: string
  /** Extra span attributes, merged last so they can override the defaults. */
  attributes?: Record<string, string | number | boolean>
}
/**
 * Perform an outbound Sim → Go fetch wrapped in an OTel child span so each
 * call shows up as a distinct segment in Jaeger, and propagates the W3C
 * traceparent so the Go-side span joins the same trace.
 *
 * The span captures generic attributes (method, status, duration, response
 * size, error code) so any future latency investigation — not just images or
 * Bedrock — has uniform metadata to work with.
 *
 * @param url absolute URL of the Go backend endpoint
 * @param options standard fetch init plus span controls (otelContext,
 *   spanName, operation, extra attributes)
 * @returns the raw fetch Response; the body is not consumed here
 * @throws rethrows whatever `fetch` throws (network error, abort) after
 *   recording the exception on the span and marking it ERROR
 */
export async function fetchGo(url: string, options: OutboundFetchOptions = {}): Promise<Response> {
  const {
    otelContext,
    spanName,
    operation,
    attributes,
    headers: providedHeaders,
    ...init
  } = options
  const parsed = safeParseUrl(url)
  const pathname = parsed?.pathname ?? url
  const method = (init.method ?? 'GET').toUpperCase()
  const parentContext = otelContext ?? context.active()
  const span = getTracer().startSpan(
    spanName ?? `sim → go ${pathname}`,
    {
      attributes: {
        [TraceAttr.HttpMethod]: method,
        [TraceAttr.HttpUrl]: url,
        [TraceAttr.HttpTarget]: pathname,
        [TraceAttr.NetPeerName]: parsed?.host ?? '',
        [TraceAttr.CopilotLeg]: CopilotLeg.SimToGo,
        ...(operation ? { [TraceAttr.CopilotOperation]: operation } : {}),
        ...(attributes ?? {}),
      },
    },
    parentContext
  )
  const activeContext = trace.setSpan(parentContext, span)
  // Propagated trace headers are merged LAST so a stale traceparent in the
  // caller-provided headers can never shadow the span we just opened.
  const propagatedHeaders = traceHeaders({}, activeContext)
  const mergedHeaders = {
    ...normalizeHeadersInit(providedHeaders),
    ...propagatedHeaders,
  }
  const start = performance.now()
  try {
    const response = await context.with(activeContext, () =>
      fetch(url, {
        ...init,
        method,
        headers: mergedHeaders,
      })
    )
    const elapsedMs = performance.now() - start
    const contentLength = Number(response.headers.get('content-length') ?? 0)
    span.setAttribute(TraceAttr.HttpStatusCode, response.status)
    span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(elapsedMs))
    if (contentLength > 0) {
      span.setAttribute(TraceAttr.HttpResponseContentLength, contentLength)
    }
    // Only mark ERROR for actionable status codes. 4xx that represent
    // normal auth/validation rejections (400/401/403/404/405/422/etc.)
    // stay UNSET so error dashboards don't drown in expected rejection
    // paths. See `isActionableErrorStatus` in Go's telemetry middleware
    // for the mirror rule (5xx + 402/409/429).
    if (isActionableErrorStatus(response.status)) {
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: `HTTP ${response.status}`,
      })
    } else {
      span.setStatus({ code: SpanStatusCode.OK })
    }
    return response
  } catch (error) {
    span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(performance.now() - start))
    markSpanForError(span, error)
    throw error
  } finally {
    span.end()
  }
}

/**
 * Flatten any legal HeadersInit shape (Headers instance, `[key, value][]`
 * tuples, or a plain record) into a plain string record.
 *
 * A bare object spread of a `Headers` instance yields `{}` because its
 * entries are not own enumerable properties — which would silently drop
 * every caller-supplied header (auth keys included). Normalizing here
 * makes the merge with the propagated trace headers correct for all
 * three shapes.
 */
function normalizeHeadersInit(init: HeadersInit | undefined): Record<string, string> {
  if (!init) return {}
  if (init instanceof Headers) {
    const out: Record<string, string> = {}
    init.forEach((value, key) => {
      out[key] = value
    })
    return out
  }
  if (Array.isArray(init)) {
    return Object.fromEntries(init)
  }
  return { ...init }
}
/**
 * Parse a URL string, returning null instead of throwing on malformed
 * input so callers can fall back to the raw string.
 */
function safeParseUrl(url: string): URL | null {
  let parsed: URL | null = null
  try {
    parsed = new URL(url)
  } catch {
    // Malformed URL — leave parsed as null.
  }
  return parsed
}

View File

@@ -0,0 +1,57 @@
import { type Context, context } from '@opentelemetry/api'
import { W3CTraceContextPropagator } from '@opentelemetry/core'
const propagator = new W3CTraceContextPropagator()
// Setter adapter letting the W3C propagator write header values into a
// plain string-record carrier.
const headerSetter = {
  set: (carrier: Record<string, string>, key: string, value: string): void => {
    carrier[key] = value
  },
}
// Getter adapter letting the W3C propagator read incoming fetch Headers.
const headerGetter = {
  keys: (carrier: Headers): string[] => Array.from(carrier.keys()),
  get: (carrier: Headers, key: string): string | undefined => {
    const value = carrier.get(key)
    return value === null ? undefined : value
  },
}
/**
 * Injects W3C trace context (traceparent, tracestate) into outbound HTTP
 * headers so Go-side spans join the same OTel trace tree as the calling
 * Sim span.
 *
 * Usage: spread the result into your fetch headers:
 *   fetch(url, { headers: { ...myHeaders, ...traceHeaders() } })
 *
 * @param carrier optional record mutated in place; a fresh one is created
 *   when omitted
 * @param otelContext context to inject from; defaults to the active context
 * @returns the carrier, now containing the injected trace headers
 */
export function traceHeaders(
  carrier?: Record<string, string>,
  otelContext?: Context
): Record<string, string> {
  const target = carrier ?? {}
  const sourceContext = otelContext ?? context.active()
  propagator.inject(sourceContext, target, headerSetter)
  return target
}
/**
 * Extracts W3C trace context from incoming request headers (traceparent /
 * tracestate) and returns an OTel Context seeded with the upstream span.
 *
 * Call this at the top of inbound Sim route handlers that Go calls into
 * (e.g. /api/billing/update-cost, /api/copilot/api-keys/validate) so the
 * Sim-side span becomes a proper child of the Go-side client span in the
 * same trace — closing the round trip in Jaeger.
 *
 * When no traceparent header is present (calls from a browser or any
 * uninstrumented client), extraction leaves the active context untouched
 * and any span started under it becomes a new root — identical behavior
 * to code written before this helper existed.
 */
export function contextFromRequestHeaders(headers: Headers): Context {
  const baseContext = context.active()
  return propagator.extract(baseContext, headers, headerGetter)
}

View File

@@ -17,7 +17,7 @@ import {
runStreamLoop,
} from '@/lib/copilot/request/go/stream'
import { createEvent } from '@/lib/copilot/request/session'
import { TraceCollector } from '@/lib/copilot/request/trace'
import { RequestTraceV1Outcome, TraceCollector } from '@/lib/copilot/request/trace'
import type { ExecutionContext, StreamingContext } from '@/lib/copilot/request/types'
function createSseResponse(events: unknown[]): Response {
@@ -281,4 +281,55 @@ describe('copilot go stream helpers', () => {
context.errors.some((message) => message.includes('Failed to parse SSE event JSON'))
).toBe(true)
})
// Verifies the envelope's trace block handling: when an event carries both
// a `requestId` and a distinct `goTraceId`, the Sim request id stays on the
// streaming context and the Go trace id is what the built trace reports.
it('records a split canonical request id and go trace id from the stream envelope', async () => {
  vi.mocked(fetch).mockResolvedValueOnce(
    createSseResponse([
      // First event: raw envelope carrying the split trace identifiers.
      {
        v: 1,
        type: MothershipStreamV1EventType.text,
        seq: 1,
        ts: '2026-01-01T00:00:00.000Z',
        stream: { streamId: 'stream-1', cursor: '1' },
        trace: {
          requestId: 'sim-request-1',
          goTraceId: 'go-trace-1',
        },
        payload: {
          channel: 'assistant',
          text: 'hello',
        },
      },
      // Second event: terminal `complete` so the read loop exits cleanly.
      createEvent({
        streamId: 'stream-1',
        cursor: '2',
        seq: 2,
        requestId: 'sim-request-1',
        type: MothershipStreamV1EventType.complete,
        payload: {
          status: MothershipStreamV1CompletionStatus.complete,
        },
      }),
    ])
  )
  const context = createStreamingContext()
  context.requestId = 'sim-request-1'
  const execContext: ExecutionContext = {
    userId: 'user-1',
    workflowId: 'workflow-1',
  }
  await runStreamLoop('https://example.com/mothership/stream', {}, context, execContext, {
    timeout: 1000,
  })
  // The canonical Sim request id must be untouched by the envelope…
  expect(context.requestId).toBe('sim-request-1')
  // …while the built trace reports the Go-side trace id.
  expect(
    context.trace.build({
      outcome: RequestTraceV1Outcome.success,
      simRequestId: 'sim-request-1',
    }).goTraceId
  ).toBe('go-trace-1')
})
})

View File

@@ -1,7 +1,15 @@
import { type Context, SpanStatusCode } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { ORCHESTRATION_TIMEOUT_MS } from '@/lib/copilot/constants'
import { MothershipStreamV1SpanLifecycleEvent } from '@/lib/copilot/generated/mothership-stream-v1'
import {
type MothershipStreamV1EventType,
MothershipStreamV1SpanLifecycleEvent,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { CopilotSseCloseReason } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import {
buildPreviewContentUpdate,
createFilePreviewAdapterState,
@@ -12,9 +20,11 @@ import {
import { FatalSseEventError, processSSEStream } from '@/lib/copilot/request/go/parser'
import {
handleSubagentRouting,
prePersistClientExecutableToolCall,
sseHandlers,
subAgentHandlers,
} from '@/lib/copilot/request/handlers'
import { getCopilotTracer } from '@/lib/copilot/request/otel'
import {
eventToStreamEvent,
isSubagentSpanStreamEvent,
@@ -92,6 +102,7 @@ export interface StreamLoopOptions extends OrchestratorOptions {
* Called when the Go backend's trace ID (go_trace_id) is first received via SSE.
*/
onGoTraceId?: (goTraceId: string) => void
otelContext?: Context
}
/**
@@ -111,15 +122,31 @@ export async function runStreamLoop(
const { timeout = ORCHESTRATION_TIMEOUT_MS, abortSignal } = options
const filePreviewAdapterState = createFilePreviewAdapterState()
const fetchSpan = context.trace.startSpan(
`HTTP Request → ${new URL(fetchUrl).pathname}`,
'sim.http.fetch',
{ url: fetchUrl }
)
const response = await fetch(fetchUrl, {
const pathname = new URL(fetchUrl).pathname
const requestBodyBytes = estimateBodyBytes(fetchOptions.body)
const fetchSpan = context.trace.startSpan(`HTTP Request → ${pathname}`, 'sim.http.fetch', {
url: fetchUrl,
method: fetchOptions.method ?? 'GET',
requestBodyBytes,
})
const fetchStart = performance.now()
const response = await fetchGo(fetchUrl, {
...fetchOptions,
signal: abortSignal,
otelContext: options.otelContext,
spanName: `sim → go ${pathname}`,
operation: 'stream',
attributes: {
[TraceAttr.CopilotStream]: true,
...(requestBodyBytes ? { [TraceAttr.HttpRequestContentLength]: requestBodyBytes } : {}),
},
})
const headersElapsedMs = Math.round(performance.now() - fetchStart)
fetchSpan.attributes = {
...(fetchSpan.attributes ?? {}),
status: response.status,
headersMs: headersElapsedMs,
}
if (!response.ok) {
context.trace.endSpan(fetchSpan, 'error')
@@ -141,142 +168,237 @@ export async function runStreamLoop(
}
context.trace.endSpan(fetchSpan)
const reader = response.body.getReader()
const bodySpan = context.trace.startSpan(`SSE Body → ${pathname}`, 'sim.http.stream_body', {
url: fetchUrl,
method: fetchOptions.method ?? 'GET',
})
// Aggregate counters populated inline by the reader wrapper + onEvent
// dispatcher below and flushed to both the legacy TraceCollector span
// and the OTel read-loop span when the loop terminates. Kept as plain
// JS variables (not span attrs) so incrementing them is free — we
// only pay OTel cost once at span End().
//
// Idle-gap tracking is split two ways so we can tell apart
// upstream-silent from we-were-busy:
//
// - `longestInboundGapMs`: biggest time between consecutive
// `reader.read()` calls returning bytes. Upper bound on
// "Go silent". Actually also includes Node waiting for main
// thread free, so see dispatchMs below.
// - `longestDispatchMs`: biggest time any single event handler
// took between "event received" and "returned control". Upper
// bound on "Sim was CPU-bound on a handler". If this is high
// AND inbound gap is high at the same time, it's Sim. If only
// inbound gap is high, it's upstream.
// - `totalDispatchMs`: sum of all handler times. Helps gauge
// whether handlers in aggregate ate a meaningful fraction of
// the read loop.
const counters = {
bytes: 0,
chunks: 0,
events: 0,
eventsByType: {
session: 0,
text: 0,
tool: 0,
span: 0,
resource: 0,
run: 0,
error: 0,
complete: 0,
} as Record<MothershipStreamV1EventType, number>,
firstEventMs: undefined as number | undefined,
lastChunkMs: performance.now(),
longestInboundGapMs: 0,
longestDispatchMs: 0,
totalDispatchMs: 0,
}
const bodyStart = performance.now()
let endedOn: string = CopilotSseCloseReason.Terminal
// Wrap the body's reader so we can track per-chunk bytes and the gap
// between chunks. `processSSEStream` consumes this reader exactly as
// it would the raw one — no API changes there.
const IDLE_GAP_EVENT_THRESHOLD_MS = 10000
const rawReader = response.body.getReader()
const reader: ReadableStreamDefaultReader<Uint8Array> = {
async read() {
const result = await rawReader.read()
if (!result.done && result.value) {
const now = performance.now()
const gap = now - counters.lastChunkMs
if (gap > counters.longestInboundGapMs) counters.longestInboundGapMs = gap
counters.lastChunkMs = now
counters.chunks += 1
counters.bytes += result.value.byteLength
}
return result
},
cancel: (reason) => rawReader.cancel(reason),
releaseLock: () => rawReader.releaseLock(),
get closed() {
return rawReader.closed
},
}
const decoder = new TextDecoder()
const timeoutId = setTimeout(() => {
context.errors.push('Request timed out')
context.streamComplete = true
endedOn = CopilotSseCloseReason.Timeout
reader.cancel().catch(() => {})
}, timeout)
try {
await processSSEStream(reader, decoder, abortSignal, async (raw) => {
if (abortSignal?.aborted) {
context.wasAborted = true
return true
}
const parsedEvent = parsePersistedStreamEventEnvelope(raw)
if (!parsedEvent.ok) {
const detail = [parsedEvent.message, ...(parsedEvent.errors ?? [])]
.filter(Boolean)
.join('; ')
const failureMessage = `Received invalid stream event on shared path: ${detail}`
context.errors.push(failureMessage)
logger.error('Received invalid stream event on shared path', {
reason: parsedEvent.reason,
message: parsedEvent.message,
errors: parsedEvent.errors,
})
throw new FatalSseEventError(failureMessage)
}
const envelope = parsedEvent.event
const streamEvent = eventToStreamEvent(envelope)
if (envelope.trace?.requestId) {
const prev = context.requestId
context.requestId = envelope.trace.requestId
context.trace.setGoTraceId(envelope.trace.requestId)
if (envelope.trace.requestId !== prev) {
options.onGoTraceId?.(envelope.trace.requestId)
}
}
if (shouldSkipToolCallEvent(streamEvent) || shouldSkipToolResultEvent(streamEvent)) {
return
}
await processFilePreviewStreamEvent({
streamId: envelope.stream.streamId,
streamEvent,
context,
execContext,
options,
state: filePreviewAdapterState,
})
// Track how long THIS handler invocation takes so we can tell
// apart "Go was silent" from "we were CPU-bound on a handler".
// `longestInboundGapMs` includes handler time (the next reader.read
// doesn't run until the previous handler returns), so dispatch
// time is the correction needed to isolate upstream silence.
const dispatchStart = performance.now()
try {
await options.onEvent?.(streamEvent)
} catch (error) {
logger.warn('Failed to forward stream event', {
type: streamEvent.type,
error: toError(error).message,
})
}
if (counters.events === 0) {
counters.firstEventMs = Math.round(performance.now() - bodyStart)
}
counters.events += 1
if (abortSignal?.aborted) {
context.wasAborted = true
return true
}
// Yield a macrotask so Node.js flushes the HTTP response buffer to
// the browser. Microtask yields (await Promise.resolve()) are not
// enough — the I/O layer needs a full event loop tick to write.
await new Promise<void>((resolve) => setImmediate(resolve))
const parsedEvent = parsePersistedStreamEventEnvelope(raw)
if (!parsedEvent.ok) {
const detail = [parsedEvent.message, ...(parsedEvent.errors ?? [])]
.filter(Boolean)
.join('; ')
const failureMessage = `Received invalid stream event on shared path: ${detail}`
context.errors.push(failureMessage)
logger.error('Received invalid stream event on shared path', {
reason: parsedEvent.reason,
message: parsedEvent.message,
errors: parsedEvent.errors,
})
throw new FatalSseEventError(failureMessage)
}
if (options.onBeforeDispatch?.(streamEvent, context)) {
return context.streamComplete || undefined
}
const envelope = parsedEvent.event
const streamEvent = eventToStreamEvent(envelope)
if (envelope.trace?.requestId) {
const goTraceId = envelope.trace.goTraceId || envelope.trace.requestId
context.trace.setGoTraceId(goTraceId)
options.onGoTraceId?.(goTraceId)
}
if (isSubagentSpanStreamEvent(streamEvent)) {
const spanData = parseSubagentSpanData(streamEvent.payload.data)
const toolCallId = streamEvent.scope?.parentToolCallId || spanData?.toolCallId
const subagentName = streamEvent.payload.agent
const spanEvt = streamEvent.payload.event
const isPendingPause = spanData?.pending === true
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.start) {
const lastParent = context.subAgentParentStack[context.subAgentParentStack.length - 1]
const lastBlock = context.contentBlocks[context.contentBlocks.length - 1]
if (toolCallId) {
if (lastParent !== toolCallId) {
context.subAgentParentStack.push(toolCallId)
}
context.subAgentParentToolCallId = toolCallId
context.subAgentContent[toolCallId] ??= ''
context.subAgentToolCalls[toolCallId] ??= []
}
if (
subagentName &&
!(
lastParent === toolCallId &&
lastBlock?.type === 'subagent' &&
lastBlock.content === subagentName
)
) {
context.contentBlocks.push({
type: 'subagent',
content: subagentName,
timestamp: Date.now(),
})
}
// Per-type counters for the copilot.sse.read_loop span. Bound set
// (8 types) so this can never blow up into high cardinality.
if (streamEvent.type in counters.eventsByType) {
counters.eventsByType[streamEvent.type as MothershipStreamV1EventType] += 1
}
if (shouldSkipToolCallEvent(streamEvent) || shouldSkipToolResultEvent(streamEvent)) {
return
}
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.end) {
if (isPendingPause) {
await processFilePreviewStreamEvent({
streamId: envelope.stream.streamId,
streamEvent,
context,
execContext,
options,
state: filePreviewAdapterState,
})
await prePersistClientExecutableToolCall(streamEvent, context)
try {
await options.onEvent?.(streamEvent)
} catch (error) {
logger.warn('Failed to forward stream event', {
type: streamEvent.type,
error: error instanceof Error ? error.message : String(error),
})
}
// Yield a macrotask so Node.js flushes the HTTP response buffer to
// the browser. Microtask yields (await Promise.resolve()) are not
// enough — the I/O layer needs a full event loop tick to write.
await new Promise<void>((resolve) => setImmediate(resolve))
if (options.onBeforeDispatch?.(streamEvent, context)) {
return context.streamComplete || undefined
}
if (isSubagentSpanStreamEvent(streamEvent)) {
const spanData = parseSubagentSpanData(streamEvent.payload.data)
const toolCallId = streamEvent.scope?.parentToolCallId || spanData?.toolCallId
const subagentName = streamEvent.payload.agent
const spanEvt = streamEvent.payload.event
const isPendingPause = spanData?.pending === true
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.start) {
const lastParent = context.subAgentParentStack[context.subAgentParentStack.length - 1]
const lastBlock = context.contentBlocks[context.contentBlocks.length - 1]
if (toolCallId) {
if (lastParent !== toolCallId) {
context.subAgentParentStack.push(toolCallId)
}
context.subAgentParentToolCallId = toolCallId
context.subAgentContent[toolCallId] ??= ''
context.subAgentToolCalls[toolCallId] ??= []
}
if (
subagentName &&
!(
lastParent === toolCallId &&
lastBlock?.type === 'subagent' &&
lastBlock.content === subagentName
)
) {
context.contentBlocks.push({
type: 'subagent',
content: subagentName,
timestamp: Date.now(),
})
}
return
}
if (context.subAgentParentStack.length > 0) {
context.subAgentParentStack.pop()
} else {
logger.warn('subagent end without matching start')
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.end) {
if (isPendingPause) {
return
}
if (context.subAgentParentStack.length > 0) {
context.subAgentParentStack.pop()
} else {
logger.warn('subagent end without matching start')
}
context.subAgentParentToolCallId =
context.subAgentParentStack.length > 0
? context.subAgentParentStack[context.subAgentParentStack.length - 1]
: undefined
return
}
context.subAgentParentToolCallId =
context.subAgentParentStack.length > 0
? context.subAgentParentStack[context.subAgentParentStack.length - 1]
: undefined
return
}
}
if (handleSubagentRouting(streamEvent, context)) {
const handler = subAgentHandlers[streamEvent.type]
if (handleSubagentRouting(streamEvent, context)) {
const handler = subAgentHandlers[streamEvent.type]
if (handler) {
await handler(streamEvent, context, execContext, options)
}
return context.streamComplete || undefined
}
const handler = sseHandlers[streamEvent.type]
if (handler) {
await handler(streamEvent, context, execContext, options)
}
return context.streamComplete || undefined
} finally {
const dispatchMs = performance.now() - dispatchStart
counters.totalDispatchMs += dispatchMs
if (dispatchMs > counters.longestDispatchMs) counters.longestDispatchMs = dispatchMs
}
const handler = sseHandlers[streamEvent.type]
if (handler) {
await handler(streamEvent, context, execContext, options)
}
return context.streamComplete || undefined
})
if (!context.streamComplete && !abortSignal?.aborted && !context.wasAborted) {
@@ -288,18 +410,209 @@ export async function runStreamLoop(
requestId: context.requestId,
messageId: context.messageId,
})
endedOn = CopilotSseCloseReason.ClosedNoTerminal
throw new CopilotBackendError(message, { status: 503 })
}
} catch (error) {
if (error instanceof FatalSseEventError && !context.errors.includes(error.message)) {
context.errors.push(error.message)
}
if (endedOn === CopilotSseCloseReason.Terminal) {
endedOn =
error instanceof CopilotBackendError
? CopilotSseCloseReason.BackendError
: error instanceof BillingLimitError
? CopilotSseCloseReason.BillingLimit
: CopilotSseCloseReason.Error
}
throw error
} finally {
if (abortSignal?.aborted) {
context.wasAborted = true
await reader.cancel().catch(() => {})
if (endedOn === CopilotSseCloseReason.Terminal) {
endedOn = CopilotSseCloseReason.Aborted
}
}
clearTimeout(timeoutId)
// Legacy TraceCollector span (consumed by the in-memory trace
// collector, kept for backwards compatibility with existing
// tooling). The real OTel span is stamped below.
const bodyDurationMs = Math.round(performance.now() - bodyStart)
bodySpan.attributes = {
...(bodySpan.attributes ?? {}),
eventsReceived: counters.events,
firstEventMs: counters.firstEventMs,
endedOn,
durationMs: bodyDurationMs,
}
context.trace.endSpan(
bodySpan,
endedOn === CopilotSseCloseReason.Terminal
? 'ok'
: endedOn === CopilotSseCloseReason.Aborted
? 'cancelled'
: 'error'
)
// Real OTel span for Tempo/Grafana. Stamped aggregate-only so
// there is no per-chunk OTel cost — one span per read loop with
// integer counters, plus a bounded set of events.
//
// `expectedTerminal` = "the caller considered this leg the FINAL
// leg and genuinely expected a terminal event on the wire." We
// derive it from `context.streamComplete` MINUS the tool-pause
// case: when the server emits a `run.checkpoint_pause`, its
// handler also sets `streamComplete=true` to stop the read loop
// cleanly, but no `complete` SSE event is ever sent in that
// case — that's the tool-pause protocol, not a missing terminal.
// `awaitingAsyncContinuation` is set by the same handler, so
// its presence distinguishes "tool pause, no terminal expected"
// from "caller thought stream was done but server never said so"
// (= the real disappeared-response bug class).
const expectedTerminal = context.streamComplete && !context.awaitingAsyncContinuation
stampSseReadLoopSpan(bodyStart, counters, endedOn, fetchUrl, pathname, {
idleGapEventThresholdMs: IDLE_GAP_EVENT_THRESHOLD_MS,
expectedTerminal,
})
}
}
/**
 * Best-effort size estimate (in bytes) for an outbound request body, used
 * only as a telemetry span attribute — never for Content-Length.
 *
 * - strings are counted by UTF-16 code units, a cheap approximation that
 *   undercounts multi-byte UTF-8 characters (acceptable for telemetry)
 * - ArrayBuffer and typed-array views report their exact byte length
 * - Blob bodies report their exact `size`; URLSearchParams are measured
 *   via their serialized form
 * - streams and any other body kind return 0 (size unknowable without
 *   consuming the body)
 */
function estimateBodyBytes(body: BodyInit | null | undefined): number {
  if (!body) {
    return 0
  }
  if (typeof body === 'string') {
    return body.length
  }
  if (body instanceof ArrayBuffer) {
    return body.byteLength
  }
  if (ArrayBuffer.isView(body)) {
    return body.byteLength
  }
  // `typeof Blob` guard keeps this safe in runtimes without a Blob global.
  if (typeof Blob !== 'undefined' && body instanceof Blob) {
    return body.size
  }
  if (body instanceof URLSearchParams) {
    return body.toString().length
  }
  return 0
}
// Aggregate counters collected over one SSE read loop and flushed to the
// `copilot.sse.read_loop` span once the loop terminates.
type SseReadLoopCounters = {
  // Total body bytes and chunk count returned by reader.read().
  bytes: number
  chunks: number
  // Parsed SSE events, overall and bucketed by envelope event type.
  events: number
  eventsByType: Record<MothershipStreamV1EventType, number>
  // Ms from body start to the first event; undefined when none arrived.
  firstEventMs: number | undefined
  // Longest gap between consecutive chunk reads (includes handler time).
  longestInboundGapMs: number
  // Longest single onEvent dispatch, and the sum over all dispatches.
  longestDispatchMs: number
  totalDispatchMs: number
}
/**
 * Emit a single `copilot.sse.read_loop` OTel span carrying the aggregate
 * counters gathered during the read loop. The span is back-dated via
 * `startTime` so its duration matches the loop's wall clock even though
 * OTel is only touched once, at the end.
 *
 * Deliberately synchronous and aggregate-only — no per-chunk span calls:
 * total OTel cost per read loop is fixed (~10 attrs + up to 3 events),
 * independent of chunk count.
 */
function stampSseReadLoopSpan(
  startPerfMs: number,
  counters: SseReadLoopCounters,
  closeReason: string,
  fetchUrl: string,
  pathname: string,
  opts: { idleGapEventThresholdMs: number; expectedTerminal: boolean }
): void {
  // Pair a performance.now() reading with Date.now() so the back-dated
  // startTime and the explicit end() timestamp come from the same clock
  // (OTel accepts either representation, but they must be consistent).
  const endPerf = performance.now()
  const endWall = Date.now()
  const startWall = endWall - (endPerf - startPerfMs)
  const sawTerminal = counters.eventsByType.complete > 0
  // Single-attribute dashboard signal for the "disappeared response" bug
  // class: the caller considered this leg final but no `complete` event
  // arrived on the wire. Tool-pause legs pass expectedTerminal=false and
  // can never trip this, so dashboards can filter on
  // `{ .copilot.sse.terminal_event_missing = true }` without false
  // positives.
  const missingTerminal = opts.expectedTerminal && !sawTerminal
  const attributes = {
    [TraceAttr.HttpUrl]: fetchUrl,
    [TraceAttr.HttpPath]: pathname,
    [TraceAttr.CopilotSseBytesReceived]: counters.bytes,
    [TraceAttr.CopilotSseChunksReceived]: counters.chunks,
    [TraceAttr.CopilotSseEventsReceived]: counters.events,
    [TraceAttr.CopilotSseEventsSession]: counters.eventsByType.session,
    [TraceAttr.CopilotSseEventsText]: counters.eventsByType.text,
    [TraceAttr.CopilotSseEventsTool]: counters.eventsByType.tool,
    [TraceAttr.CopilotSseEventsSpan]: counters.eventsByType.span,
    [TraceAttr.CopilotSseEventsResource]: counters.eventsByType.resource,
    [TraceAttr.CopilotSseEventsRun]: counters.eventsByType.run,
    [TraceAttr.CopilotSseEventsError]: counters.eventsByType.error,
    [TraceAttr.CopilotSseEventsComplete]: counters.eventsByType.complete,
    [TraceAttr.CopilotSseLongestInboundGapMs]: Math.round(counters.longestInboundGapMs),
    [TraceAttr.CopilotSseLongestDispatchMs]: Math.round(counters.longestDispatchMs),
    [TraceAttr.CopilotSseTotalDispatchMs]: Math.round(counters.totalDispatchMs),
    [TraceAttr.CopilotSseCloseReason]: closeReason,
    [TraceAttr.CopilotSseExpectedTerminal]: opts.expectedTerminal,
    [TraceAttr.CopilotSseTerminalEventSeen]: sawTerminal,
    [TraceAttr.CopilotSseTerminalEventMissing]: missingTerminal,
  }
  const span = getCopilotTracer().startSpan(TraceSpan.CopilotSseReadLoop, {
    startTime: startWall,
    attributes,
  })
  if (counters.firstEventMs !== undefined) {
    span.setAttribute(TraceAttr.CopilotSseFirstEventMs, counters.firstEventMs)
    // Anchor the event at the instant the first SSE event actually landed
    // (startWall + firstEventMs) so trace waterfalls show the marker at
    // the TTFT point, not at span end.
    span.addEvent(
      TraceEvent.CopilotSseFirstEvent,
      { [TraceAttr.CopilotSseFirstEventMs]: counters.firstEventMs },
      startWall + counters.firstEventMs
    )
  }
  // The idle-gap event keys off the INBOUND gap (time between TCP reads
  // that returned bytes) — the "upstream silent or Sim CPU-bound" signal.
  // Dispatch time on its own stays an attribute only: it is within our
  // control and doesn't warrant an event.
  if (counters.longestInboundGapMs >= opts.idleGapEventThresholdMs) {
    span.addEvent(TraceEvent.CopilotSseIdleGapExceeded, {
      [TraceAttr.CopilotSseLongestInboundGapMs]: Math.round(counters.longestInboundGapMs),
      [TraceAttr.CopilotSseLongestDispatchMs]: Math.round(counters.longestDispatchMs),
    })
  }
  if (sawTerminal) {
    span.addEvent(TraceEvent.CopilotSseTerminalEventReceived)
  }
  // Only genuine failures get ERROR status: user aborts and clean
  // terminals stay UNSET so `status=error` dashboards don't light up for
  // normal cancellations. Tool-pause legs (caller never set
  // streamComplete) are NOT errors despite lacking a complete event.
  if (missingTerminal) {
    span.setStatus({
      code: SpanStatusCode.ERROR,
      message: 'SSE read loop finished without terminal event (caller expected one)',
    })
  } else if (
    closeReason !== CopilotSseCloseReason.Terminal &&
    closeReason !== CopilotSseCloseReason.Aborted
  ) {
    span.setStatus({
      code: SpanStatusCode.ERROR,
      message: `SSE read loop ended with reason: ${closeReason}`,
    })
  }
  span.end(endWall)
}

View File

@@ -8,9 +8,10 @@ import { handleRunEvent } from './run'
import { handleSessionEvent } from './session'
import { handleSpanEvent } from './span'
import { handleTextEvent } from './text'
import { handleToolEvent } from './tool'
import { handleToolEvent, prePersistClientExecutableToolCall } from './tool'
import type { StreamHandler } from './types'
export { prePersistClientExecutableToolCall }
export type { StreamHandler, ToolScope } from './types'
const logger = createLogger('CopilotHandlerRouting')

View File

@@ -1,3 +1,62 @@
import {
MothershipStreamV1SpanLifecycleEvent,
MothershipStreamV1SpanPayloadKind,
} from '@/lib/copilot/generated/mothership-stream-v1'
import type { StreamHandler } from './types'
export const handleSpanEvent: StreamHandler = () => {}
/**
 * Mirror Go-emitted span lifecycle events onto the Sim-side TraceCollector.
 *
 * Go publishes `span` events for subagent lifecycles and structured-result
 * payloads. Subagent start/end pairs open and close a named collector span
 * (keyed by agent + parent tool call id so concurrent subagents don't
 * collide); result payloads are recorded as immediately-closed spans. The
 * start/end pair is also used for UI routing elsewhere; recording it here
 * lets the final RequestTraceV1 report show the full nested structure
 * without the reader inspecting the raw envelope stream.
 */
export const handleSpanEvent: StreamHandler = (event, context) => {
  if (event.type !== 'span') {
    return
  }
  const spanPayload = event.payload as {
    kind?: string
    event?: string
    agent?: string
    data?: unknown
  }
  const payloadKind = spanPayload?.kind ?? ''
  const lifecycle = spanPayload?.event ?? ''
  if (payloadKind === MothershipStreamV1SpanPayloadKind.subagent) {
    const scopeAgent =
      typeof spanPayload.agent === 'string' && spanPayload.agent ? spanPayload.agent : 'subagent'
    const spanKey = `${scopeAgent}:${event.scope?.parentToolCallId || ''}`
    if (lifecycle === MothershipStreamV1SpanLifecycleEvent.start) {
      const span = context.trace.startSpan(`subagent:${scopeAgent}`, 'go.subagent', {
        agent: scopeAgent,
        parentToolCallId: event.scope?.parentToolCallId,
      })
      context.subAgentTraceSpans ??= new Map()
      context.subAgentTraceSpans.set(spanKey, span)
    } else if (lifecycle === MothershipStreamV1SpanLifecycleEvent.end) {
      const openSpan = context.subAgentTraceSpans?.get(spanKey)
      if (openSpan) {
        context.trace.endSpan(openSpan, 'ok')
        context.subAgentTraceSpans?.delete(spanKey)
      }
    }
    return
  }
  const isResultKind =
    payloadKind === MothershipStreamV1SpanPayloadKind.structured_result ||
    payloadKind === MothershipStreamV1SpanPayloadKind.subagent_result
  if (isResultKind) {
    // Result payloads have no duration — record and immediately close.
    const resultSpan = context.trace.startSpan(
      `${payloadKind}:${spanPayload.agent ?? 'main'}`,
      `go.${payloadKind}`,
      {
        agent: spanPayload.agent,
        hasData: spanPayload.data !== undefined,
      }
    )
    context.trace.endSpan(resultSpan, 'ok')
  }
}

View File

@@ -8,6 +8,9 @@ import {
MothershipStreamV1ToolOutcome,
type MothershipStreamV1ToolResultPayload,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import {
isToolArgsDeltaStreamEvent,
isToolCallStreamEvent,
@@ -52,6 +55,52 @@ function applyToolDisplay(
if (displayTitle) toolCall.displayTitle = displayTitle
}
/**
 * Upsert the durable `async_tool_calls` row before the authoritative
 * tool-call SSE frame is forwarded to the client, so `/api/copilot/confirm`
 * can never race ahead of the row that identifies the call. This is the
 * sole persistence point for client-executable tools; the gating below
 * mirrors the client-wait branch in `dispatchToolExecution`.
 */
export async function prePersistClientExecutableToolCall(
  event: StreamEvent,
  context: StreamingContext
): Promise<void> {
  if (event.type !== 'tool' || !isToolCallStreamEvent(event)) {
    return
  }
  const payload = event.payload
  // Only the finalized call frame is persisted — skip streaming partials.
  const stillGenerating = payload.status === TOOL_CALL_STATUS.generating
  if (payload.partial === true || stillGenerating) {
    return
  }
  const ui = getToolCallUI(payload)
  if (!ui.clientExecutable) {
    return
  }
  // Internal tools never round-trip through the client.
  const catalogEntry = getToolEntry(payload.toolName)
  if (ui.internal === true || catalogEntry?.internal === true) {
    return
  }
  // Sim-executed tools are handled server-side, except workflow runs,
  // which are delegated to the client.
  const delegatedWorkflowRun = isWorkflowToolName(payload.toolName)
  if (isSimExecuted(payload.toolName) && !delegatedWorkflowRun) {
    return
  }
  if (!context.runId) {
    return
  }
  await upsertAsyncToolCall({
    runId: context.runId,
    toolCallId: payload.toolCallId,
    toolName: payload.toolName,
    args: payload.arguments,
    status: MothershipStreamV1AsyncToolRecordStatus.running,
  }).catch((err) => {
    // Best-effort: a persistence failure is logged, not propagated, so the
    // call frame still reaches the client.
    logger.warn('Failed to pre-persist async tool row before forwarding call frame', {
      toolCallId: payload.toolCallId,
      toolName: payload.toolName,
      error: err instanceof Error ? err.message : String(err),
    })
  })
}
/**
* Unified tool event handler for both main and subagent scopes.
*
@@ -363,35 +412,35 @@ async function dispatchToolExecution(
}
} else {
toolCall.status = 'executing'
const pendingPromise = (async () => {
await upsertAsyncToolCall({
runId: context.runId,
toolCallId,
toolName,
args,
status: MothershipStreamV1AsyncToolRecordStatus.running,
}).catch((err) => {
logger.warn(`Failed to persist async tool row for client-executable ${scopeLabel}tool`, {
const pendingPromise = withCopilotSpan(
TraceSpan.CopilotToolWaitForClientResult,
{
[TraceAttr.ToolName]: toolName,
[TraceAttr.ToolCallId]: toolCallId,
[TraceAttr.ToolTimeoutMs]: options.timeout || STREAM_TIMEOUT_MS,
...(context.runId ? { [TraceAttr.RunId]: context.runId } : {}),
},
async (span) => {
const completion = await waitForToolCompletion(
toolCallId,
toolName,
error: toError(err).message,
})
})
const completion = await waitForToolCompletion(
toolCallId,
options.timeout || STREAM_TIMEOUT_MS,
options.abortSignal
)
handleClientCompletion(toolCall, toolCallId, completion)
await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options)
return (
completion ?? {
status: MothershipStreamV1ToolOutcome.error,
message: 'Tool completion missing',
data: { error: 'Tool completion missing' },
options.timeout || STREAM_TIMEOUT_MS,
options.abortSignal
)
span.setAttribute(TraceAttr.ToolCompletionReceived, completion !== undefined)
if (completion) {
span.setAttribute(TraceAttr.ToolOutcome, completion.status)
}
)
})().catch((err) => {
handleClientCompletion(toolCall, toolCallId, completion)
await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options)
return (
completion ?? {
status: MothershipStreamV1ToolOutcome.error,
message: 'Tool completion missing',
data: { error: 'Tool completion missing' },
}
)
}
).catch((err) => {
logger.error(`Client-executable ${scopeLabel}tool wait failed`, {
toolCallId,
toolName,

View File

@@ -38,7 +38,7 @@ export function createRequestId(): string {
return generateId()
}
export function createShortRequestId(): string {
// Short-form request id; thin module-private wrapper over the shared
// generateRequestId helper.
function createShortRequestId(): string {
  return generateRequestId()
}

View File

@@ -1,3 +1,4 @@
import { SpanStatusCode, trace } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { updateRunStatus } from '@/lib/copilot/async-runs/repository'
@@ -5,30 +6,70 @@ import {
MothershipStreamV1CompletionStatus,
MothershipStreamV1EventType,
} from '@/lib/copilot/generated/mothership-stream-v1'
import {
type RequestTraceV1Outcome,
RequestTraceV1Outcome as RequestTraceV1OutcomeConst,
} from '@/lib/copilot/generated/request-trace-v1'
import { CopilotFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import type { StreamWriter } from '@/lib/copilot/request/session'
import type { OrchestratorResult } from '@/lib/copilot/request/types'
const logger = createLogger('CopilotStreamFinalize')
const getTracer = () => trace.getTracer('sim-copilot-finalize', '1.0.0')
/**
* Single finalization path for stream results.
* Handles abort / error / success and publishes the terminal event.
* Replaces duplicated blocks in the old chat-streaming.ts.
*/
// Single finalization path. `outcome` is the caller's resolved verdict
// so we don't have to re-derive cancel vs error from raw signals.
export async function finalizeStream(
result: OrchestratorResult,
publisher: StreamWriter,
runId: string,
aborted: boolean,
outcome: RequestTraceV1Outcome,
requestId: string
): Promise<void> {
if (aborted) {
return handleAborted(result, publisher, runId, requestId)
const spanOutcome =
outcome === RequestTraceV1OutcomeConst.cancelled
? CopilotFinalizeOutcome.Aborted
: outcome === RequestTraceV1OutcomeConst.success
? CopilotFinalizeOutcome.Success
: CopilotFinalizeOutcome.Error
const span = getTracer().startSpan(TraceSpan.CopilotFinalizeStream, {
attributes: {
[TraceAttr.CopilotFinalizeOutcome]: spanOutcome,
[TraceAttr.RunId]: runId,
[TraceAttr.RequestId]: requestId,
[TraceAttr.CopilotResultToolCalls]: result.toolCalls?.length ?? 0,
[TraceAttr.CopilotResultContentBlocks]: result.contentBlocks?.length ?? 0,
[TraceAttr.CopilotResultContentLength]: result.content?.length ?? 0,
[TraceAttr.CopilotPublisherSawComplete]: publisher.sawComplete,
[TraceAttr.CopilotPublisherClientDisconnected]: publisher.clientDisconnected,
},
})
try {
if (outcome === RequestTraceV1OutcomeConst.cancelled) {
await handleAborted(result, publisher, runId, requestId)
} else if (outcome === RequestTraceV1OutcomeConst.error) {
span.setStatus({
code: SpanStatusCode.ERROR,
message: result.error || 'orchestration failed',
})
await handleError(result, publisher, runId, requestId)
} else {
await handleSuccess(publisher, runId, requestId)
}
// Successful + cancelled paths fall through as status-unset → set
// OK so dashboards don't show "incomplete" for normal terminals.
if (outcome !== RequestTraceV1OutcomeConst.error) {
span.setStatus({ code: SpanStatusCode.OK })
}
} catch (error) {
span.recordException(error instanceof Error ? error : new Error(String(error)))
span.setStatus({ code: SpanStatusCode.ERROR, message: 'finalize threw' })
throw error
} finally {
span.end()
}
if (!result.success) {
return handleError(result, publisher, runId, requestId)
}
return handleSuccess(publisher, runId, requestId)
}
async function handleAborted(

View File

@@ -2,6 +2,9 @@
* @vitest-environment node
*/
import { propagation, trace } from '@opentelemetry/api'
import { W3CTraceContextPropagator } from '@opentelemetry/core'
import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
import type { OrchestratorResult } from '@/lib/copilot/request/types'
@@ -29,6 +32,8 @@ function createLifecycleResult(overrides?: Partial<OrchestratorResult>): Orchest
describe('runHeadlessCopilotLifecycle', () => {
beforeEach(() => {
trace.setGlobalTracerProvider(new BasicTracerProvider())
propagation.setGlobalPropagator(new W3CTraceContextPropagator())
vi.stubGlobal(
'fetch',
vi.fn().mockResolvedValue(
@@ -155,6 +160,40 @@ describe('runHeadlessCopilotLifecycle', () => {
expect(body.simRequestId).toBe('workflow-request-id')
})
it('passes an OTel context to the lifecycle and trace report', async () => {
let lifecycleTraceparent = ''
runCopilotLifecycle.mockImplementationOnce(async (_payload, options) => {
const { traceHeaders } = await import('@/lib/copilot/request/go/propagation')
lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? ''
return createLifecycleResult()
})
await runHeadlessCopilotLifecycle(
{
message: 'hello',
messageId: 'req-otel',
},
{
userId: 'user-1',
chatId: 'chat-1',
workflowId: 'workflow-1',
goRoute: '/api/mothership/execute',
interactive: false,
}
)
expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
const [, init] = vi.mocked(fetch).mock.calls[0] as [string, RequestInit]
const headers = init.headers as Record<string, string>
// The outbound trace report now runs inside its own OTel child span, so
// traceparent has the same trace-id as the lifecycle but a different
// span-id. Both must stay on the same trace.
const lifecycleTraceId = lifecycleTraceparent.split('-')[1]
expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
expect(headers.traceparent.split('-')[1]).toBe(lifecycleTraceId)
expect(headers.traceparent.split('-')[2]).not.toBe(lifecycleTraceparent.split('-')[2])
})
it('reports an error trace when the lifecycle throws', async () => {
runCopilotLifecycle.mockRejectedValueOnce(new Error('kaboom'))

View File

@@ -1,13 +1,14 @@
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateId } from '@sim/utils/id'
import type { RequestTraceV1Outcome as RequestTraceOutcome } from '@/lib/copilot/generated/request-trace-v1'
import {
RequestTraceV1Outcome,
RequestTraceV1SpanStatus,
} from '@/lib/copilot/generated/request-trace-v1'
import { CopilotTransport } from '@/lib/copilot/generated/trace-attribute-values-v1'
import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run'
import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run'
import { withCopilotOtelContext } from '@/lib/copilot/request/otel'
import { reportTrace, TraceCollector } from '@/lib/copilot/request/trace'
import type { OrchestratorResult } from '@/lib/copilot/request/types'
@@ -34,51 +35,72 @@ export async function runHeadlessCopilotLifecycle(
let result: OrchestratorResult | undefined
let outcome: RequestTraceOutcome = RequestTraceV1Outcome.error
try {
result = await runCopilotLifecycle(requestPayload, {
...options,
trace,
simRequestId,
})
outcome = options.abortSignal?.aborted
? RequestTraceV1Outcome.cancelled
: result.success
? RequestTraceV1Outcome.success
: RequestTraceV1Outcome.error
return result
} catch (error) {
outcome = options.abortSignal?.aborted
? RequestTraceV1Outcome.cancelled
: RequestTraceV1Outcome.error
throw error
} finally {
trace.endSpan(
requestSpan,
outcome === RequestTraceV1Outcome.success
? RequestTraceV1SpanStatus.ok
: outcome === RequestTraceV1Outcome.cancelled
? RequestTraceV1SpanStatus.cancelled
: RequestTraceV1SpanStatus.error
)
try {
await reportTrace(
trace.build({
outcome,
return withCopilotOtelContext(
{
requestId: simRequestId,
route: options.goRoute,
chatId: options.chatId,
workflowId: options.workflowId,
executionId: options.executionId,
runId: options.runId,
transport: CopilotTransport.Headless,
},
async (otelContext) => {
try {
result = await runCopilotLifecycle(requestPayload, {
...options,
trace,
simRequestId,
chatId: result?.chatId ?? options.chatId,
runId: options.runId,
executionId: options.executionId,
usage: result?.usage,
cost: result?.cost,
otelContext,
})
)
} catch (error) {
logger.warn('Failed to report headless trace', {
simRequestId,
chatId: result?.chatId ?? options.chatId,
error: toError(error).message,
})
outcome = options.abortSignal?.aborted
? RequestTraceV1Outcome.cancelled
: result.success
? RequestTraceV1Outcome.success
: RequestTraceV1Outcome.error
return result
} catch (error) {
outcome = options.abortSignal?.aborted
? RequestTraceV1Outcome.cancelled
: RequestTraceV1Outcome.error
throw error
} finally {
trace.endSpan(
requestSpan,
outcome === RequestTraceV1Outcome.success
? RequestTraceV1SpanStatus.ok
: outcome === RequestTraceV1Outcome.cancelled
? RequestTraceV1SpanStatus.cancelled
: RequestTraceV1SpanStatus.error
)
try {
// Best-effort extraction of the prompt from the untyped
// headless payload. Keeps parity with the streaming path
// where `message` is destructured directly.
const userMessage =
typeof requestPayload.message === 'string' ? requestPayload.message : undefined
await reportTrace(
trace.build({
outcome,
simRequestId,
chatId: result?.chatId ?? options.chatId,
runId: options.runId,
executionId: options.executionId,
userMessage,
usage: result?.usage,
cost: result?.cost,
}),
otelContext
)
} catch (error) {
logger.warn('Failed to report headless trace', {
simRequestId,
chatId: result?.chatId ?? options.chatId,
error: error instanceof Error ? error.message : String(error),
})
}
}
}
}
)
}

View File

@@ -1,3 +1,4 @@
import type { Context } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateId } from '@sim/utils/id'
@@ -50,6 +51,7 @@ export interface CopilotLifecycleOptions extends OrchestratorOptions {
goRoute?: string
trace?: TraceCollector
simRequestId?: string
otelContext?: Context
onGoTraceId?: (goTraceId: string) => void
executionContext?: ExecutionContext
}
@@ -112,6 +114,7 @@ export async function runCopilotLifecycle(
const context = createStreamingContext({
chatId,
requestId: lifecycleOptions.simRequestId,
executionId: resolvedExecutionId,
runId: resolvedRunId,
messageId: payloadMsgId,
@@ -123,6 +126,15 @@ export async function runCopilotLifecycle(
const result: OrchestratorResult = {
success: context.errors.length === 0 && !context.wasAborted,
// `cancelled` is an explicit discriminator so callers can tell
// "user hit Stop" (don't clear the chat row; /chat/stop owns it)
// from "backend errored" (do clear the row so the chat isn't
// stuck with a non-null `conversationId`). An error that also
// happens to fire the abort signal still counts as an error
// path, but practically that doesn't happen in the success
// branch here — if there are errors we never reach a
// wasAborted-without-errors state.
cancelled: context.wasAborted && context.errors.length === 0,
content: context.accumulatedContent,
contentBlocks: context.contentBlocks,
toolCalls: buildToolCallSummaries(context),
@@ -137,9 +149,23 @@ export async function runCopilotLifecycle(
} catch (error) {
const err = error instanceof Error ? error : new Error('Copilot orchestration failed')
logger.error('Copilot orchestration failed', { error: err.message })
await lifecycleOptions.onError?.(err)
// If the abort signal fired, this throw is a consequence of the
// cancel (publisher.publish fails once the client disconnects, a
// downstream Go read throws on ctx cancel, etc.) — NOT a real
// backend error. Don't invoke `onError`, because on the cancel
// path `/api/copilot/chat/stop` is the single DB writer and
// `onError` would race with it via `finalizeAssistantTurn`,
// clearing `conversationId` before stop's UPDATE can match (see
// `buildOnComplete` in chat/post.ts for the full rationale).
// Return `cancelled: true` so upstream classification stays
// consistent with the success-path cancel result.
const wasCancelled = lifecycleOptions.abortSignal?.aborted ?? false
if (!wasCancelled) {
await lifecycleOptions.onError?.(err)
}
return {
success: false,
cancelled: wasCancelled,
content: '',
contentBlocks: [],
toolCalls: [],
@@ -225,7 +251,6 @@ async function runCheckpointLoop(
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
'X-Client-Version': SIM_AGENT_VERSION,
...(options.simRequestId ? { 'X-Sim-Request-ID': options.simRequestId } : {}),
},
body: JSON.stringify(payload),
},

View File

@@ -2,7 +2,10 @@
* @vitest-environment node
*/
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { propagation, trace } from '@opentelemetry/api'
import { W3CTraceContextPropagator } from '@opentelemetry/core'
import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
import { MothershipStreamV1EventType } from '@/lib/copilot/generated/mothership-stream-v1'
const {
@@ -115,6 +118,19 @@ async function drainStream(stream: ReadableStream) {
describe('createSSEStream terminal error handling', () => {
beforeEach(() => {
vi.clearAllMocks()
trace.setGlobalTracerProvider(new BasicTracerProvider())
propagation.setGlobalPropagator(new W3CTraceContextPropagator())
vi.stubGlobal(
'fetch',
vi.fn().mockResolvedValue(
new Response(JSON.stringify({ title: 'Test title' }), {
status: 200,
headers: {
'Content-Type': 'application/json',
},
})
)
)
resetBuffer.mockResolvedValue(undefined)
clearFilePreviewSessions.mockResolvedValue(undefined)
scheduleBufferCleanup.mockResolvedValue(undefined)
@@ -131,6 +147,10 @@ describe('createSSEStream terminal error handling', () => {
updateRunStatus.mockResolvedValue(null)
})
afterEach(() => {
vi.unstubAllGlobals()
})
it('writes a terminal error event before close when orchestration returns success=false', async () => {
runCopilotLifecycle.mockResolvedValue({
success: false,
@@ -190,4 +210,39 @@ describe('createSSEStream terminal error handling', () => {
)
expect(scheduleBufferCleanup).toHaveBeenCalledWith('stream-1')
})
it('passes an OTel context into the streaming lifecycle', async () => {
let lifecycleTraceparent = ''
runCopilotLifecycle.mockImplementation(async (_payload, options) => {
const { traceHeaders } = await import('@/lib/copilot/request/go/propagation')
lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? ''
return {
success: true,
content: 'OK',
contentBlocks: [],
toolCalls: [],
}
})
const stream = createSSEStream({
requestPayload: { message: 'hello' },
userId: 'user-1',
streamId: 'stream-1',
executionId: 'exec-1',
runId: 'run-1',
currentChat: null,
isNewChat: false,
message: 'hello',
titleModel: 'gpt-5.4',
requestId: 'req-otel',
orchestrateOptions: {
goRoute: '/api/mothership',
workflowId: 'workflow-1',
},
})
await drainStream(stream)
expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
})
})

View File

@@ -1,7 +1,7 @@
import { type Context, context as otelContextApi } from '@opentelemetry/api'
import { db } from '@sim/db'
import { copilotChats } from '@sim/db/schema'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { eq } from 'drizzle-orm'
import { createRunSegment } from '@/lib/copilot/async-runs/repository'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
@@ -9,13 +9,25 @@ import {
MothershipStreamV1EventType,
MothershipStreamV1SessionKind,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
import {
RequestTraceV1Outcome,
RequestTraceV1SpanStatus,
} from '@/lib/copilot/generated/request-trace-v1'
import {
CopilotRequestCancelReason,
type CopilotRequestCancelReasonValue,
CopilotTransport,
} from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
import { finalizeStream } from '@/lib/copilot/request/lifecycle/finalize'
import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run'
import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run'
import { type CopilotLifecycleOutcome, startCopilotOtelRoot } from '@/lib/copilot/request/otel'
import {
cleanupAbortMarker,
clearFilePreviewSessions,
isExplicitStopReason,
registerActiveStream,
releasePendingChatStream,
resetBuffer,
@@ -53,6 +65,11 @@ export interface StreamingOrchestrationParams {
requestId: string
workspaceId?: string
orchestrateOptions: Omit<CopilotLifecycleOptions, 'onEvent'>
/**
* Pre-started root; child spans bind to it and `finish()` fires on
* termination. Omit to let the stream start its own root (headless).
*/
otelRoot?: ReturnType<typeof startCopilotOtelRoot>
}
export function createSSEStream(params: StreamingOrchestrationParams): ReadableStream {
@@ -71,163 +88,334 @@ export function createSSEStream(params: StreamingOrchestrationParams): ReadableS
requestId,
workspaceId,
orchestrateOptions,
otelRoot,
} = params
// Reuse caller's root if provided; otherwise start our own.
const activeOtelRoot =
otelRoot ??
startCopilotOtelRoot({
requestId,
route: orchestrateOptions.goRoute,
chatId,
workflowId: orchestrateOptions.workflowId,
executionId,
runId,
streamId,
transport: CopilotTransport.Stream,
})
const abortController = new AbortController()
registerActiveStream(streamId, abortController)
const publisher = new StreamWriter({ streamId, chatId, requestId })
// Classify cancel: signal.reason (explicit-stop set) wins, then
// clientDisconnected, else Unknown (latent contract bug — log it).
const recordCancelled = (errorMessage?: string): CopilotRequestCancelReasonValue => {
const rawReason = abortController.signal.reason
let cancelReason: CopilotRequestCancelReasonValue
if (isExplicitStopReason(rawReason)) {
cancelReason = CopilotRequestCancelReason.ExplicitStop
} else if (publisher.clientDisconnected) {
cancelReason = CopilotRequestCancelReason.ClientDisconnect
} else {
cancelReason = CopilotRequestCancelReason.Unknown
const serializedReason =
rawReason === undefined
? 'undefined'
: rawReason instanceof Error
? `${rawReason.name}: ${rawReason.message}`
: typeof rawReason === 'string'
? rawReason
: (() => {
try {
return JSON.stringify(rawReason)
} catch {
return String(rawReason)
}
})()
// Contract violation: add the new reason to AbortReason /
// isExplicitStopReason or extend the classifier.
logger.error(`[${requestId}] Stream cancelled with unknown abort reason`, {
streamId,
chatId,
reason: serializedReason,
})
activeOtelRoot.span.setAttribute(TraceAttr.CopilotAbortUnknownReason, serializedReason)
}
activeOtelRoot.span.setAttribute(TraceAttr.CopilotRequestCancelReason, cancelReason)
activeOtelRoot.span.addEvent(TraceEvent.RequestCancelled, {
[TraceAttr.CopilotRequestCancelReason]: cancelReason,
...(errorMessage ? { [TraceAttr.ErrorMessage]: errorMessage } : {}),
})
return cancelReason
}
const collector = new TraceCollector()
return new ReadableStream({
async start(controller) {
publisher.attach(controller)
const requestSpan = collector.startSpan('Mothership Request', 'request', {
streamId,
chatId,
runId,
})
let outcome: 'success' | 'error' | 'cancelled' = 'error'
let lifecycleResult:
| {
usage?: { prompt: number; completion: number }
cost?: { input: number; output: number; total: number }
}
| undefined
await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)])
if (chatId) {
createRunSegment({
id: runId,
executionId,
chatId,
userId,
workflowId: (requestPayload.workflowId as string | undefined) || null,
workspaceId,
streamId,
model: (requestPayload.model as string | undefined) || null,
provider: (requestPayload.provider as string | undefined) || null,
requestContext: { requestId },
}).catch((error) => {
logger.warn(`[${requestId}] Failed to create copilot run segment`, {
error: toError(error).message,
})
})
}
const abortPoller = startAbortPoller(streamId, abortController, { requestId })
publisher.startKeepalive()
if (chatId) {
publisher.publish({
type: MothershipStreamV1EventType.session,
payload: {
kind: MothershipStreamV1SessionKind.chat,
chatId,
},
})
}
fireTitleGeneration({
chatId,
currentChat,
isNewChat,
message,
titleModel,
titleProvider,
workspaceId,
requestId,
publisher,
})
try {
const result = await runCopilotLifecycle(requestPayload, {
...orchestrateOptions,
executionId,
runId,
trace: collector,
simRequestId: requestId,
abortSignal: abortController.signal,
onEvent: async (event) => {
await publisher.publish(event)
},
})
lifecycleResult = result
outcome = abortController.signal.aborted
? RequestTraceV1Outcome.cancelled
: result.success
? RequestTraceV1Outcome.success
: RequestTraceV1Outcome.error
await finalizeStream(result, publisher, runId, abortController.signal.aborted, requestId)
} catch (error) {
outcome = abortController.signal.aborted
? RequestTraceV1Outcome.cancelled
: RequestTraceV1Outcome.error
if (publisher.clientDisconnected) {
logger.info(`[${requestId}] Stream errored after client disconnect`, {
error: error instanceof Error ? error.message : 'Stream error',
})
}
logger.error(`[${requestId}] Unexpected orchestration error:`, error)
const syntheticResult = {
success: false as const,
content: '',
contentBlocks: [],
toolCalls: [],
error: 'An unexpected error occurred while processing the response.',
}
await finalizeStream(
syntheticResult,
publisher,
runId,
abortController.signal.aborted,
requestId
)
} finally {
collector.endSpan(
requestSpan,
outcome === RequestTraceV1Outcome.success
? 'ok'
: outcome === RequestTraceV1Outcome.cancelled
? 'cancelled'
: 'error'
)
clearInterval(abortPoller)
// Re-enter the root OTel context — ALS doesn't survive the
// Next handler → ReadableStream.start boundary.
await otelContextApi.with(activeOtelRoot.context, async () => {
const otelContext = activeOtelRoot.context
let rootOutcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error
let rootError: unknown
// `cancelReason` must be declared OUTSIDE the outer `try` so
// it remains in scope for the outer `finally` that calls
// `activeOtelRoot.finish(rootOutcome, rootError, cancelReason)`.
// `let` bindings declared inside a `try` block are NOT visible
// in the paired `finally`; referencing one there raises a
// TDZ ReferenceError, skipping `finish()`, leaving the root
// span never-ended, and making Tempo see every child as an
// orphan under a phantom parent. (Regression landed 2026-04-21.)
let cancelReason: CopilotRequestCancelReasonValue | undefined
try {
await publisher.close()
} catch (error) {
logger.warn(`[${requestId}] Failed to flush stream persistence during close`, {
error: toError(error).message,
const requestSpan = collector.startSpan('Mothership Request', 'request', {
streamId,
chatId,
runId,
})
}
unregisterActiveStream(streamId)
if (chatId) {
await releasePendingChatStream(chatId, streamId)
}
await scheduleBufferCleanup(streamId)
await scheduleFilePreviewSessionCleanup(streamId)
await cleanupAbortMarker(streamId)
let outcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error
let lifecycleResult:
| {
usage?: { prompt: number; completion: number }
cost?: { input: number; output: number; total: number }
}
| undefined
const trace = collector.build({
outcome: outcome as 'success' | 'error' | 'cancelled',
simRequestId: requestId,
streamId,
chatId,
runId,
executionId,
usage: lifecycleResult?.usage,
cost: lifecycleResult?.cost,
})
reportTrace(trace).catch(() => {})
}
await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)])
if (chatId) {
createRunSegment({
id: runId,
executionId,
chatId,
userId,
workflowId: (requestPayload.workflowId as string | undefined) || null,
workspaceId,
streamId,
model: (requestPayload.model as string | undefined) || null,
provider: (requestPayload.provider as string | undefined) || null,
requestContext: { requestId },
}).catch((error) => {
logger.warn(`[${requestId}] Failed to create copilot run segment`, {
error: error instanceof Error ? error.message : String(error),
})
})
}
const abortPoller = startAbortPoller(streamId, abortController, {
requestId,
})
publisher.startKeepalive()
if (chatId) {
publisher.publish({
type: MothershipStreamV1EventType.session,
payload: {
kind: MothershipStreamV1SessionKind.chat,
chatId,
},
})
}
fireTitleGeneration({
chatId,
currentChat,
isNewChat,
message,
titleModel,
titleProvider,
workspaceId,
requestId,
publisher,
otelContext,
})
try {
const result = await runCopilotLifecycle(requestPayload, {
...orchestrateOptions,
executionId,
runId,
trace: collector,
simRequestId: requestId,
otelContext,
abortSignal: abortController.signal,
onEvent: async (event) => {
await publisher.publish(event)
},
})
lifecycleResult = result
// Outcome classification (priority order):
// 1. `result.success` → success. The orchestrator
// reporting "finished cleanly" wins over any later
// signal change. Matters for the narrow race where
// the user clicks Stop a beat after the stream
// completed.
// 2. `signal.aborted` (from `abortActiveStream` or the
// Redis-marker poller) OR `clientDisconnected` with
// a non-success result → cancelled. `recordCancelled`
// further refines into explicit_stop / client_disconnect
// / unknown via `signal.reason`.
// 3. Otherwise → error.
outcome = result.success
? RequestTraceV1Outcome.success
: abortController.signal.aborted || publisher.clientDisconnected
? RequestTraceV1Outcome.cancelled
: RequestTraceV1Outcome.error
if (outcome === RequestTraceV1Outcome.cancelled) {
cancelReason = recordCancelled()
}
// Pass the resolved outcome — not `signal.aborted` — so
// `finalizeStream` classifies the same way we did above.
// A client-disconnect-without-controller-abort still needs
// to hit `handleAborted` (not `handleError`) so the chat
// row gets `cancelled` terminal state instead of `error`.
await finalizeStream(result, publisher, runId, outcome, requestId)
} catch (error) {
// Error-path classification: if the abort signal fired or
// the client disconnected, treat the thrown error as a
// cancel (same rationale as the try-path above).
const wasCancelled = abortController.signal.aborted || publisher.clientDisconnected
outcome = wasCancelled ? RequestTraceV1Outcome.cancelled : RequestTraceV1Outcome.error
if (outcome === RequestTraceV1Outcome.cancelled) {
cancelReason = recordCancelled(error instanceof Error ? error.message : String(error))
}
if (publisher.clientDisconnected) {
logger.info(`[${requestId}] Stream errored after client disconnect`, {
error: error instanceof Error ? error.message : 'Stream error',
})
}
// Demote to warn when the throw came from a user-initiated
// cancel — it isn't an "unexpected" failure then, and the
// error-level log pollutes alerting on normal Stop presses.
const logFn = outcome === RequestTraceV1Outcome.cancelled ? logger.warn : logger.error
logFn.call(logger, `[${requestId}] Orchestration ended with ${outcome}:`, error)
const syntheticResult = {
success: false as const,
content: '',
contentBlocks: [],
toolCalls: [],
error: 'An unexpected error occurred while processing the response.',
}
await finalizeStream(syntheticResult, publisher, runId, outcome, requestId)
} finally {
collector.endSpan(
requestSpan,
outcome === RequestTraceV1Outcome.success
? RequestTraceV1SpanStatus.ok
: outcome === RequestTraceV1Outcome.cancelled
? RequestTraceV1SpanStatus.cancelled
: RequestTraceV1SpanStatus.error
)
clearInterval(abortPoller)
try {
await publisher.close()
} catch (error) {
logger.warn(`[${requestId}] Failed to flush stream persistence during close`, {
error: error instanceof Error ? error.message : String(error),
})
}
unregisterActiveStream(streamId)
if (chatId) {
await releasePendingChatStream(chatId, streamId)
}
await scheduleBufferCleanup(streamId)
await scheduleFilePreviewSessionCleanup(streamId)
await cleanupAbortMarker(streamId)
const trace = collector.build({
outcome,
simRequestId: requestId,
streamId,
chatId,
runId,
executionId,
// Pass the raw user prompt through so the Go-side trace
// ingest can stamp it onto the `request_traces.message`
// column at insert time. Avoids relying on the late
// `UpdateAnalytics` UPDATE (which silently misses many
// rows).
userMessage: message,
usage: lifecycleResult?.usage,
cost: lifecycleResult?.cost,
})
reportTrace(trace, otelContext).catch((err) => {
logger.warn(`[${requestId}] Failed to report trace`, {
error: err instanceof Error ? err.message : String(err),
})
})
rootOutcome = outcome
if (lifecycleResult?.usage) {
activeOtelRoot.span.setAttributes({
[TraceAttr.GenAiUsageInputTokens]: lifecycleResult.usage.prompt ?? 0,
[TraceAttr.GenAiUsageOutputTokens]: lifecycleResult.usage.completion ?? 0,
})
}
if (lifecycleResult?.cost) {
activeOtelRoot.span.setAttributes({
[TraceAttr.BillingCostInputUsd]: lifecycleResult.cost.input ?? 0,
[TraceAttr.BillingCostOutputUsd]: lifecycleResult.cost.output ?? 0,
[TraceAttr.BillingCostTotalUsd]: lifecycleResult.cost.total ?? 0,
})
}
}
} catch (error) {
rootOutcome = RequestTraceV1Outcome.error
rootError = error
throw error
} finally {
// `finish` is idempotent, so it's safe whether the POST
// handler started the root (and may also call finish on an
// error path before the stream ran) or we did. The cancel
// reason (if any) determines whether `cancelled` is an
// expected outcome (explicit_stop → status OK) or a real
// error (client_disconnect / unknown → status ERROR).
//
// Belt-and-suspenders: if `finish()` itself throws (e.g. an
// argument in the TDZ, a bad attribute, a regression in
// status-setting), fall back to `span.end()` directly. A
// root that never ends leaves every child orphaned in Tempo
// under a phantom parent; force-ending it keeps the trace
// shape intact even when the pretty-finalize path is
// broken. The error is logged so Loki greps surface the
// regression instead of it silently costing us trace
// fidelity for hours.
try {
activeOtelRoot.finish(rootOutcome, rootError, cancelReason)
} catch (finishError) {
logger.error(`[${requestId}] activeOtelRoot.finish threw; force-ending root span`, {
error: finishError instanceof Error ? finishError.message : String(finishError),
})
try {
activeOtelRoot.span.end()
} catch {
// Already ended or an OTel internal failure — nothing
// more we can do. The export pipe has already had its
// chance; swallow to avoid masking the original error
// path.
}
}
}
})
},
cancel() {
// The browser's SSE reader closed. Flip `clientDisconnected` so
// in-flight `publisher.publish` calls silently no-op (prevents
// enqueueing on a closed controller).
//
// Browser disconnect is NOT an abort — firing the controller
// here retroactively reclassifies in-flight successful streams
// as aborted and skips assistant persistence. Let the
// orchestrator drain naturally; publish no-ops post-disconnect.
// Explicit Stop still fires the controller via /chat/abort.
publisher.markDisconnected()
},
})
@@ -247,6 +435,7 @@ function fireTitleGeneration(params: {
workspaceId?: string
requestId: string
publisher: StreamWriter
otelContext?: Context
}): void {
const {
chatId,
@@ -258,10 +447,16 @@ function fireTitleGeneration(params: {
workspaceId,
requestId,
publisher,
otelContext,
} = params
if (!chatId || currentChat?.title || !isNewChat) return
requestChatTitle({ message, model: titleModel, provider: titleProvider })
requestChatTitle({
message,
model: titleModel,
provider: titleProvider,
otelContext,
})
.then(async (title) => {
if (!title) return
await db.update(copilotChats).set({ title }).where(eq(copilotChats.id, chatId))
@@ -270,7 +465,11 @@ function fireTitleGeneration(params: {
payload: { kind: MothershipStreamV1SessionKind.title, title },
})
if (workspaceId) {
taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'renamed' })
taskPubSub?.publishStatusChanged({
workspaceId,
chatId,
type: 'renamed',
})
}
})
.catch((error) => {
@@ -286,20 +485,35 @@ export async function requestChatTitle(params: {
message: string
model: string
provider?: string
otelContext?: Context
}): Promise<string | null> {
const { message, model, provider } = params
const { message, model, provider, otelContext } = params
if (!message || !model) return null
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
const headers: Record<string, string> = {
'Content-Type': 'application/json',
}
if (env.COPILOT_API_KEY) {
headers['x-api-key'] = env.COPILOT_API_KEY
}
try {
const response = await fetch(`${SIM_AGENT_API_URL}/api/generate-chat-title`, {
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/generate-chat-title`, {
method: 'POST',
headers,
body: JSON.stringify({ message, model, ...(provider ? { provider } : {}) }),
body: JSON.stringify({
message,
model,
...(provider ? { provider } : {}),
}),
otelContext,
spanName: 'sim → go /api/generate-chat-title',
operation: 'generate_chat_title',
attributes: {
[TraceAttr.GenAiRequestModel]: model,
...(provider ? { [TraceAttr.GenAiSystem]: provider } : {}),
},
})
const payload = await response.json().catch(() => ({}))

View File

@@ -0,0 +1,588 @@
import { randomBytes } from 'crypto'
import {
type Context,
context,
ROOT_CONTEXT,
type Span,
type SpanContext,
SpanKind,
SpanStatusCode,
TraceFlags,
trace,
} from '@opentelemetry/api'
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
import {
CopilotBranchKind,
CopilotRequestCancelReason,
type CopilotRequestCancelReasonValue,
CopilotSurface,
CopilotTransport,
} from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation'
import { isExplicitStopReason } from '@/lib/copilot/request/session/abort-reason'
// OTel GenAI content-capture env var (spec:
// https://opentelemetry.io/docs/specs/semconv/gen-ai/). Mirrored on
// the Go side so a single var controls both halves.
const GENAI_CAPTURE_ENV = 'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'
// OTLP backends commonly reject attrs over 64 KiB; cap proactively.
const GENAI_MESSAGE_ATTR_MAX_BYTES = 60 * 1024
// Opt-in check for capturing prompt/response content on spans.
// Accepts the usual truthy spellings, case- and whitespace-insensitive.
function isGenAIMessageCaptureEnabled(): boolean {
  const value = (process.env[GENAI_CAPTURE_ENV] ?? '').trim().toLowerCase()
  return value === 'true' || value === '1' || value === 'yes'
}
// True iff `err` represents the user explicitly clicking Stop — the
// only cancellation treated as expected (non-error).
//
// Policy across the codebase: an explicit user stop leaves span
// status UNSET; every other cancellation (client tab close, network
// drop, internal timeout, uncategorized abort) escalates to
// `status=error` so it surfaces on error dashboards. This is the Sim
// mirror of `requestctx.IsExplicitUserStop` on the Go side; keep the
// two semantically aligned.
//
// Detection modes:
//
// - Plain-string reject value: `controller.abort('user_stop:...')`
//   rejects fetch() with the reason STRING directly; matched via
//   `isExplicitStopReason()` (UserStop / RedisPoller).
// - DOMException / Error object: `controller.abort()` with no arg
//   (or older runtimes) rejects with an AbortError whose `.cause`
//   or `.message` may carry the reason — both are inspected.
//
// Anything that doesn't resolve to an explicit-stop reason (plain
// AbortError with no identifiable cause, timeout-flavored aborts,
// arbitrary Error instances) returns false and gets `status=error`.
export function isExplicitUserStopError(err: unknown): boolean {
  if (err == null) return false
  if (typeof err === 'string') return isExplicitStopReason(err)
  if (typeof err !== 'object') return false
  const shaped = err as { cause?: unknown; message?: unknown }
  if (isExplicitStopReason(shaped.cause)) return true
  return typeof shaped.message === 'string' && isExplicitStopReason(shaped.message)
}
/**
 * True iff an HTTP response status code represents a real server-side
 * problem (5xx) or a user-visible condition we want to alert on
 * (402 Payment Required, 409 Conflict, 429 Too Many Requests).
 *
 * Everything else — in particular the 4xx flood from bot probes and
 * expected auth/validation rejections — stays UNSET on the span so
 * dashboards don't treat normal rejections as errors.
 *
 * Mirrored on the Go side in
 * `copilot/internal/http/middleware/telemetry.go`. Keep the two in
 * sync if you change the actionable set.
 */
export function isActionableErrorStatus(code: number): boolean {
  if (code >= 500) return true
  switch (code) {
    case 402: // Payment Required — billing failures must surface
    case 409: // Conflict
    case 429: // Too Many Requests — rate-limit pressure
      return true
    default:
      return false
  }
}
// Record the exception on the span and escalate to ERROR unless the
// error is an explicit user stop (see `isExplicitUserStopError`).
// Every other cancellation — client disconnect, internal timeout,
// uncategorized AbortError — becomes a real error that dashboards
// will surface.
export function markSpanForError(span: Span, error: unknown): void {
  const exception = error instanceof Error ? error : new Error(String(error))
  span.recordException(exception)
  // Explicit user stop: leave status UNSET per the cancellation policy.
  if (isExplicitUserStopError(error)) return
  span.setStatus({ code: SpanStatusCode.ERROR, message: exception.message })
}
// OTel GenAI message shape (kept minimal). Mirror changes on the Go side.
interface GenAIAgentPart {
  // Discriminator for the part payload below.
  type: 'text' | 'tool_call' | 'tool_call_response'
  // Set for 'text' parts (see setAgentInput/OutputMessages).
  content?: string
  // Set for 'tool_call' parts: the call id assigned by the model.
  id?: string
  // Set for 'tool_call' parts: tool name.
  name?: string
  // Set for 'tool_call' parts when the call carried arguments.
  arguments?: Record<string, unknown>
  // presumably used for 'tool_call_response' parts — not populated in
  // this module; confirm against the Go-side mirror before relying on it.
  response?: string
}
interface GenAIAgentMessage {
  // OTel GenAI role vocabulary.
  role: 'system' | 'user' | 'assistant' | 'tool'
  parts: GenAIAgentPart[]
}
/**
 * Serialize GenAI messages to JSON for a span attribute, keeping the
 * result under `GENAI_MESSAGE_ATTR_MAX_BYTES`.
 *
 * Size is measured in UTF-8 BYTES via `Buffer.byteLength` — the cap
 * exists because OTLP backends reject oversized attributes by byte
 * size, and the Go mirror's `len(string)` counts bytes too. (The
 * previous `.length` check counted UTF-16 code units, undercounting
 * multibyte content by up to 4x.)
 *
 * Truncation strategy, matching the Go side:
 *  1. Drop whole messages from the FRONT until the tail fits — the
 *     last message is usually the most diagnostic for span outcome.
 *  2. If a single message is still over the cap, truncate its text
 *     parts with a visible marker. This is done on a COPY so the
 *     caller's message objects are never mutated.
 *
 * @returns the JSON string, or undefined when `messages` is empty or
 *          nothing can be made to fit under the cap.
 */
function marshalAgentMessages(messages: GenAIAgentMessage[]): string | undefined {
  if (messages.length === 0) return undefined
  const fits = (json: string): boolean =>
    Buffer.byteLength(json, 'utf8') <= GENAI_MESSAGE_ATTR_MAX_BYTES
  const full = JSON.stringify(messages)
  if (fits(full)) return full
  // Tail-preserving truncation: drop from the front until we fit.
  let remaining = messages.slice()
  while (remaining.length > 1) {
    remaining = remaining.slice(1)
    const candidate = JSON.stringify(remaining)
    if (fits(candidate)) return candidate
  }
  // Single message still over cap — truncate the text parts (on copies,
  // not in place) with a marker so the partial content stays readable.
  const only = remaining[0]
  const headroom = GENAI_MESSAGE_ATTR_MAX_BYTES - 1024
  const truncatedParts = only.parts.map((part) => {
    if (part.type !== 'text' || !part.content || part.content.length <= headroom) {
      return part
    }
    return {
      ...part,
      content: `${part.content.slice(0, headroom)}\n\n[truncated: capture cap ${GENAI_MESSAGE_ATTR_MAX_BYTES} bytes]`,
    }
  })
  const final = JSON.stringify([{ ...only, parts: truncatedParts }])
  return fits(final) ? final : undefined
}
// Input-side content captured on the agent root span (when enabled).
interface CopilotAgentInputMessages {
  // The user's prompt text, mapped to a 'user' role message.
  userMessage?: string
  // The system prompt, mapped to a 'system' role message.
  systemPrompt?: string
}
// Output-side content captured on the agent root span (when enabled).
interface CopilotAgentOutputMessages {
  // Final assistant text, mapped to a 'text' part.
  assistantText?: string
  // Tool calls the assistant issued, mapped to 'tool_call' parts.
  toolCalls?: Array<{
    id: string
    name: string
    arguments?: Record<string, unknown>
  }>
}
// Stamp the (system, user) input messages on `span` as the OTel GenAI
// input attribute. No-op unless content capture is explicitly enabled
// via the shared env var — prompt text is user content.
function setAgentInputMessages(span: Span, input: CopilotAgentInputMessages): void {
  if (!isGenAIMessageCaptureEnabled()) return
  const { systemPrompt, userMessage } = input
  const messages: GenAIAgentMessage[] = []
  if (systemPrompt) {
    messages.push({ role: 'system', parts: [{ type: 'text', content: systemPrompt }] })
  }
  if (userMessage) {
    messages.push({ role: 'user', parts: [{ type: 'text', content: userMessage }] })
  }
  const serialized = marshalAgentMessages(messages)
  if (serialized) {
    span.setAttribute(TraceAttr.GenAiInputMessages, serialized)
  }
}
// Stamp assistant text + tool calls on `span` as the OTel GenAI
// output attribute. Gated behind the same content-capture env var.
function setAgentOutputMessages(span: Span, output: CopilotAgentOutputMessages): void {
  if (!isGenAIMessageCaptureEnabled()) return
  const parts: GenAIAgentPart[] = []
  if (output.assistantText) {
    parts.push({ type: 'text', content: output.assistantText })
  }
  for (const call of output.toolCalls ?? []) {
    const part: GenAIAgentPart = { type: 'tool_call', id: call.id, name: call.name }
    if (call.arguments) {
      part.arguments = call.arguments
    }
    parts.push(part)
  }
  if (parts.length === 0) return
  const serialized = marshalAgentMessages([{ role: 'assistant', parts }])
  if (serialized) {
    span.setAttribute(TraceAttr.GenAiOutputMessages, serialized)
  }
}
export type CopilotLifecycleOutcome =
  (typeof RequestTraceV1Outcome)[keyof typeof RequestTraceV1Outcome]
// Lazy tracer — Next 16/Turbopack can evaluate modules before NodeSDK
// installs the real TracerProvider; resolving per call avoids a
// cached NoOpTracer silently disabling OTel.
const COPILOT_TRACER_NAME = 'sim-ai-platform'
const COPILOT_TRACER_VERSION = '1.0.0'
export function getCopilotTracer() {
  return trace.getTracer(COPILOT_TRACER_NAME, COPILOT_TRACER_VERSION)
}
// Module-private shorthand for the exported accessor.
function getTracer() {
  return getCopilotTracer()
}
// Wrap an inbound handler that Go called into so its span parents
// under the Go-side trace (via `traceparent`).
export async function withIncomingGoSpan<T>(
headers: Headers,
spanName: string,
attributes: Record<string, string | number | boolean> | undefined,
fn: (span: Span) => Promise<T>
): Promise<T> {
const parentContext = contextFromRequestHeaders(headers)
const tracer = getTracer()
return tracer.startActiveSpan(
spanName,
{ kind: SpanKind.SERVER, attributes },
parentContext,
async (span) => {
try {
const result = await fn(span)
span.setStatus({ code: SpanStatusCode.OK })
return result
} catch (error) {
markSpanForError(span, error)
throw error
} finally {
span.end()
}
}
)
}
// Wrap a copilot-lifecycle op in an OTel span. Pass `parentContext`
// explicitly when AsyncLocalStorage-tracked context can be dropped
// across multiple awaits (otherwise the child falls back to a framework
// span that the sampler drops). The span is always ended; errors are
// recorded per the explicit-stop policy and rethrown.
export async function withCopilotSpan<T>(
  spanName: string,
  attributes: Record<string, string | number | boolean> | undefined,
  fn: (span: Span) => Promise<T>,
  parentContext?: Context
): Promise<T> {
  const tracer = getTracer()
  const execute = async (span: Span): Promise<T> => {
    try {
      const result = await fn(span)
      span.setStatus({ code: SpanStatusCode.OK })
      return result
    } catch (error) {
      markSpanForError(span, error)
      throw error
    } finally {
      span.end()
    }
  }
  return parentContext
    ? tracer.startActiveSpan(spanName, { attributes }, parentContext, execute)
    : tracer.startActiveSpan(spanName, { attributes }, execute)
}
// External OTel `tool.execute` span for Sim-side tool work (the Go
// side's `tool.execute` is just the enqueue, stays ~0ms).
export async function withCopilotToolSpan<T>(
input: {
toolName: string
toolCallId: string
runId?: string
chatId?: string
argsBytes?: number
argsPreview?: string
},
fn: (span: Span) => Promise<T>
): Promise<T> {
const tracer = getTracer()
return tracer.startActiveSpan(
`tool.execute ${input.toolName}`,
{
attributes: {
[TraceAttr.ToolName]: input.toolName,
[TraceAttr.ToolCallId]: input.toolCallId,
[TraceAttr.ToolExecutor]: 'sim',
...(input.runId ? { [TraceAttr.RunId]: input.runId } : {}),
...(input.chatId ? { [TraceAttr.ChatId]: input.chatId } : {}),
...(typeof input.argsBytes === 'number'
? { [TraceAttr.ToolArgsBytes]: input.argsBytes }
: {}),
// argsPreview can leak pasted credentials in tool args; gate
// behind the GenAI content-capture env var.
...(input.argsPreview && isGenAIMessageCaptureEnabled()
? { [TraceAttr.ToolArgsPreview]: input.argsPreview }
: {}),
},
},
async (span) => {
try {
const result = await fn(span)
span.setStatus({ code: SpanStatusCode.OK })
return result
} catch (error) {
markSpanForError(span, error)
throw error
} finally {
span.end()
}
}
)
}
// W3C trace-context validity: 32 lowercase hex chars for the trace ID,
// 16 for the span ID, and neither may be the all-zero sentinel.
const VALID_TRACE_ID = /^[0-9a-f]{32}$/
const VALID_SPAN_ID = /^[0-9a-f]{16}$/
const ZERO_TRACE_ID = '00000000000000000000000000000000'
const ZERO_SPAN_ID = '0000000000000000'
function isValidSpanContext(spanContext: SpanContext): boolean {
  const { traceId, spanId } = spanContext
  if (!VALID_TRACE_ID.test(traceId) || traceId === ZERO_TRACE_ID) return false
  return VALID_SPAN_ID.test(spanId) && spanId !== ZERO_SPAN_ID
}
// Build a synthetic, sampled span context as a fallback carrier when
// the tracer hands back an invalid/no-op span context. IDs follow the
// W3C format: 16 random bytes → 32-hex trace ID, 8 random bytes →
// 16-hex span ID.
function createFallbackSpanContext(): SpanContext {
  const traceId = randomBytes(16).toString('hex')
  const spanId = randomBytes(8).toString('hex')
  return { traceId, spanId, traceFlags: TraceFlags.SAMPLED }
}
// Identifier bundle used to build the mothership root span's attributes.
interface CopilotOtelScope {
  // Leave unset on the chat POST — startCopilotOtelRoot will derive
  // from the root span's OTel trace ID (same value Grafana uses).
  // Set explicitly on paths that need a non-trace-derived ID (headless,
  // resume taking an ID from persisted state).
  requestId?: string
  // Route label stamped as a span attribute (empty string when unset).
  route?: string
  chatId?: string
  workflowId?: string
  executionId?: string
  runId?: string
  streamId?: string
  // Selects the agent-id/operation-name pair on the root span.
  transport: 'headless' | 'stream'
  // Raw user prompt; only captured (collapsed + truncated) when the
  // GenAI content-capture env var is on.
  userMessagePreview?: string
}
// Dashboard-column width; long enough for triage disambiguation.
const USER_MESSAGE_PREVIEW_MAX_CHARS = 500
// Build the base attribute set stamped on every mothership root span.
// Optional scope identifiers are only included when present.
function buildAgentSpanAttributes(
  scope: CopilotOtelScope & { requestId: string }
): Record<string, string | number | boolean> {
  const isStreamTransport = scope.transport === CopilotTransport.Stream
  const attributes: Record<string, string | number | boolean> = {
    [TraceAttr.GenAiAgentName]: 'mothership',
    [TraceAttr.GenAiAgentId]: isStreamTransport ? 'mothership-stream' : 'mothership-headless',
    [TraceAttr.GenAiOperationName]: isStreamTransport ? 'chat' : 'invoke_agent',
    [TraceAttr.RequestId]: scope.requestId,
    [TraceAttr.SimRequestId]: scope.requestId,
    [TraceAttr.CopilotRoute]: scope.route ?? '',
    [TraceAttr.CopilotTransport]: scope.transport,
  }
  if (scope.chatId) attributes[TraceAttr.ChatId] = scope.chatId
  if (scope.workflowId) attributes[TraceAttr.WorkflowId] = scope.workflowId
  if (scope.executionId) attributes[TraceAttr.CopilotExecutionId] = scope.executionId
  if (scope.runId) attributes[TraceAttr.RunId] = scope.runId
  if (scope.streamId) attributes[TraceAttr.StreamId] = scope.streamId
  // Gated behind the same env var as full GenAI message capture — a
  // 500-char preview is still user prompt content.
  if (isGenAIMessageCaptureEnabled()) {
    const preview = truncateUserMessagePreview(scope.userMessagePreview)
    if (preview) attributes[TraceAttr.CopilotUserMessagePreview] = preview
  }
  return attributes
}
/**
 * Collapse whitespace in a user-message preview and cap it at
 * `USER_MESSAGE_PREVIEW_MAX_CHARS` characters.
 *
 * @param raw candidate preview; anything non-string yields undefined.
 * @returns collapsed preview, undefined when empty/non-string.
 *
 * Fix: the previous version sliced to `MAX - 1` and wrapped the slice
 * in a no-op template literal — one char of headroom reserved for a
 * truncation marker that was never appended. Append `…` so a capped
 * preview is visibly truncated in dashboards; total length stays
 * exactly at the cap.
 */
function truncateUserMessagePreview(raw: unknown): string | undefined {
  if (typeof raw !== 'string') return undefined
  const collapsed = raw.replace(/\s+/g, ' ').trim()
  if (!collapsed) return undefined
  if (collapsed.length <= USER_MESSAGE_PREVIEW_MAX_CHARS) return collapsed
  return `${collapsed.slice(0, USER_MESSAGE_PREVIEW_MAX_CHARS - 1)}…`
}
// Request-shape metadata known only after branch resolution. Stamped
// on the root span for dashboard filtering.
interface CopilotOtelRequestShape {
  // Also derives the surface attribute: 'workflow' → copilot surface,
  // 'workspace' → mothership surface (see applyRequestShape).
  branchKind?: 'workflow' | 'workspace'
  mode?: string
  // Requested model / provider, mapped to the GenAI semconv attrs.
  model?: string
  provider?: string
  createNewChat?: boolean
  prefetch?: boolean
  fileAttachmentsCount?: number
  resourceAttachmentsCount?: number
  contextsCount?: number
  commandsCount?: number
  // Wall time spent waiting on the per-chat pending-stream lock; when
  // `interruptedPriorStream` is unset, a wait above the threshold is
  // treated as an inferred send-to-interrupt.
  pendingStreamWaitMs?: number
  interruptedPriorStream?: boolean
}
// Handle returned by startCopilotOtelRoot for driving the root span.
interface CopilotOtelRoot {
  // The live root span (attributes may still be stamped on it).
  span: Span
  // Context with the root span installed; pass to child span helpers.
  context: Context
  /**
   * Finalize the root span. `cancelReason`, when provided, decides
   * whether a `cancelled` outcome leaves span status UNSET (for
   * explicit user stops — our single non-error cancel class) or
   * escalates to ERROR (client disconnect, unknown, etc.). Omit it
   * for non-cancellation outcomes. Idempotent: only the first call
   * has any effect.
   */
  finish: (
    outcome?: CopilotLifecycleOutcome,
    error?: unknown,
    cancelReason?: CopilotRequestCancelReasonValue
  ) => void
  // GenAI content capture (no-ops unless the capture env var is on).
  setInputMessages: (input: CopilotAgentInputMessages) => void
  setOutputMessages: (output: CopilotAgentOutputMessages) => void
  // Stamp post-branch-resolution request metadata on the root span.
  setRequestShape: (shape: CopilotOtelRequestShape) => void
}
/**
 * Start the TRUE mothership root span and return a handle for
 * finalizing it. The returned `requestId` equals `scope.requestId`
 * when provided, otherwise the span's own OTel trace ID (so the value
 * shown in the UI pastes directly into Grafana). `finish` is
 * idempotent and implements the cancellation policy documented on
 * `CopilotOtelRoot.finish`.
 */
export function startCopilotOtelRoot(
  scope: CopilotOtelScope
): CopilotOtelRoot & { requestId: string } {
  // TRUE root — don't inherit from Next's HTTP handler span (the
  // sampler drops those; we'd orphan the whole mothership tree).
  const parentContext = ROOT_CONTEXT
  // Start with a placeholder `requestId`, then overwrite using the
  // span's actual trace ID so the UI copy-button value pastes
  // directly into Grafana.
  const span = getTracer().startSpan(
    TraceSpan.GenAiAgentExecute,
    { attributes: buildAgentSpanAttributes({ ...scope, requestId: '' }) },
    parentContext
  )
  // If the tracer returned an invalid (e.g. no-op) span context, wrap
  // a synthetic sampled context so children still share one trace ID.
  const carrierSpan = isValidSpanContext(span.spanContext())
    ? span
    : trace.wrapSpanContext(createFallbackSpanContext())
  const spanContext = carrierSpan.spanContext()
  const requestId =
    scope.requestId ??
    (spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '')
  span.setAttribute(TraceAttr.RequestId, requestId)
  span.setAttribute(TraceAttr.SimRequestId, requestId)
  const rootContext = trace.setSpan(parentContext, carrierSpan)
  // Guard so a double finish (e.g. error path + finally) is a no-op.
  let finished = false
  const finish: CopilotOtelRoot['finish'] = (outcome, error, cancelReason) => {
    if (finished) return
    finished = true
    const resolvedOutcome = outcome ?? RequestTraceV1Outcome.success
    span.setAttribute(TraceAttr.CopilotRequestOutcome, resolvedOutcome)
    // Policy: `explicit_stop` is the ONLY cancellation we treat as
    // expected (status unset → dashboards see it as OK). Everything
    // else — client_disconnect, unknown reason, bug-case cancels —
    // escalates to ERROR so it shows up on error panels.
    const isExplicitStop = cancelReason === CopilotRequestCancelReason.ExplicitStop
    if (error) {
      // markSpanForError may set ERROR; an explicit stop overrides it
      // back to OK so user stops never show as errors.
      markSpanForError(span, error)
      if (isExplicitStop || isExplicitUserStopError(error)) {
        span.setStatus({ code: SpanStatusCode.OK })
      }
    } else if (resolvedOutcome === RequestTraceV1Outcome.success) {
      span.setStatus({ code: SpanStatusCode.OK })
    } else if (resolvedOutcome === RequestTraceV1Outcome.cancelled) {
      if (isExplicitStop) {
        span.setStatus({ code: SpanStatusCode.OK })
      } else {
        span.setStatus({
          code: SpanStatusCode.ERROR,
          message: `cancelled: ${cancelReason ?? 'unknown'}`,
        })
      }
    }
    span.end()
  }
  return {
    span,
    context: rootContext,
    requestId,
    finish,
    setInputMessages: (input) => setAgentInputMessages(span, input),
    setOutputMessages: (output) => setAgentOutputMessages(span, output),
    setRequestShape: (shape) => applyRequestShape(span, shape),
  }
}
// Pending-stream-lock wait above this = inferred send-to-interrupt.
const INTERRUPT_WAIT_MS_THRESHOLD = 50
// Stamp post-branch-resolution request-shape metadata onto the root
// span. Only attributes whose values are actually present are set.
function applyRequestShape(span: Span, shape: CopilotOtelRequestShape): void {
  const setString = (attr: string, value: string | undefined): void => {
    if (value) span.setAttribute(attr, value)
  }
  const setNumber = (attr: string, value: number | undefined): void => {
    if (typeof value === 'number') span.setAttribute(attr, value)
  }
  const setBool = (attr: string, value: boolean | undefined): void => {
    if (typeof value === 'boolean') span.setAttribute(attr, value)
  }
  if (shape.branchKind) {
    span.setAttribute(TraceAttr.CopilotBranchKind, shape.branchKind)
    // Branch kind also decides which product surface the trace belongs to.
    span.setAttribute(
      TraceAttr.CopilotSurface,
      shape.branchKind === CopilotBranchKind.Workflow
        ? CopilotSurface.Copilot
        : CopilotSurface.Mothership
    )
  }
  setString(TraceAttr.CopilotMode, shape.mode)
  setString(TraceAttr.GenAiRequestModel, shape.model)
  setString(TraceAttr.GenAiSystem, shape.provider)
  setBool(TraceAttr.CopilotChatIsNew, shape.createNewChat)
  setBool(TraceAttr.CopilotPrefetch, shape.prefetch)
  setNumber(TraceAttr.CopilotFileAttachmentsCount, shape.fileAttachmentsCount)
  setNumber(TraceAttr.CopilotResourceAttachmentsCount, shape.resourceAttachmentsCount)
  setNumber(TraceAttr.CopilotContextsCount, shape.contextsCount)
  setNumber(TraceAttr.CopilotCommandsCount, shape.commandsCount)
  if (typeof shape.pendingStreamWaitMs === 'number') {
    span.setAttribute(TraceAttr.CopilotPendingStreamWaitMs, shape.pendingStreamWaitMs)
    // Explicit flag wins; otherwise infer interruption from how long we
    // waited on the per-chat pending-stream lock.
    const interrupted =
      typeof shape.interruptedPriorStream === 'boolean'
        ? shape.interruptedPriorStream
        : shape.pendingStreamWaitMs > INTERRUPT_WAIT_MS_THRESHOLD
    span.setAttribute(TraceAttr.CopilotInterruptedPriorStream, interrupted)
  } else {
    setBool(TraceAttr.CopilotInterruptedPriorStream, shape.interruptedPriorStream)
  }
}
/**
 * Run `fn` inside a fresh mothership agent span that is PARENTED under
 * the currently-active context (unlike `startCopilotOtelRoot`, which
 * forces a true root). The span is ended on every path; an explicit
 * user stop leaves status UNSET per the cancellation policy in
 * `markSpanForError`.
 */
export async function withCopilotOtelContext<T>(
  scope: CopilotOtelScope,
  fn: (otelContext: Context) => Promise<T>
): Promise<T> {
  const parentContext = context.active()
  // Same trace-id-derives-requestId dance as startCopilotOtelRoot — see
  // that function for the rationale. Stamp a placeholder, read the real
  // trace ID off the span, then overwrite.
  const span = getTracer().startSpan(
    TraceSpan.GenAiAgentExecute,
    { attributes: buildAgentSpanAttributes({ ...scope, requestId: scope.requestId ?? '' }) },
    parentContext
  )
  // Invalid/no-op span context → substitute a synthetic sampled one so
  // children still share a single trace ID.
  const carrierSpan = isValidSpanContext(span.spanContext())
    ? span
    : trace.wrapSpanContext(createFallbackSpanContext())
  const spanContext = carrierSpan.spanContext()
  const resolvedRequestId =
    scope.requestId ??
    (spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '')
  if (resolvedRequestId) {
    span.setAttribute(TraceAttr.RequestId, resolvedRequestId)
    span.setAttribute(TraceAttr.SimRequestId, resolvedRequestId)
  }
  const otelContext = trace.setSpan(parentContext, carrierSpan)
  // Tracks whether one of the two arms below already decided status,
  // so the finally-block fallback doesn't clobber it.
  let terminalStatusSet = false
  try {
    const result = await context.with(otelContext, () => fn(otelContext))
    span.setStatus({ code: SpanStatusCode.OK })
    terminalStatusSet = true
    return result
  } catch (error) {
    markSpanForError(span, error)
    terminalStatusSet = true
    throw error
  } finally {
    if (!terminalStatusSet) {
      // Extremely defensive: should be unreachable, but avoids leaking
      // an unset span status if some future refactor breaks both arms.
      span.setStatus({ code: SpanStatusCode.OK })
    }
    span.end()
  }
}

View File

@@ -0,0 +1,42 @@
/**
* Abort-reason vocabulary for Sim-originated cancellations.
*
* This is deliberately a zero-dependency module (no OTel, no logger,
* no DB) so it can be imported from both the telemetry layer
* (`request/otel.ts`) and the abort layer (`request/session/abort.ts`)
* without creating a circular dependency. The longer prose lives in
* `abort.ts`; anything here is the raw classification vocabulary
* consumed by span-status / finalizer code.
*/
/**
 * Reason strings passed to `AbortController.abort(reason)` for every
 * Sim-originated cancel path.
 */
export const AbortReason = {
  /** Same-process stop: browser→Sim→abortActiveStream. */
  UserStop: 'user_stop:abortActiveStream',
  /**
   * Cross-process stop: the Sim node that holds the SSE didn't
   * receive the Stop HTTP call, but it polled the Redis abort marker
   * that the node that DID receive it wrote, and aborts on the poll.
   */
  RedisPoller: 'redis_abort_marker:poller',
  /** Internal timeout on the outbound explicit-abort fetch to Go. */
  ExplicitAbortFetchTimeout: 'timeout:go_explicit_abort_fetch',
} as const
export type AbortReasonValue = (typeof AbortReason)[keyof typeof AbortReason]
// Closed vocabulary of reasons that count as a user-initiated stop.
const EXPLICIT_STOP_REASONS: ReadonlySet<unknown> = new Set([
  AbortReason.UserStop,
  AbortReason.RedisPoller,
])
/**
 * True iff `reason` indicates the user explicitly triggered the abort
 * (as opposed to an implicit client disconnect or server timeout).
 * Treated as a small closed vocabulary — any value not in
 * `EXPLICIT_STOP_REASONS` is presumed non-explicit. This is the
 * canonical "should I treat this cancellation as expected?" predicate:
 * span status-setters consult it to suppress ERROR only for
 * user-initiated stops, mirroring `requestctx.IsExplicitUserStop` on
 * the Go side.
 */
export function isExplicitStopReason(reason: unknown): boolean {
  return EXPLICIT_STOP_REASONS.has(reason)
}

View File

@@ -1,7 +1,12 @@
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { sleep } from '@sim/utils/helpers'
import { AbortBackend } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import { acquireLock, getRedisClient, releaseLock } from '@/lib/core/config/redis'
import { AbortReason } from './abort-reason'
import { clearAbortMarker, hasAbortMarker, writeAbortMarker } from './buffer'
const logger = createLogger('SessionAbort')
@@ -122,74 +127,136 @@ export async function acquirePendingChatStream(
streamId: string,
timeoutMs = 5_000
): Promise<boolean> {
const redis = getRedisClient()
if (redis) {
const deadline = Date.now() + timeoutMs
for (;;) {
try {
const acquired = await acquireLock(
getChatStreamLockKey(chatId),
streamId,
CHAT_STREAM_LOCK_TTL_SECONDS
)
if (acquired) {
// Span records wall time spent waiting for the per-chat stream lock.
// Typical case: sub-10ms uncontested acquire. Worst case: up to
// `timeoutMs` spent polling while a prior stream finishes. Previously
// this time looked like "unexplained gap before llm.stream".
return withCopilotSpan(
TraceSpan.CopilotChatAcquirePendingStreamLock,
{
[TraceAttr.ChatId]: chatId,
[TraceAttr.StreamId]: streamId,
[TraceAttr.LockTimeoutMs]: timeoutMs,
},
async (span) => {
const redis = getRedisClient()
span.setAttribute(TraceAttr.LockBackend, redis ? AbortBackend.Redis : AbortBackend.InProcess)
if (redis) {
const deadline = Date.now() + timeoutMs
for (;;) {
try {
const acquired = await acquireLock(
getChatStreamLockKey(chatId),
streamId,
CHAT_STREAM_LOCK_TTL_SECONDS
)
if (acquired) {
registerPendingChatStream(chatId, streamId)
span.setAttribute(TraceAttr.LockAcquired, true)
return true
}
if (!pendingChatStreams.has(chatId)) {
const ownerStreamId = await redis.get(getChatStreamLockKey(chatId))
if (ownerStreamId) {
const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId)
if (settled) {
continue
}
}
}
} catch (error) {
logger.warn('Failed to acquire chat stream lock', {
chatId,
streamId,
error: toError(error).message,
})
}
if (Date.now() >= deadline) {
span.setAttribute(TraceAttr.LockAcquired, false)
span.setAttribute(TraceAttr.LockTimedOut, true)
return false
}
await sleep(200)
}
}
for (;;) {
const existing = pendingChatStreams.get(chatId)
if (!existing) {
registerPendingChatStream(chatId, streamId)
span.setAttribute(TraceAttr.LockAcquired, true)
return true
}
if (!pendingChatStreams.has(chatId)) {
const ownerStreamId = await redis.get(getChatStreamLockKey(chatId))
if (ownerStreamId) {
const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId)
if (settled) {
continue
}
}
const settled = await Promise.race([
existing.promise.then(() => true),
new Promise<boolean>((resolve) => setTimeout(() => resolve(false), timeoutMs)),
])
if (!settled) {
span.setAttribute(TraceAttr.LockAcquired, false)
span.setAttribute(TraceAttr.LockTimedOut, true)
return false
}
} catch (error) {
logger.warn('Failed to acquire chat stream lock', {
chatId,
streamId,
error: toError(error).message,
})
}
if (Date.now() >= deadline) {
return false
}
await sleep(200)
}
}
for (;;) {
const existing = pendingChatStreams.get(chatId)
if (!existing) {
registerPendingChatStream(chatId, streamId)
return true
}
const settled = await Promise.race([
existing.promise.then(() => true),
new Promise<boolean>((resolve) => setTimeout(() => resolve(false), timeoutMs)),
])
if (!settled) {
return false
}
}
)
}
/**
* Returns `true` if it aborted an in-process controller,
* `false` if it only wrote the marker (no local controller found).
*
* Spanned because the two operations inside can stall independently
* — Redis latency on `writeAbortMarker` was previously invisible, and
* the "no local controller" branch (happens when the stream handler
* is on a different Sim box than the one receiving /chat/abort) is
* a subtle but important outcome to distinguish from "aborted a live
* controller" in dashboards.
*/
export async function abortActiveStream(streamId: string): Promise<boolean> {
await writeAbortMarker(streamId)
const controller = activeStreams.get(streamId)
if (!controller) return false
controller.abort('user_stop:abortActiveStream')
activeStreams.delete(streamId)
return true
return withCopilotSpan(
TraceSpan.CopilotChatAbortActiveStream,
{ [TraceAttr.StreamId]: streamId },
async (span) => {
await writeAbortMarker(streamId)
span.setAttribute(TraceAttr.CopilotAbortMarkerWritten, true)
const controller = activeStreams.get(streamId)
if (!controller) {
span.setAttribute(TraceAttr.CopilotAbortControllerFired, false)
return false
}
controller.abort(AbortReason.UserStop)
activeStreams.delete(streamId)
span.setAttribute(TraceAttr.CopilotAbortControllerFired, true)
return true
}
)
}
export type { AbortReasonValue } from './abort-reason'
/**
* `AbortReason` vocabulary and the `isExplicitStopReason` classifier
* live in a sibling zero-dependency module so the telemetry layer
* (`request/otel.ts`) can import them without creating a circular
* import back through `session/abort.ts`'s OTel-wrapped helpers.
*
* Context on why the distinction matters: when the user clicks Stop,
* we fire `abortController.abort(AbortReason.UserStop)` from
* `abortActiveStream()`. That causes Sim's SSE writer to close,
* which in turn makes the BROWSER's SSE reader see the stream end
* — which fires the browser-side fetch AbortController and
* propagates back to Sim as `publisher.markDisconnected()`. So on
* an explicit Stop you observe BOTH "explicit reason" AND
* "client disconnected" — the discriminator is the reason string,
* not the client flag.
*
* For any NEW abort path, add its reason in `./abort-reason.ts` and
* update `isExplicitStopReason` if it should be classified as a user
* stop.
*/
export { AbortReason, isExplicitStopReason } from './abort-reason'
const pollingStreams = new Set<string>()
export function startAbortPoller(
@@ -208,7 +275,7 @@ export function startAbortPoller(
try {
const shouldAbort = await hasAbortMarker(streamId)
if (shouldAbort && !abortController.signal.aborted) {
abortController.abort('redis_abort_marker:poller')
abortController.abort(AbortReason.RedisPoller)
await clearAbortMarker(streamId)
}
} catch (error) {

View File

@@ -27,6 +27,10 @@ describe('stream session contract parser', () => {
it('accepts contract text events', () => {
const event = {
...BASE_ENVELOPE,
trace: {
...BASE_ENVELOPE.trace,
goTraceId: 'go-trace-1',
},
type: 'text' as const,
payload: {
channel: 'assistant' as const,
@@ -97,7 +101,11 @@ describe('stream session contract parser', () => {
const event = {
...BASE_ENVELOPE,
type: 'span' as const,
payload: { kind: 'subagent' as const, event: 'start' as const, agent: 'file' },
payload: {
kind: 'subagent' as const,
event: 'start' as const,
agent: 'file',
},
}
expect(isContractStreamEventEnvelope(event)).toBe(true)

View File

@@ -171,7 +171,12 @@ function isStreamRef(value: unknown): value is MothershipStreamV1StreamRef {
}
function isTrace(value: unknown): value is MothershipStreamV1Trace {
return isRecord(value) && typeof value.requestId === 'string' && isOptionalString(value.spanId)
return (
isRecord(value) &&
typeof value.requestId === 'string' &&
isOptionalString(value.goTraceId) &&
isOptionalString(value.spanId)
)
}
function isStreamScope(value: unknown): value is MothershipStreamV1StreamScope {
@@ -317,9 +322,12 @@ function isContractEnvelope(value: unknown): value is MothershipStreamV1EventEnv
// Synthetic file-preview envelope validators
// ---------------------------------------------------------------------------
function isSyntheticEnvelopeBase(
value: unknown
): value is Omit<SyntheticFilePreviewEventEnvelope, 'payload'> & { payload?: unknown } {
function isSyntheticEnvelopeBase(value: unknown): value is Omit<
SyntheticFilePreviewEventEnvelope,
'payload'
> & {
payload?: unknown
} {
return (
isRecord(value) &&
value.v === 1 &&

View File

@@ -1,4 +1,8 @@
import type { Context } from '@opentelemetry/api'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { fetchGo } from '@/lib/copilot/request/go/fetch'
import { AbortReason } from '@/lib/copilot/request/session/abort'
import { env } from '@/lib/core/config/env'
export const DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS = 3000
@@ -8,19 +12,31 @@ export async function requestExplicitStreamAbort(params: {
userId: string
chatId?: string
timeoutMs?: number
otelContext?: Context
}): Promise<void> {
const { streamId, userId, chatId, timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS } = params
const {
streamId,
userId,
chatId,
timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS,
otelContext,
} = params
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
const headers: Record<string, string> = {
'Content-Type': 'application/json',
}
if (env.COPILOT_API_KEY) {
headers['x-api-key'] = env.COPILOT_API_KEY
}
const controller = new AbortController()
const timeout = setTimeout(() => controller.abort('timeout:go_explicit_abort_fetch'), timeoutMs)
const timeout = setTimeout(
() => controller.abort(AbortReason.ExplicitAbortFetchTimeout),
timeoutMs
)
try {
const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
method: 'POST',
headers,
signal: controller.signal,
@@ -29,6 +45,13 @@ export async function requestExplicitStreamAbort(params: {
userId,
...(chatId ? { chatId } : {}),
}),
otelContext,
spanName: 'sim → go /api/streams/explicit-abort',
operation: 'explicit_abort',
attributes: {
[TraceAttr.StreamId]: streamId,
...(chatId ? { [TraceAttr.ChatId]: chatId } : {}),
},
})
if (!response.ok) {

View File

@@ -1,8 +1,11 @@
export {
AbortReason,
type AbortReasonValue,
abortActiveStream,
acquirePendingChatStream,
cleanupAbortMarker,
getPendingChatStreamId,
isExplicitStopReason,
registerActiveStream,
releasePendingChatStream,
startAbortPoller,

View File

@@ -0,0 +1,38 @@
/**
 * @vitest-environment node
 */
import { describe, expect, it, vi } from 'vitest'
// vi.hoisted ensures the mock fns exist before vi.mock's factory runs
// (vi.mock calls are hoisted above imports by vitest's transform).
const { getLatestSeq, getOldestSeq, readEvents } = vi.hoisted(() => ({
  getLatestSeq: vi.fn(),
  getOldestSeq: vi.fn(),
  readEvents: vi.fn(),
}))
vi.mock('./buffer', () => ({
  getLatestSeq,
  getOldestSeq,
  readEvents,
}))
// Imported AFTER vi.mock so './recovery' resolves the mocked './buffer'.
import { checkForReplayGap } from './recovery'
describe('checkForReplayGap', () => {
  it('uses the latest buffered request id when run metadata is missing it', async () => {
    // Buffer claims seqs 10..12 exist; caller resumes after seq 1 →
    // a replay gap. Events after cursor+1 carry the live request id.
    getOldestSeq.mockResolvedValue(10)
    getLatestSeq.mockResolvedValue(12)
    readEvents.mockResolvedValue([
      {
        trace: { requestId: 'req-live-123' },
      },
    ])
    const result = await checkForReplayGap('stream-1', '1')
    // Reads events starting just past the oldest buffered seq.
    expect(readEvents).toHaveBeenCalledWith('stream-1', '11')
    expect(result?.gapDetected).toBe(true)
    // Both gap envelopes (presumably synthetic — confirm in recovery.ts)
    // inherit the request id found on the buffered event.
    expect(result?.envelopes[0].trace.requestId).toBe('req-live-123')
    expect(result?.envelopes[1].trace.requestId).toBe('req-live-123')
  })
})

View File

@@ -3,7 +3,11 @@ import {
MothershipStreamV1CompletionStatus,
MothershipStreamV1EventType,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { getLatestSeq, getOldestSeq } from './buffer'
import { CopilotRecoveryOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import { getLatestSeq, getOldestSeq, readEvents } from './buffer'
import { createEvent } from './event'
const logger = createLogger('SessionRecovery')
@@ -15,62 +19,105 @@ export interface ReplayGapResult {
export async function checkForReplayGap(
streamId: string,
afterCursor: string
afterCursor: string,
requestId?: string
): Promise<ReplayGapResult | null> {
const requestedAfterSeq = Number(afterCursor || '0')
if (requestedAfterSeq <= 0) {
// Fast path: no cursor → nothing to check. Skip the span to avoid
// emitting zero-work spans on every stream connect.
return null
}
const oldestSeq = await getOldestSeq(streamId)
const latestSeq = await getLatestSeq(streamId)
return withCopilotSpan(
TraceSpan.CopilotRecoveryCheckReplayGap,
{
[TraceAttr.StreamId]: streamId,
[TraceAttr.CopilotRecoveryRequestedAfterSeq]: requestedAfterSeq,
...(requestId ? { [TraceAttr.RequestId]: requestId } : {}),
},
async (span) => {
const oldestSeq = await getOldestSeq(streamId)
const latestSeq = await getLatestSeq(streamId)
span.setAttributes({
[TraceAttr.CopilotRecoveryOldestSeq]: oldestSeq ?? -1,
[TraceAttr.CopilotRecoveryLatestSeq]: latestSeq ?? -1,
})
if (
latestSeq !== null &&
latestSeq > 0 &&
oldestSeq !== null &&
requestedAfterSeq < oldestSeq - 1
) {
logger.warn('Replay gap detected: requested cursor is below oldest available event', {
streamId,
requestedAfterSeq,
oldestAvailableSeq: oldestSeq,
latestSeq,
})
const gapEnvelope = createEvent({
streamId,
cursor: String(latestSeq + 1),
seq: latestSeq + 1,
requestId: '',
type: MothershipStreamV1EventType.error,
payload: {
message: 'Replay history is no longer available. Some events may have been lost.',
code: 'replay_gap',
data: {
oldestAvailableSeq: oldestSeq,
if (
latestSeq !== null &&
latestSeq > 0 &&
oldestSeq !== null &&
requestedAfterSeq < oldestSeq - 1
) {
const resolvedRequestId = await resolveReplayGapRequestId(streamId, latestSeq, requestId)
logger.warn('Replay gap detected: requested cursor is below oldest available event', {
streamId,
requestedAfterSeq,
},
},
})
oldestAvailableSeq: oldestSeq,
latestSeq,
})
span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.GapDetected)
const terminalEnvelope = createEvent({
streamId,
cursor: String(latestSeq + 2),
seq: latestSeq + 2,
requestId: '',
type: MothershipStreamV1EventType.complete,
payload: {
status: MothershipStreamV1CompletionStatus.error,
reason: 'replay_gap',
},
})
const gapEnvelope = createEvent({
streamId,
cursor: String(latestSeq + 1),
seq: latestSeq + 1,
requestId: resolvedRequestId,
type: MothershipStreamV1EventType.error,
payload: {
message: 'Replay history is no longer available. Some events may have been lost.',
code: 'replay_gap',
data: {
oldestAvailableSeq: oldestSeq,
requestedAfterSeq,
},
},
})
return {
gapDetected: true,
envelopes: [gapEnvelope, terminalEnvelope],
const terminalEnvelope = createEvent({
streamId,
cursor: String(latestSeq + 2),
seq: latestSeq + 2,
requestId: resolvedRequestId,
type: MothershipStreamV1EventType.complete,
payload: {
status: MothershipStreamV1CompletionStatus.error,
reason: 'replay_gap',
},
})
return {
gapDetected: true,
envelopes: [gapEnvelope, terminalEnvelope],
}
}
span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.InRange)
return null
}
)
}
async function resolveReplayGapRequestId(
streamId: string,
latestSeq: number,
requestId?: string
): Promise<string> {
if (typeof requestId === 'string' && requestId.length > 0) {
return requestId
}
return null
try {
const latestEvents = await readEvents(streamId, String(Math.max(latestSeq - 1, 0)))
const latestRequestId = latestEvents[0]?.trace?.requestId
return typeof latestRequestId === 'string' ? latestRequestId : ''
} catch (error) {
logger.warn('Failed to resolve request ID for replay gap', {
streamId,
latestSeq,
error: error instanceof Error ? error.message : String(error),
})
return ''
}
}

View File

@@ -2,14 +2,17 @@ import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { generateId } from '@sim/utils/id'
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import { SIM_AGENT_API_URL, SIM_AGENT_VERSION } from '@/lib/copilot/constants'
import {
MothershipStreamV1EventType,
MothershipStreamV1SpanPayloadKind,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { createStreamingContext } from '@/lib/copilot/request/context/request-context'
import { buildToolCallSummaries } from '@/lib/copilot/request/context/result'
import { runStreamLoop } from '@/lib/copilot/request/go/stream'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import type {
ExecutionContext,
OrchestratorOptions,
@@ -30,6 +33,7 @@ export interface SubagentOrchestratorOptions extends Omit<OrchestratorOptions, '
workflowId?: string
workspaceId?: string
userPermission?: string
simRequestId?: string
onComplete?: (result: SubagentOrchestratorResult) => void | Promise<void>
}
@@ -51,6 +55,47 @@ export async function orchestrateSubagentStream(
agentId: string,
requestPayload: Record<string, unknown>,
options: SubagentOrchestratorOptions
): Promise<SubagentOrchestratorResult> {
return withCopilotSpan(
TraceSpan.CopilotSubagentExecute,
{
[TraceAttr.SubagentId]: agentId,
// Sim-side entrypoint = MCP / headless subagent call. No parent
// agent (the caller is an external client); treat as depth 2 and
// mark as NOT nested so it aggregates with Go-side direct-child
// subagent spans on dashboards. Grandchildren are stamped
// depth=3 + nested=true in
// `agents/nested.go:executeNestedAgent`.
[TraceAttr.SubagentDepth]: 2,
[TraceAttr.SubagentNested]: false,
[TraceAttr.SubagentParentAgentId]: 'mcp',
[TraceAttr.UserId]: options.userId,
...(options.simRequestId ? { [TraceAttr.SimRequestId]: options.simRequestId } : {}),
...(options.workflowId ? { [TraceAttr.WorkflowId]: options.workflowId } : {}),
...(options.workspaceId ? { [TraceAttr.WorkspaceId]: options.workspaceId } : {}),
},
async (otelSpan) => {
const result = await orchestrateSubagentStreamInner(agentId, requestPayload, options)
otelSpan.setAttributes({
[TraceAttr.SubagentOutcomeSuccess]: result.success,
[TraceAttr.SubagentOutcomeToolCallCount]: result.toolCalls.length,
[TraceAttr.SubagentOutcomeContentBytes]: result.content?.length ?? 0,
...(result.structuredResult?.type
? { [TraceAttr.SubagentOutcomeStructuredType]: result.structuredResult.type }
: {}),
...(result.error
? { [TraceAttr.SubagentOutcomeError]: String(result.error).slice(0, 500) }
: {}),
})
return result
}
)
}
async function orchestrateSubagentStreamInner(
agentId: string,
requestPayload: Record<string, unknown>,
options: SubagentOrchestratorOptions
): Promise<SubagentOrchestratorResult> {
const { userId, workflowId, workspaceId, userPermission } = options
const chatId =
@@ -87,6 +132,7 @@ export async function orchestrateSubagentStream(
const msgId = requestPayload?.messageId
const context = createStreamingContext({
chatId,
requestId: options.simRequestId,
messageId: typeof msgId === 'string' ? msgId : generateId(),
})
@@ -100,6 +146,7 @@ export async function orchestrateSubagentStream(
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
'X-Client-Version': SIM_AGENT_VERSION,
},
body: JSON.stringify({
...requestPayload,
@@ -149,7 +196,10 @@ export async function orchestrateSubagentStream(
return result
} catch (error) {
const err = error instanceof Error ? error : new Error('Subagent orchestration failed')
logger.error('Subagent orchestration failed', { error: err.message, agentId })
logger.error('Subagent orchestration failed', {
error: err.message,
agentId,
})
await options.onError?.(err)
return {
success: false,

View File

@@ -18,7 +18,9 @@ import {
MothershipStreamV1ToolPhase,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { CreateWorkflow } from '@/lib/copilot/generated/tool-catalog-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm'
import { withCopilotToolSpan } from '@/lib/copilot/request/otel'
import { markToolResultSeen } from '@/lib/copilot/request/sse-utils'
import {
getToolCallStateOutput,
@@ -54,6 +56,81 @@ function hasOutputValue(result: { output?: unknown } | undefined): result is { o
return result !== undefined && Object.hasOwn(result, 'output')
}
/**
 * Compact summary of a tool call result, recorded as tracing-span
 * attributes by `summarizeToolResultForSpan`.
 */
interface ToolResultSpanSummary {
  // `result.success` coerced to a boolean.
  resultSuccess: boolean
  // Approximate serialized size of the output (string length of the raw
  // string or of JSON.stringify; 0 when absent or unserializable).
  outputBytes: number
  // Shape of the output: 'none', 'string', 'array', 'object', or the
  // `typeof` result for other primitives.
  outputKind: string
  // Present only for failed results that carry an error; capped at 500 chars.
  errorMessage?: string
  // Set only when the output contains an image attachment.
  imageCount?: number
  imageBytes?: number
  attachmentMediaType?: string
}
/**
 * Condense a tool call result into the attributes attached to its tracing
 * span: success flag, a rough size/shape of the output, a truncated error
 * message, and image-attachment stats when an image attachment is present.
 */
function summarizeToolResultForSpan(result: {
  success: boolean
  output?: unknown
  error?: string
}): ToolResultSpanSummary {
  const summary: ToolResultSpanSummary = {
    resultSuccess: Boolean(result.success),
    outputBytes: 0,
    outputKind: 'none',
  }

  if (!result.success && result.error) {
    // Cap the message so a huge error payload cannot bloat the span.
    summary.errorMessage = String(result.error).slice(0, 500)
  }

  // No `output` key at all → keep the 'none' defaults.
  if (!hasOutputValue(result)) {
    return summary
  }

  const output = result.output

  if (typeof output === 'string') {
    summary.outputKind = 'string'
    summary.outputBytes = output.length
    return summary
  }

  // Non-null objects (arrays included): size via JSON serialization, plus
  // attachment stats when the output carries an image attachment.
  if (output !== null && typeof output === 'object') {
    summary.outputKind = Array.isArray(output) ? 'array' : 'object'
    try {
      summary.outputBytes = JSON.stringify(output).length
    } catch {
      summary.outputBytes = 0
    }
    const attachment = extractAttachmentShape(output)
    if (attachment) {
      summary.imageCount = attachment.imageCount
      summary.imageBytes = attachment.imageBytes
      if (attachment.mediaType) {
        summary.attachmentMediaType = attachment.mediaType
      }
    }
    return summary
  }

  // Remaining primitives (number, boolean, …): stringified length.
  if (output !== undefined && output !== null) {
    summary.outputKind = typeof output
    summary.outputBytes = String(output).length
  }
  return summary
}
function extractAttachmentShape(
output: unknown
): { imageCount: number; imageBytes: number; mediaType?: string } | null {
if (!isRecord(output)) return null
const candidate = (output as Record<string, unknown>).attachment
if (!isRecord(candidate)) return null
const source = (candidate as Record<string, unknown>).source
if (!isRecord(source)) return null
const type =
typeof (candidate as Record<string, unknown>).type === 'string'
? ((candidate as Record<string, unknown>).type as string)
: ''
if (type !== 'image') return null
const mediaType =
typeof source.media_type === 'string' ? (source.media_type as string) : undefined
const data = typeof source.data === 'string' ? (source.data as string) : ''
return {
imageCount: 1,
imageBytes: data.length,
mediaType,
}
}
function buildCompletionSignal(input: {
status: AsyncCompletionSignal['status']
message?: string
@@ -164,6 +241,44 @@ export async function executeToolAndReport(
message: 'Tool call not found',
})
const argsPayload = toolCall.params
? (() => {
try {
return JSON.stringify(toolCall.params)
} catch {
return undefined
}
})()
: undefined
return withCopilotToolSpan(
{
toolName: toolCall.name,
toolCallId: toolCall.id,
runId: context.runId,
chatId: execContext.chatId,
argsBytes: argsPayload?.length,
argsPreview: argsPayload?.slice(0, 200),
},
async (otelSpan) => {
const completion = await executeToolAndReportInner(toolCall, context, execContext, options)
otelSpan.setAttribute(TraceAttr.ToolOutcome, completion.status)
if (completion.message) {
otelSpan.setAttribute(
TraceAttr.ToolOutcomeMessage,
String(completion.message).slice(0, 500)
)
}
return completion
}
)
}
async function executeToolAndReportInner(
toolCall: ToolCallState,
context: StreamingContext,
execContext: ExecutionContext,
options?: OrchestratorOptions
): Promise<AsyncToolCompletion> {
if (toolCall.status === 'executing') {
return buildCompletionSignal({
status: MothershipStreamV1AsyncToolRecordStatus.running,
@@ -377,6 +492,11 @@ export async function executeToolAndReport(
endToolSpan('cancelled', { cancelReason: 'abort_during_post_processing_csv' })
return cancelledCompletion('Request aborted during tool post-processing')
}
toolSpan.attributes = {
...toolSpan.attributes,
...summarizeToolResultForSpan(result),
}
setTerminalToolCallState(toolCall, {
status: result.success
? MothershipStreamV1ToolOutcome.success

View File

@@ -1,6 +1,11 @@
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { FunctionExecute, UserTable } from '@/lib/copilot/generated/tool-catalog-v1'
import { CopilotOutputFileOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types'
import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
@@ -162,55 +167,80 @@ export async function maybeWriteOutputToFile(
const explicitFormat =
(params?.outputFormat as string | undefined) ?? (args?.outputFormat as string | undefined)
try {
const fileName = normalizeOutputWorkspaceFileName(outputPath)
const format = resolveOutputFormat(fileName, explicitFormat)
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const content = serializeOutputForFile(result.output, format)
const contentType = FORMAT_TO_CONTENT_TYPE[format]
// Only span the actual write path (where we upload to storage). Fast
// no-op returns above don't need a span — they'd just pad the trace
// with empty work.
return withCopilotSpan(
TraceSpan.CopilotToolsWriteOutputFile,
{
[TraceAttr.ToolName]: toolName,
[TraceAttr.WorkspaceId]: context.workspaceId,
},
async (span) => {
try {
const fileName = normalizeOutputWorkspaceFileName(outputPath)
const format = resolveOutputFormat(fileName, explicitFormat)
span.setAttributes({
[TraceAttr.CopilotOutputFileName]: fileName,
[TraceAttr.CopilotOutputFileFormat]: format,
})
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const content = serializeOutputForFile(result.output, format)
const contentType = FORMAT_TO_CONTENT_TYPE[format]
const buffer = Buffer.from(content, 'utf-8')
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const uploaded = await uploadWorkspaceFile(
context.workspaceId,
context.userId,
buffer,
fileName,
contentType
)
const buffer = Buffer.from(content, 'utf-8')
span.setAttribute(TraceAttr.CopilotOutputFileBytes, buffer.length)
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const uploaded = await uploadWorkspaceFile(
context.workspaceId!,
context.userId!,
buffer,
fileName,
contentType
)
span.setAttributes({
[TraceAttr.CopilotOutputFileId]: uploaded.id,
[TraceAttr.CopilotOutputFileOutcome]: CopilotOutputFileOutcome.Uploaded,
})
logger.info('Tool output written to file', {
toolName,
fileName,
size: buffer.length,
fileId: uploaded.id,
})
logger.info('Tool output written to file', {
toolName,
fileName,
size: buffer.length,
fileId: uploaded.id,
})
return {
success: true,
output: {
message: `Output written to files/${fileName} (${buffer.length} bytes)`,
fileId: uploaded.id,
fileName,
size: buffer.length,
downloadUrl: uploaded.url,
},
resources: [{ type: 'file', id: uploaded.id, title: fileName }],
return {
success: true,
output: {
message: `Output written to files/${fileName} (${buffer.length} bytes)`,
fileId: uploaded.id,
fileName,
size: buffer.length,
downloadUrl: uploaded.url,
},
resources: [{ type: 'file', id: uploaded.id, title: fileName }],
}
} catch (err) {
const message = toError(err).message
logger.warn('Failed to write tool output to file', {
toolName,
outputPath,
error: message,
})
span.setAttribute(TraceAttr.CopilotOutputFileOutcome, CopilotOutputFileOutcome.Failed)
span.addEvent(TraceEvent.CopilotOutputFileError, {
[TraceAttr.ErrorMessage]: message.slice(0, 500),
})
return {
success: false,
error: `Failed to write output file: ${message}`,
}
}
}
} catch (err) {
const message = toError(err).message
logger.warn('Failed to write tool output to file', {
toolName,
outputPath,
error: message,
})
return {
success: false,
error: `Failed to write output file: ${message}`,
}
}
)
}

View File

@@ -4,6 +4,9 @@ import {
MothershipStreamV1EventType,
MothershipStreamV1ResourceOp,
} from '@/lib/copilot/generated/mothership-stream-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import type { StreamEvent, ToolCallResult } from '@/lib/copilot/request/types'
import {
extractDeletedResourcesFromToolResult,
@@ -30,63 +33,102 @@ export async function handleResourceSideEffects(
onEvent: ((event: StreamEvent) => void | Promise<void>) | undefined,
isAborted: () => boolean
): Promise<void> {
let isDeleteOp = false
if (hasDeleteCapability(toolName)) {
const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output)
if (deleted.length > 0) {
isDeleteOp = true
removeChatResources(chatId, deleted).catch((err) => {
logger.warn('Failed to remove chat resources after deletion', {
chatId,
error: toError(err).message,
})
})
for (const resource of deleted) {
if (isAborted()) break
await onEvent?.({
type: MothershipStreamV1EventType.resource,
payload: {
op: MothershipStreamV1ResourceOp.remove,
resource: { type: resource.type, id: resource.id, title: resource.title },
},
})
}
}
// Cheap early exit so we don't emit a span for tools that can never
// produce resources (most of them). The span only shows up for tools
// that might actually do resource work.
if (
!hasDeleteCapability(toolName) &&
!isResourceToolName(toolName) &&
!(result.resources && result.resources.length > 0)
) {
return
}
if (!isDeleteOp && !isAborted()) {
const resources =
result.resources && result.resources.length > 0
? result.resources
: isResourceToolName(toolName)
? extractResourcesFromToolResult(toolName, params, result.output)
: []
return withCopilotSpan(
TraceSpan.CopilotToolsHandleResourceSideEffects,
{
[TraceAttr.ToolName]: toolName,
[TraceAttr.ChatId]: chatId,
},
async (span) => {
let isDeleteOp = false
let removedCount = 0
let upsertedCount = 0
if (resources.length > 0) {
logger.info('[file-stream-server] Emitting resource upsert events', {
toolName,
chatId,
resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })),
})
persistChatResources(chatId, resources).catch((err) => {
logger.warn('Failed to persist chat resources', {
chatId,
error: toError(err).message,
})
})
if (hasDeleteCapability(toolName)) {
const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output)
if (deleted.length > 0) {
isDeleteOp = true
removedCount = deleted.length
// Detached from the span lifecycle — the span ends before the
// DB call completes. That is intentional; we want the span to
// reflect the synchronous decision + event emission, not the
// best-effort persistence.
removeChatResources(chatId, deleted).catch((err) => {
logger.warn('Failed to remove chat resources after deletion', {
chatId,
error: toError(err).message,
})
})
for (const resource of resources) {
if (isAborted()) break
await onEvent?.({
type: MothershipStreamV1EventType.resource,
payload: {
op: MothershipStreamV1ResourceOp.upsert,
resource: { type: resource.type, id: resource.id, title: resource.title },
},
})
for (const resource of deleted) {
if (isAborted()) break
await onEvent?.({
type: MothershipStreamV1EventType.resource,
payload: {
op: MothershipStreamV1ResourceOp.remove,
resource: { type: resource.type, id: resource.id, title: resource.title },
},
})
}
}
}
if (!isDeleteOp && !isAborted()) {
const resources =
result.resources && result.resources.length > 0
? result.resources
: isResourceToolName(toolName)
? extractResourcesFromToolResult(toolName, params, result.output)
: []
if (resources.length > 0) {
upsertedCount = resources.length
logger.info('[file-stream-server] Emitting resource upsert events', {
toolName,
chatId,
resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })),
})
persistChatResources(chatId, resources).catch((err) => {
logger.warn('Failed to persist chat resources', {
chatId,
error: toError(err).message,
})
})
for (const resource of resources) {
if (isAborted()) break
await onEvent?.({
type: MothershipStreamV1EventType.resource,
payload: {
op: MothershipStreamV1ResourceOp.upsert,
resource: { type: resource.type, id: resource.id, title: resource.title },
},
})
}
}
}
span.setAttributes({
[TraceAttr.CopilotResourcesOp]: isDeleteOp
? 'delete'
: upsertedCount > 0
? 'upsert'
: 'none',
[TraceAttr.CopilotResourcesRemovedCount]: removedCount,
[TraceAttr.CopilotResourcesUpsertedCount]: upsertedCount,
[TraceAttr.CopilotResourcesAborted]: isAborted(),
})
}
}
)
}

View File

@@ -5,6 +5,11 @@ import { toError } from '@sim/utils/errors'
import { parse as csvParse } from 'csv-parse/sync'
import { eq } from 'drizzle-orm'
import { FunctionExecute, Read as ReadTool } from '@/lib/copilot/generated/tool-catalog-v1'
import { CopilotTableOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { withCopilotSpan } from '@/lib/copilot/request/otel'
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types'
import { getTableById } from '@/lib/table/service'
@@ -26,105 +31,126 @@ export async function maybeWriteOutputToTable(
const outputTable = params?.outputTable as string | undefined
if (!outputTable) return result
try {
const table = await getTableById(outputTable)
if (!table) {
return {
success: false,
error: `Table "${outputTable}" not found`,
}
}
const rawOutput = result.output
let rows: Array<Record<string, unknown>>
if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) {
const inner = (rawOutput as Record<string, unknown>).result
if (Array.isArray(inner)) {
rows = inner
} else {
return {
success: false,
error: 'outputTable requires the code to return an array of objects',
return withCopilotSpan(
TraceSpan.CopilotToolsWriteOutputTable,
{
[TraceAttr.ToolName]: toolName,
[TraceAttr.CopilotTableId]: outputTable,
[TraceAttr.WorkspaceId]: context.workspaceId,
},
async (span) => {
try {
const table = await getTableById(outputTable)
if (!table) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound)
return {
success: false,
error: `Table "${outputTable}" not found`,
}
}
}
} else if (Array.isArray(rawOutput)) {
rows = rawOutput
} else {
return {
success: false,
error: 'outputTable requires the code to return an array of objects',
}
}
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
return {
success: false,
error: `outputTable row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
}
}
const rawOutput = result.output
let rows: Array<Record<string, unknown>>
if (rows.length === 0) {
return {
success: false,
error: 'outputTable requires at least one row — code returned an empty array',
}
}
if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) {
const inner = (rawOutput as Record<string, unknown>).result
if (Array.isArray(inner)) {
rows = inner
} else {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape)
return {
success: false,
error: 'outputTable requires the code to return an array of objects',
}
}
} else if (Array.isArray(rawOutput)) {
rows = rawOutput
} else {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape)
return {
success: false,
error: 'outputTable requires the code to return an array of objects',
}
}
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await db.transaction(async (tx) => {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length)
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded)
return {
success: false,
error: `outputTable row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
}
}
if (rows.length === 0) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows)
return {
success: false,
error: 'outputTable requires at least one row — code returned an empty array',
}
}
const now = new Date()
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
const values = chunk.map((rowData, j) => ({
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
await db.transaction(async (tx) => {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
const now = new Date()
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
const values = chunk.map((rowData, j) => ({
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
tableId: outputTable,
workspaceId: context.workspaceId!,
data: rowData,
position: i + j,
createdAt: now,
updatedAt: now,
createdBy: context.userId,
}))
await tx.insert(userTableRows).values(values)
}
})
logger.info('Tool output written to table', {
toolName,
tableId: outputTable,
workspaceId: context.workspaceId!,
data: rowData,
position: i + j,
createdAt: now,
updatedAt: now,
createdBy: context.userId,
}))
await tx.insert(userTableRows).values(values)
rowCount: rows.length,
})
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Wrote)
return {
success: true,
output: {
message: `Wrote ${rows.length} rows to table ${outputTable}`,
tableId: outputTable,
rowCount: rows.length,
},
}
} catch (err) {
logger.warn('Failed to write tool output to table', {
toolName,
outputTable,
error: toError(err).message,
})
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed)
span.addEvent(TraceEvent.CopilotTableError, {
[TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
})
return {
success: false,
error: `Failed to write to table: ${toError(err).message}`,
}
}
})
logger.info('Tool output written to table', {
toolName,
tableId: outputTable,
rowCount: rows.length,
})
return {
success: true,
output: {
message: `Wrote ${rows.length} rows to table ${outputTable}`,
tableId: outputTable,
rowCount: rows.length,
},
}
} catch (err) {
logger.warn('Failed to write tool output to table', {
toolName,
outputTable,
error: toError(err).message,
})
return {
success: false,
error: `Failed to write to table: ${toError(err).message}`,
}
}
)
}
export async function maybeWriteReadCsvToTable(
@@ -140,110 +166,136 @@ export async function maybeWriteReadCsvToTable(
const outputTable = params?.outputTable as string | undefined
if (!outputTable) return result
try {
const table = await getTableById(outputTable)
if (!table) {
return { success: false, error: `Table "${outputTable}" not found` }
}
const output = result.output as Record<string, unknown>
const content = (output.content as string) || ''
if (!content.trim()) {
return { success: false, error: 'File has no content to import into table' }
}
const filePath = (params?.path as string) || ''
const ext = filePath.split('.').pop()?.toLowerCase()
let rows: Record<string, unknown>[]
if (ext === 'json') {
const parsed = JSON.parse(content)
if (!Array.isArray(parsed)) {
return {
success: false,
error: 'JSON file must contain an array of objects for table import',
return withCopilotSpan(
TraceSpan.CopilotToolsWriteCsvToTable,
{
[TraceAttr.ToolName]: toolName,
[TraceAttr.CopilotTableId]: outputTable,
[TraceAttr.WorkspaceId]: context.workspaceId,
},
async (span) => {
try {
const table = await getTableById(outputTable)
if (!table) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound)
return { success: false, error: `Table "${outputTable}" not found` }
}
}
rows = parsed
} else {
rows = csvParse(content, {
columns: true,
skip_empty_lines: true,
trim: true,
relax_column_count: true,
relax_quotes: true,
skip_records_with_error: true,
cast: false,
}) as Record<string, unknown>[]
}
if (rows.length === 0) {
return { success: false, error: 'File has no data rows to import' }
}
const output = result.output as Record<string, unknown>
const content = (output.content as string) || ''
if (!content.trim()) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyContent)
return { success: false, error: 'File has no content to import into table' }
}
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
return {
success: false,
error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
}
}
const filePath = (params?.path as string) || ''
const ext = filePath.split('.').pop()?.toLowerCase()
span.setAttributes({
[TraceAttr.CopilotTableSourcePath]: filePath,
[TraceAttr.CopilotTableSourceFormat]: ext === 'json' ? 'json' : 'csv',
[TraceAttr.CopilotTableSourceContentBytes]: content.length,
})
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await db.transaction(async (tx) => {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
let rows: Record<string, unknown>[]
if (ext === 'json') {
const parsed = JSON.parse(content)
if (!Array.isArray(parsed)) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidJsonShape)
return {
success: false,
error: 'JSON file must contain an array of objects for table import',
}
}
rows = parsed
} else {
rows = csvParse(content, {
columns: true,
skip_empty_lines: true,
trim: true,
relax_column_count: true,
relax_quotes: true,
skip_records_with_error: true,
cast: false,
}) as Record<string, unknown>[]
}
span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length)
if (rows.length === 0) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows)
return { success: false, error: 'File has no data rows to import' }
}
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded)
return {
success: false,
error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
}
}
const now = new Date()
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
const values = chunk.map((rowData, j) => ({
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
await db.transaction(async (tx) => {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
const now = new Date()
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
if (context.abortSignal?.aborted) {
throw new Error('Request aborted before tool mutation could be applied')
}
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
const values = chunk.map((rowData, j) => ({
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
tableId: outputTable,
workspaceId: context.workspaceId!,
data: rowData,
position: i + j,
createdAt: now,
updatedAt: now,
createdBy: context.userId,
}))
await tx.insert(userTableRows).values(values)
}
})
logger.info('Read output written to table', {
toolName,
tableId: outputTable,
workspaceId: context.workspaceId!,
data: rowData,
position: i + j,
createdAt: now,
updatedAt: now,
createdBy: context.userId,
}))
await tx.insert(userTableRows).values(values)
tableName: table.name,
rowCount: rows.length,
filePath,
})
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Imported)
return {
success: true,
output: {
message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`,
tableId: outputTable,
tableName: table.name,
rowCount: rows.length,
},
}
} catch (err) {
logger.warn('Failed to write read output to table', {
toolName,
outputTable,
error: toError(err).message,
})
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed)
span.addEvent(TraceEvent.CopilotTableError, {
[TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
})
return {
success: false,
error: `Failed to import into table: ${toError(err).message}`,
}
}
})
logger.info('Read output written to table', {
toolName,
tableId: outputTable,
tableName: table.name,
rowCount: rows.length,
filePath,
})
return {
success: true,
output: {
message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`,
tableId: outputTable,
tableName: table.name,
rowCount: rows.length,
},
}
} catch (err) {
logger.warn('Failed to write read output to table', {
toolName,
outputTable,
error: toError(err).message,
})
return {
success: false,
error: `Failed to import into table: ${toError(err).message}`,
}
}
)
}

View File

@@ -1,3 +1,4 @@
import type { Context } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
import {
@@ -9,6 +10,7 @@ import {
RequestTraceV1SpanStatus,
type RequestTraceV1UsageSummary,
} from '@/lib/copilot/generated/request-trace-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { env } from '@/lib/core/config/env'
const logger = createLogger('RequestTrace')
@@ -71,6 +73,13 @@ export class TraceCollector {
chatId?: string
runId?: string
executionId?: string
// Original user prompt, surfaced on the `request_traces.message`
// column at row-insert time so it's queryable from the DB without
// going through Tempo. Sim already has this at chat-POST time; it's
// threaded through here to the trace report so the row is complete
// the moment it's first written instead of waiting on the late
// analytics UPDATE.
userMessage?: string
usage?: { prompt: number; completion: number }
cost?: { input: number; output: number; total: number }
}): RequestTraceV1SimReport {
@@ -96,6 +105,7 @@ export class TraceCollector {
chatId: params.chatId,
runId: params.runId,
executionId: params.executionId,
...(params.userMessage ? { userMessage: params.userMessage } : {}),
startMs: this.startMs,
endMs,
durationMs: endMs - this.startMs,
@@ -107,14 +117,27 @@ export class TraceCollector {
}
}
export async function reportTrace(trace: RequestTraceV1SimReport): Promise<void> {
const response = await fetch(`${SIM_AGENT_API_URL}/api/traces`, {
export async function reportTrace(
trace: RequestTraceV1SimReport,
otelContext?: Context
): Promise<void> {
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
const body = JSON.stringify(trace)
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/traces`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
},
body: JSON.stringify(trace),
body,
otelContext,
spanName: 'sim → go /api/traces',
operation: 'report_trace',
attributes: {
[TraceAttr.RequestId]: trace.simRequestId ?? '',
[TraceAttr.HttpRequestContentLength]: body.length,
[TraceAttr.CopilotTraceSpanCount]: trace.spans?.length ?? 0,
},
})
if (!response.ok) {

View File

@@ -1,5 +1,6 @@
import type { AsyncCompletionSignal } from '@/lib/copilot/async-runs/lifecycle'
import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1'
import type { RequestTraceV1Span } from '@/lib/copilot/generated/request-trace-v1'
import type { StreamEvent } from '@/lib/copilot/request/session'
import type { TraceCollector } from '@/lib/copilot/request/trace'
import type { ToolExecutionContext, ToolExecutionResult } from '@/lib/copilot/tool-executor/types'
@@ -99,6 +100,7 @@ export interface StreamingContext {
edit?: Record<string, unknown>
} | null
trace: TraceCollector
subAgentTraceSpans?: Map<string, RequestTraceV1Span>
}
export interface FileAttachment {
@@ -138,6 +140,19 @@ export interface OrchestratorOptions {
export interface OrchestratorResult {
success: boolean
/**
* True iff the non-success outcome was a user-initiated cancel
* (abort signal fired or client disconnected). Lets callers treat
* cancels differently from actual errors — notably, `buildOnComplete`
* must NOT finalize the chat row on cancel, because the browser's
* `/api/copilot/chat/stop` POST owns writing the partial assistant
* content and clearing `conversationId` in one UPDATE. Finalizing
* here would race and clear `conversationId` first, making the stop
* UPDATE match zero rows and the partial content vanish on refetch.
*
* Always false when `success=true`.
*/
cancelled?: boolean
content: string
contentBlocks: ContentBlock[]
toolCalls: ToolCallSummary[]

View File

@@ -9,6 +9,7 @@ import {
RunFromBlock,
RunWorkflowUntilBlock,
} from '@/lib/copilot/generated/tool-catalog-v1'
import { traceparentHeader } from '@/lib/copilot/tools/client/trace-context'
import { executeWorkflowWithFullLogging } from '@/app/workspace/[workspaceId]/w/[workflowId]/utils/workflow-execution-utils'
import { useExecutionStore } from '@/stores/execution/store'
import {
@@ -466,7 +467,7 @@ async function reportCompletion(
})
const res = await fetch(COPILOT_CONFIRM_API_PATH, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...traceparentHeader() },
body,
})
const LARGE_PAYLOAD_THRESHOLD = 10 * 1024 * 1024
@@ -480,7 +481,7 @@ async function reportCompletion(
})
const retryRes = await fetch(COPILOT_CONFIRM_API_PATH, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', ...traceparentHeader() },
body: JSON.stringify({
toolCallId,
status,

View File

@@ -0,0 +1,17 @@
// Browser-side holder for the active copilot chat's W3C traceparent.
// This is a module-level singleton on purpose: client tool callbacks
// fire from deep inside runtime code that cannot thread a React ref,
// and the browser only ever has one active chat at a time (enforced by
// the stop-barrier), so shared mutable module state is safe here.
let currentTraceparent: string | undefined

export function setCurrentChatTraceparent(value: string | undefined): void {
  currentTraceparent = value
}

// Intended to be spread into `fetch` headers:
// `headers: { ...traceparentHeader(), ... }`.
export function traceparentHeader(): Record<string, string> {
  if (currentTraceparent) {
    return { traceparent: currentTraceparent }
  }
  return {}
}

View File

@@ -54,13 +54,14 @@ describe('vfs handlers oversize policy', () => {
expect(result.error).toContain('context window')
})
it('fails oversized read results with grep guidance', async () => {
it('fails oversized read results from VFS with grep guidance', async () => {
const vfs = makeVfs()
vfs.readFileContent.mockResolvedValue(null)
vfs.read.mockReturnValue({ content: OVERSIZED_INLINE_CONTENT, totalLines: 1 })
getOrMaterializeVFS.mockResolvedValue(vfs)
const result = await executeVfsRead(
{ path: 'files/big.txt' },
{ path: 'workflows/My Workflow/state.json' },
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
)
@@ -70,9 +71,8 @@ describe('vfs handlers oversize policy', () => {
expect(result.error).toContain('context window')
})
it('fails file-backed oversized read placeholders with grep guidance', async () => {
it('fails file-backed oversized read placeholders with original message', async () => {
const vfs = makeVfs()
vfs.read.mockReturnValue(null)
vfs.readFileContent.mockResolvedValue({
content: '[File too large to display inline: big.txt (6000000 bytes, limit 5242880)]',
totalLines: 1,
@@ -85,8 +85,46 @@ describe('vfs handlers oversize policy', () => {
)
expect(result.success).toBe(false)
expect(result.error).toContain('Use grep')
expect(result.error).toContain('offset/limit')
expect(result.error).toContain('context window')
expect(result.error).toContain('File too large to display inline')
expect(result.error).toContain('big.txt')
})
it('passes through image reads with attachment even when oversized', async () => {
const vfs = makeVfs()
const largeBase64 = 'A'.repeat(TOOL_RESULT_MAX_INLINE_CHARS + 1)
vfs.readFileContent.mockResolvedValue({
content: 'Image: chess.png (500.0KB, image/png)',
totalLines: 1,
attachment: {
type: 'image',
source: { type: 'base64', media_type: 'image/png', data: largeBase64 },
},
})
getOrMaterializeVFS.mockResolvedValue(vfs)
const result = await executeVfsRead(
{ path: 'files/chess.png' },
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
)
expect(result.success).toBe(true)
expect((result.output as { attachment?: { type: string } })?.attachment?.type).toBe('image')
})
it('fails oversized image placeholder when image exceeds size limit', async () => {
const vfs = makeVfs()
vfs.readFileContent.mockResolvedValue({
content: '[Image too large: huge.png (10.0MB, limit 5MB)]',
totalLines: 1,
})
getOrMaterializeVFS.mockResolvedValue(vfs)
const result = await executeVfsRead(
{ path: 'files/huge.png' },
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
)
expect(result.success).toBe(false)
expect(result.error).toContain('too large')
})
})

View File

@@ -161,21 +161,30 @@ export async function executeVfsRead(
const filename = path.slice('uploads/'.length)
const uploadResult = await readChatUpload(filename, context.chatId)
if (uploadResult) {
const isImage = hasImageAttachment(uploadResult)
if (
!hasImageAttachment(uploadResult) &&
!isImage &&
(isOversizedReadPlaceholder(uploadResult.content) ||
serializedResultSize(uploadResult) > TOOL_RESULT_MAX_INLINE_CHARS)
) {
logger.warn('Upload read result too large', {
path,
hasAttachment: isImage,
contentLength: uploadResult.content.length,
serializedSize: serializedResultSize(uploadResult),
})
return {
success: false,
error:
'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
error: isOversizedReadPlaceholder(uploadResult.content)
? uploadResult.content
: 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
}
}
const windowedUpload = applyWindow(uploadResult)
logger.debug('vfs_read resolved chat upload', {
path,
totalLines: uploadResult.totalLines,
hasAttachment: isImage,
offset,
limit,
})
@@ -188,34 +197,47 @@ export async function executeVfsRead(
}
const vfs = await getOrMaterializeVFS(workspaceId, context.userId)
const result = vfs.read(path, offset, limit)
if (!result) {
const fileContent = await vfs.readFileContent(path)
if (fileContent) {
if (
!hasImageAttachment(fileContent) &&
(isOversizedReadPlaceholder(fileContent.content) ||
serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS)
) {
return {
success: false,
error:
'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
}
}
const windowedFileContent = applyWindow(fileContent)
logger.debug('vfs_read resolved workspace file', {
// For workspace file paths (files/ or recently-deleted/files/), try readFileContent
// first so images, PDFs, and documents get proper attachment/parsing handling rather
// than being served as raw VFS metadata text.
const fileContent = await vfs.readFileContent(path)
if (fileContent) {
const isImage = hasImageAttachment(fileContent)
if (
!isImage &&
(isOversizedReadPlaceholder(fileContent.content) ||
serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS)
) {
logger.warn('File read result too large', {
path,
totalLines: fileContent.totalLines,
offset,
limit,
hasAttachment: isImage,
contentLength: fileContent.content.length,
serializedSize: serializedResultSize(fileContent),
})
return {
success: true,
output: windowedFileContent,
success: false,
error: isOversizedReadPlaceholder(fileContent.content)
? fileContent.content
: 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
}
}
const windowedFileContent = applyWindow(fileContent)
logger.debug('vfs_read resolved workspace file', {
path,
totalLines: fileContent.totalLines,
hasAttachment: isImage,
offset,
limit,
})
return {
success: true,
output: windowedFileContent,
}
}
const result = vfs.read(path, offset, limit)
if (!result) {
const suggestions = vfs.suggestSimilar(path)
logger.warn('vfs_read file not found', { path, suggestions })
const hint =

View File

@@ -0,0 +1,94 @@
/**
* @vitest-environment node
*/
import { randomFillSync } from 'node:crypto'
import { loggerMock } from '@sim/testing'
import { describe, expect, it, vi } from 'vitest'
const { downloadWorkspaceFile } = vi.hoisted(() => ({
downloadWorkspaceFile: vi.fn(),
}))
vi.mock('@sim/logger', () => loggerMock)
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
downloadWorkspaceFile,
}))
import { readFileRecord } from '@/lib/copilot/vfs/file-reader'
const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024
/**
 * Build a PNG filled with random RGB noise at the given dimensions.
 * Random noise compresses very poorly, which makes it easy to
 * synthesize a PNG that exceeds a byte budget for the oversize tests.
 */
async function makeNoisePng(width: number, height: number): Promise<Buffer> {
  const { default: sharp } = await import('sharp')
  const pixels = Buffer.alloc(width * height * 3)
  randomFillSync(pixels)
  const image = sharp(pixels, { raw: { width, height, channels: 3 } })
  return image.png().toBuffer()
}
describe('readFileRecord', () => {
  // Small image well under the 5MB cap: should come back as a base64
  // image attachment with the bytes untouched and no resize note in
  // the human-readable content line.
  it('returns small images as attachments without resize note', async () => {
    const sharp = (await import('sharp')).default
    const smallPng = await sharp({
      create: {
        width: 200,
        height: 200,
        channels: 3,
        background: { r: 255, g: 0, b: 0 },
      },
    })
      .png()
      .toBuffer()
    downloadWorkspaceFile.mockResolvedValue(smallPng)
    const result = await readFileRecord({
      id: 'wf_small',
      workspaceId: 'ws_1',
      name: 'small.png',
      key: 'uploads/small.png',
      path: '/api/files/serve/uploads%2Fsmall.png?context=mothership',
      size: smallPng.length,
      type: 'image/png',
      uploadedBy: 'user_1',
      uploadedAt: new Date(),
      deletedAt: null,
      storageContext: 'mothership',
    })
    expect(result?.attachment?.type).toBe('image')
    expect(result?.attachment?.source.media_type).toBe('image/png')
    expect(result?.content).not.toContain('resized for vision')
    // Bytes must round-trip unchanged through the base64 attachment.
    expect(Buffer.from(result?.attachment?.source.data ?? '', 'base64')).toEqual(smallPng)
  })
  // Noise PNG over the 5MB cap: should still produce an attachment,
  // but recompressed/downscaled under the limit, with the content
  // line flagging the resize and the media type being one of the
  // encoder outputs (jpeg/webp) or original png.
  it('downscales oversized images into attachments that fit the read limit', async () => {
    const largePng = await makeNoisePng(1800, 1800)
    // Sanity check: the fixture must actually exceed the cap.
    expect(largePng.length).toBeGreaterThan(MAX_IMAGE_READ_BYTES)
    downloadWorkspaceFile.mockResolvedValue(largePng)
    const result = await readFileRecord({
      id: 'wf_large',
      workspaceId: 'ws_1',
      name: 'chesspng.png',
      key: 'uploads/chesspng.png',
      path: '/api/files/serve/uploads%2Fchesspng.png?context=mothership',
      size: largePng.length,
      type: 'image/png',
      uploadedBy: 'user_1',
      uploadedAt: new Date(),
      deletedAt: null,
      storageContext: 'mothership',
    })
    expect(result?.attachment?.type).toBe('image')
    expect(result?.content).toContain('resized for vision')
    const decoded = Buffer.from(result?.attachment?.source.data ?? '', 'base64')
    expect(decoded.length).toBeLessThanOrEqual(MAX_IMAGE_READ_BYTES)
    expect(result?.attachment?.source.media_type).toMatch(/^image\/(jpeg|webp|png)$/)
  })
})

View File

@@ -1,13 +1,39 @@
import { type Span, trace } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import {
CopilotVfsOutcome,
CopilotVfsReadOutcome,
CopilotVfsReadPath,
} from '@/lib/copilot/generated/trace-attribute-values-v1'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
import { markSpanForError } from '@/lib/copilot/request/otel'
import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
import { isImageFileType } from '@/lib/uploads/utils/file-utils'
// Lazy tracer accessor (same pattern as lib/copilot/request/otel.ts):
// resolving through the global `trace` API at call time picks up
// whichever tracer provider was registered during instrumentation
// setup, instead of capturing one at module-load time.
function getVfsTracer() {
  return trace.getTracer('sim-copilot-vfs', '1.0.0')
}
// Thin local alias over markSpanForError so call sites in this file
// keep a short, grep-friendly name for span error recording.
function recordSpanError(span: Span, err: unknown) {
  markSpanForError(span, err)
}
const logger = createLogger('FileReader')
const MAX_TEXT_READ_BYTES = 5 * 1024 * 1024 // 5 MB
const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 // 5 MB
// Parseable-document byte cap. Large office/PDF files can still
// produce huge extracted text; reject up front to avoid wasting a
// download + parse only to blow past the tool-result budget.
const MAX_PARSEABLE_READ_BYTES = 5 * 1024 * 1024 // 5 MB
const MAX_IMAGE_DIMENSION = 1568
const IMAGE_RESIZE_DIMENSIONS = [1568, 1280, 1024, 768]
const IMAGE_QUALITY_STEPS = [85, 70, 55, 40]
const TEXT_TYPES = new Set([
'text/plain',
@@ -42,6 +68,194 @@ function detectImageMime(buf: Buffer, claimed: string): string {
return claimed
}
// Result of preparing an image buffer for a vision model.
interface PreparedVisionImage {
  // Final image bytes (original or re-encoded).
  buffer: Buffer
  // Final MIME type; may differ from the input when re-encoded.
  mediaType: string
  // True when the image was resized/recompressed to fit the byte cap.
  resized: boolean
}

/**
 * Prepare an image for vision models: detect media type, optionally
 * resize/compress with sharp, and return the prepared buffer, or
 * `null` when the image cannot be brought under MAX_IMAGE_READ_BYTES.
 *
 * Wrapped in a `copilot.vfs.prepare_image` span so the external trace
 * shows exactly when an image read blocked the request on CPU-heavy
 * encode attempts. Attributes record input dimensions, whether a resize
 * was needed, how many encode attempts it took, and the final
 * dimension/quality chosen.
 *
 * Failure handling is deliberately best-effort: if sharp fails to load
 * or metadata cannot be read, the original buffer is passed through
 * when it already fits the budget, and rejected (null) otherwise.
 */
async function prepareImageForVision(
  buffer: Buffer,
  claimedType: string
): Promise<PreparedVisionImage | null> {
  return getVfsTracer().startActiveSpan(
    TraceSpan.CopilotVfsPrepareImage,
    {
      attributes: {
        [TraceAttr.CopilotVfsInputBytes]: buffer.length,
        [TraceAttr.CopilotVfsInputMediaTypeClaimed]: claimedType,
      },
    },
    async (span) => {
      try {
        // Sniff real bytes rather than trusting the claimed MIME type.
        const mediaType = detectImageMime(buffer, claimedType)
        span.setAttribute(TraceAttr.CopilotVfsInputMediaTypeDetected, mediaType)
        let sharpModule: typeof import('sharp')
        try {
          sharpModule = (await import('sharp')).default
        } catch (err) {
          // sharp unavailable: pass the original through only if it
          // already fits the budget; otherwise reject with null.
          logger.warn('Failed to load sharp for image preparation', {
            mediaType,
            error: toError(err).message,
          })
          span.setAttribute(TraceAttr.CopilotVfsSharpLoadFailed, true)
          const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES
          span.setAttribute(
            TraceAttr.CopilotVfsOutcome,
            fitsWithoutSharp ? 'passthrough_no_sharp' : 'rejected_no_sharp'
          )
          return fitsWithoutSharp ? { buffer, mediaType, resized: false } : null
        }
        let metadata: Awaited<ReturnType<ReturnType<typeof sharpModule>['metadata']>>
        try {
          metadata = await sharpModule(buffer, { limitInputPixels: false }).metadata()
        } catch (err) {
          // Unreadable metadata: same budget-gated passthrough as the
          // no-sharp branch above.
          logger.warn('Failed to read image metadata for VFS read', {
            mediaType,
            error: toError(err).message,
          })
          span.setAttribute(TraceAttr.CopilotVfsMetadataFailed, true)
          const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES
          span.setAttribute(
            TraceAttr.CopilotVfsOutcome,
            fitsWithoutSharp ? 'passthrough_no_metadata' : 'rejected_no_metadata'
          )
          return fitsWithoutSharp ? { buffer, mediaType, resized: false } : null
        }
        const width = metadata.width ?? 0
        const height = metadata.height ?? 0
        span.setAttributes({
          [TraceAttr.CopilotVfsInputWidth]: width,
          [TraceAttr.CopilotVfsInputHeight]: height,
        })
        // Resize when over the byte cap OR over the max dimension.
        const needsResize =
          buffer.length > MAX_IMAGE_READ_BYTES ||
          width > MAX_IMAGE_DIMENSION ||
          height > MAX_IMAGE_DIMENSION
        if (!needsResize) {
          span.setAttributes({
            [TraceAttr.CopilotVfsResized]: false,
            [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.PassthroughFitsBudget,
            [TraceAttr.CopilotVfsOutputBytes]: buffer.length,
            [TraceAttr.CopilotVfsOutputMediaType]: mediaType,
          })
          return { buffer, mediaType, resized: false }
        }
        // Formats that may carry transparency are re-encoded as webp
        // (keeps alpha); everything else goes to jpeg.
        const hasAlpha = Boolean(
          metadata.hasAlpha ||
            mediaType === 'image/png' ||
            mediaType === 'image/webp' ||
            mediaType === 'image/gif'
        )
        span.setAttribute(TraceAttr.CopilotVfsHasAlpha, hasAlpha)
        // Ladder search: largest dimension first, then descending
        // quality; first encode that fits the byte cap wins.
        let attempts = 0
        for (const dimension of IMAGE_RESIZE_DIMENSIONS) {
          for (const quality of IMAGE_QUALITY_STEPS) {
            attempts += 1
            try {
              const pipeline = sharpModule(buffer, { limitInputPixels: false }).rotate().resize({
                width: dimension,
                height: dimension,
                fit: 'inside',
                withoutEnlargement: true,
              })
              const transformed = hasAlpha
                ? {
                    buffer: await pipeline
                      .webp({ quality, alphaQuality: quality, effort: 4 })
                      .toBuffer(),
                    mediaType: 'image/webp',
                  }
                : {
                    buffer: await pipeline
                      .jpeg({ quality, mozjpeg: true, chromaSubsampling: '4:4:4' })
                      .toBuffer(),
                    mediaType: 'image/jpeg',
                  }
              // Record every attempt so the trace shows the full
              // cost of the ladder, not just the winning encode.
              span.addEvent(TraceEvent.CopilotVfsResizeAttempt, {
                [TraceAttr.CopilotVfsResizeDimension]: dimension,
                [TraceAttr.CopilotVfsResizeQuality]: quality,
                [TraceAttr.CopilotVfsResizeOutputBytes]: transformed.buffer.length,
                [TraceAttr.CopilotVfsResizeFitsBudget]:
                  transformed.buffer.length <= MAX_IMAGE_READ_BYTES,
              })
              if (transformed.buffer.length <= MAX_IMAGE_READ_BYTES) {
                logger.info('Resized image for VFS read', {
                  originalBytes: buffer.length,
                  outputBytes: transformed.buffer.length,
                  originalWidth: width || undefined,
                  originalHeight: height || undefined,
                  maxDimension: dimension,
                  quality,
                  originalMediaType: mediaType,
                  outputMediaType: transformed.mediaType,
                })
                span.setAttributes({
                  [TraceAttr.CopilotVfsResized]: true,
                  [TraceAttr.CopilotVfsResizeAttempts]: attempts,
                  [TraceAttr.CopilotVfsResizeChosenDimension]: dimension,
                  [TraceAttr.CopilotVfsResizeChosenQuality]: quality,
                  [TraceAttr.CopilotVfsOutputBytes]: transformed.buffer.length,
                  [TraceAttr.CopilotVfsOutputMediaType]: transformed.mediaType,
                  [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.Resized,
                })
                return {
                  buffer: transformed.buffer,
                  mediaType: transformed.mediaType,
                  resized: true,
                }
              }
            } catch (err) {
              // A failed encode attempt is logged and recorded on the
              // span, then the ladder continues with the next step.
              logger.warn('Failed image resize attempt for VFS read', {
                mediaType,
                dimension,
                quality,
                error: toError(err).message,
              })
              span.addEvent(TraceEvent.CopilotVfsResizeAttemptFailed, {
                [TraceAttr.CopilotVfsResizeDimension]: dimension,
                [TraceAttr.CopilotVfsResizeQuality]: quality,
                [TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
              })
            }
          }
        }
        // Every rung of the ladder produced output over the cap (or
        // failed): give up and let the caller emit the too-large error.
        span.setAttributes({
          [TraceAttr.CopilotVfsResized]: false,
          [TraceAttr.CopilotVfsResizeAttempts]: attempts,
          [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.RejectedTooLargeAfterResize,
        })
        return null
      } catch (err) {
        recordSpanError(span, err)
        throw err
      } finally {
        span.end()
      }
    }
  )
}
export interface FileReadResult {
content: string
totalLines: number
@@ -59,75 +273,140 @@ export interface FileReadResult {
* Read and return the content of a workspace file record.
* Handles images (base64 attachment), parseable documents (PDF, DOCX, etc.),
* binary files, and plain text with size guards.
*
* Wrapped in `copilot.vfs.read_file` so the parent mothership trace shows
* per-file read latency, the path taken (image / text / parseable /
* binary), and any size rejection. The `prepareImageForVision` span
* nests underneath for the image-resize path.
*/
export async function readFileRecord(record: WorkspaceFileRecord): Promise<FileReadResult | null> {
try {
if (isImageFileType(record.type)) {
if (record.size > MAX_IMAGE_READ_BYTES) {
return {
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB)]`,
totalLines: 1,
}
}
const buffer = await downloadWorkspaceFile(record)
const mime = detectImageMime(buffer, record.type)
return {
content: `Image: ${record.name} (${(record.size / 1024).toFixed(1)}KB, ${mime})`,
totalLines: 1,
attachment: {
type: 'image',
source: {
type: 'base64',
media_type: mime,
data: buffer.toString('base64'),
},
},
}
}
if (isReadableType(record.type)) {
if (record.size > MAX_TEXT_READ_BYTES) {
return {
content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`,
totalLines: 1,
}
}
const buffer = await downloadWorkspaceFile(record)
const content = buffer.toString('utf-8')
return { content, totalLines: content.split('\n').length }
}
const ext = getExtension(record.name)
if (PARSEABLE_EXTENSIONS.has(ext)) {
const buffer = await downloadWorkspaceFile(record)
return getVfsTracer().startActiveSpan(
TraceSpan.CopilotVfsReadFile,
{
attributes: {
[TraceAttr.CopilotVfsFileName]: record.name,
[TraceAttr.CopilotVfsFileMediaType]: record.type,
[TraceAttr.CopilotVfsFileSizeBytes]: record.size,
[TraceAttr.CopilotVfsFileExtension]: getExtension(record.name),
},
},
async (span) => {
try {
const { parseBuffer } = await import('@/lib/file-parsers')
const result = await parseBuffer(buffer, ext)
const content = result.content || ''
return { content, totalLines: content.split('\n').length }
} catch (parseErr) {
logger.warn('Failed to parse document', {
fileName: record.name,
ext,
error: toError(parseErr).message,
if (isImageFileType(record.type)) {
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Image)
const originalBuffer = await downloadWorkspaceFile(record)
const prepared = await prepareImageForVision(originalBuffer, record.type)
if (!prepared) {
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ImageTooLarge)
return {
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB after resize/compression)]`,
totalLines: 1,
}
}
const sizeKb = (prepared.buffer.length / 1024).toFixed(1)
const resizeNote = prepared.resized ? ', resized for vision' : ''
span.setAttributes({
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.ImagePrepared,
[TraceAttr.CopilotVfsReadOutputBytes]: prepared.buffer.length,
[TraceAttr.CopilotVfsReadOutputMediaType]: prepared.mediaType,
[TraceAttr.CopilotVfsReadImageResized]: prepared.resized,
})
return {
content: `Image: ${record.name} (${sizeKb}KB, ${prepared.mediaType}${resizeNote})`,
totalLines: 1,
attachment: {
type: 'image',
source: {
type: 'base64' as const,
media_type: prepared.mediaType,
data: prepared.buffer.toString('base64'),
},
},
}
}
if (isReadableType(record.type)) {
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Text)
if (record.size > MAX_TEXT_READ_BYTES) {
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.TextTooLarge)
return {
content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`,
totalLines: 1,
}
}
const buffer = await downloadWorkspaceFile(record)
const content = buffer.toString('utf-8')
const lines = content.split('\n').length
span.setAttributes({
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.TextRead,
[TraceAttr.CopilotVfsReadOutputBytes]: buffer.length,
[TraceAttr.CopilotVfsReadOutputLines]: lines,
})
return { content, totalLines: lines }
}
const ext = getExtension(record.name)
if (PARSEABLE_EXTENSIONS.has(ext)) {
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.ParseableDocument)
if (record.size > MAX_PARSEABLE_READ_BYTES) {
span.setAttribute(
TraceAttr.CopilotVfsReadOutcome,
CopilotVfsReadOutcome.DocumentTooLarge
)
return {
content: `[Document too large to parse inline: ${record.name} (${record.size} bytes, limit ${MAX_PARSEABLE_READ_BYTES})]`,
totalLines: 1,
}
}
const buffer = await downloadWorkspaceFile(record)
try {
const { parseBuffer } = await import('@/lib/file-parsers')
const result = await parseBuffer(buffer, ext)
const content = result.content || ''
const lines = content.split('\n').length
span.setAttributes({
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.DocumentParsed,
[TraceAttr.CopilotVfsReadOutputBytes]: content.length,
[TraceAttr.CopilotVfsReadOutputLines]: lines,
})
return { content, totalLines: lines }
} catch (parseErr) {
logger.warn('Failed to parse document', {
fileName: record.name,
ext,
error: toError(parseErr).message,
})
span.addEvent(TraceEvent.CopilotVfsParseFailed, {
[TraceAttr.ErrorMessage]: toError(parseErr).message.slice(0, 500),
})
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ParseFailed)
return {
content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`,
totalLines: 1,
}
}
}
span.setAttributes({
[TraceAttr.CopilotVfsReadPath]: CopilotVfsReadPath.Binary,
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.BinaryPlaceholder,
})
return {
content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`,
content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`,
totalLines: 1,
}
} catch (err) {
logger.warn('Failed to read workspace file', {
fileName: record.name,
error: toError(err).message,
})
recordSpanError(span, err)
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ReadFailed)
return null
} finally {
span.end()
}
}
return {
content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`,
totalLines: 1,
}
} catch (err) {
logger.warn('Failed to read workspace file', {
fileName: record.name,
error: toError(err).message,
})
return null
}
)
}

View File

@@ -19,6 +19,7 @@
import { context, type Span, SpanStatusCode, trace } from '@opentelemetry/api'
import { createLogger } from '@sim/logger'
import { toError } from '@sim/utils/errors'
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
import type { TraceSpan } from '@/lib/logs/types'
/**
@@ -279,8 +280,8 @@ export function createOTelSpanFromTraceSpan(traceSpan: TraceSpan, parentSpan?: S
{
attributes: {
[GenAIAttributes.TOOL_NAME]: toolCall.name,
'tool.status': toolCall.status,
'tool.duration_ms': toolCall.duration || 0,
[TraceAttr.ToolStatus]: toolCall.status,
[TraceAttr.ToolDurationMs]: toolCall.duration || 0,
},
startTime: new Date(toolCall.startTime),
},
@@ -342,8 +343,8 @@ export function createOTelSpansForWorkflowExecution(params: {
[GenAIAttributes.WORKFLOW_ID]: params.workflowId,
[GenAIAttributes.WORKFLOW_NAME]: params.workflowName || params.workflowId,
[GenAIAttributes.WORKFLOW_EXECUTION_ID]: params.executionId,
'workflow.trigger': params.trigger,
'workflow.duration_ms': params.totalDurationMs,
[TraceAttr.WorkflowTrigger]: params.trigger,
[TraceAttr.WorkflowDurationMs]: params.totalDurationMs,
},
startTime: new Date(params.startTime),
},
@@ -404,9 +405,9 @@ export async function traceBlockExecution<T>(
blockMapping.spanName,
{
attributes: {
'block.type': blockType,
'block.id': blockId,
'block.name': blockName,
[TraceAttr.BlockType]: blockType,
[TraceAttr.BlockId]: blockId,
[TraceAttr.BlockName]: blockName,
},
},
async (span) => {
@@ -440,8 +441,8 @@ export function trackPlatformEvent(
const span = tracer.startSpan(eventName, {
attributes: {
...attributes,
'event.name': eventName,
'event.timestamp': Date.now(),
[TraceAttr.EventName]: eventName,
[TraceAttr.EventTimestamp]: Date.now(),
},
})
span.setStatus({ code: SpanStatusCode.OK })

View File

@@ -24,7 +24,7 @@
"test:watch": "vitest",
"test:coverage": "vitest run --coverage",
"email:dev": "email dev --dir components/emails",
"type-check": "tsc --noEmit",
"type-check": "NODE_OPTIONS='--max-old-space-size=8192' tsc --noEmit",
"lint": "biome check --write --unsafe .",
"lint:check": "biome check .",
"format": "biome format --write .",

View File

@@ -534,6 +534,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
updatedAt: '2026-04-16',
},
capabilities: {
temperature: { min: 0, max: 1 },
nativeStructuredOutputs: true,
maxOutputTokens: 128000,
thinking: {

View File

@@ -29,7 +29,7 @@ const socketDb = drizzle(
prepare: false,
idle_timeout: 10,
connect_timeout: 20,
max: 10,
max: 30,
onnotice: () => {},
}),
{ schema }

View File

@@ -1,6 +1,5 @@
{
"lockfileVersion": 1,
"configVersion": 0,
"workspaces": {
"": {
"name": "simstudio",

View File

@@ -27,8 +27,16 @@
"mship-tools:check": "bun run scripts/sync-tool-catalog.ts --check",
"trace-contracts:generate": "bun run scripts/sync-request-trace-contract.ts",
"trace-contracts:check": "bun run scripts/sync-request-trace-contract.ts --check",
"mship:generate": "bun run mship-contracts:generate && bun run mship-tools:generate && bun run trace-contracts:generate",
"mship:check": "bun run mship-contracts:check && bun run mship-tools:check && bun run trace-contracts:check",
"trace-spans-contract:generate": "bun run scripts/sync-trace-spans-contract.ts",
"trace-spans-contract:check": "bun run scripts/sync-trace-spans-contract.ts --check",
"trace-attributes-contract:generate": "bun run scripts/sync-trace-attributes-contract.ts",
"trace-attributes-contract:check": "bun run scripts/sync-trace-attributes-contract.ts --check",
"trace-attribute-values-contract:generate": "bun run scripts/sync-trace-attribute-values-contract.ts",
"trace-attribute-values-contract:check": "bun run scripts/sync-trace-attribute-values-contract.ts --check",
"trace-events-contract:generate": "bun run scripts/sync-trace-events-contract.ts",
"trace-events-contract:check": "bun run scripts/sync-trace-events-contract.ts --check",
"mship:generate": "bun run scripts/generate-mship-contracts.ts",
"mship:check": "bun run scripts/generate-mship-contracts.ts --check",
"prepare": "bun husky",
"type-check": "turbo run type-check",
"release": "bun run scripts/create-single-release.ts"

View File

@@ -14,7 +14,7 @@ const postgresClient = postgres(connectionString, {
prepare: false,
idle_timeout: 20,
connect_timeout: 30,
max: 10,
max: 30,
onnotice: () => {},
})

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env bun
// Drive every mothership contract generator, then biome-format the
// outputs so the committed files match what biome produces on commit
// (avoids the stale-drift that comes from comparing raw json2ts output
// against biome-formatted source).
//
// `--check` regenerates into a temp directory, formats identically,
// and compares against the committed files — same semantics as the
// old per-script `--check`, but accounts for post-generate formatting.
import { spawnSync } from 'node:child_process'
import {
  copyFileSync,
  cpSync,
  mkdirSync,
  mkdtempSync,
  readdirSync,
  readFileSync,
  rmSync,
  writeFileSync,
} from 'node:fs'
import { tmpdir } from 'node:os'
import { dirname, join, resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
// Repo root — this script lives in <root>/scripts/.
const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..')
// Every per-contract generator script, executed in order by runGenerators().
const GENERATORS = [
  'scripts/sync-mothership-stream-contract.ts',
  'scripts/sync-tool-catalog.ts',
  'scripts/sync-request-trace-contract.ts',
  'scripts/sync-trace-spans-contract.ts',
  'scripts/sync-trace-attributes-contract.ts',
  'scripts/sync-trace-attribute-values-contract.ts',
  'scripts/sync-trace-events-contract.ts',
]
// Generated files under this path. We biome-format this whole dir on
// each generate (and the temp copy on each check).
const GENERATED_DIR = 'apps/sim/lib/copilot/generated'
// `tool-schemas-v1.ts` goes through biome's `--unsafe` bracket-quote
// fixer which reformats every key of TOOL_RUNTIME_SCHEMAS. Strip it
// from the format pass so generator output stays stable on both sides.
const FORMAT_EXCLUDE = new Set(['tool-schemas-v1.ts'])
/**
 * Run a child process synchronously with inherited stdio; terminate
 * this script with the child's exit code on any failure.
 */
function run(cmd: string[], cwd: string, env: NodeJS.ProcessEnv = process.env): void {
  const [bin, ...rest] = cmd
  const { status } = spawnSync(bin, rest, { cwd, env, stdio: 'inherit' })
  if (status === 0) return
  // A null status means the child died from a signal — treat as failure.
  process.exit(status ?? 1)
}
/**
 * Run every contract generator script in sequence, from the repo root.
 *
 * @param outputOverride - Accepted for call-site compatibility but
 *   intentionally unused: the individual scripts write to fixed paths,
 *   so `--check` snapshots and restores the target dir instead of
 *   redirecting output (see runCheck). The original empty
 *   `if (outputOverride)` branch was dead code and has been removed.
 */
function runGenerators(outputOverride?: string): void {
  void outputOverride
  for (const script of GENERATORS) {
    run(['bun', 'run', script], ROOT)
  }
}
/** Biome-format every non-excluded .ts file in dir (no-op when none exist). */
function formatGenerated(dir: string): void {
  const targets = readdirNoThrow(dir)
    .filter((f) => f.endsWith('.ts') && !FORMAT_EXCLUDE.has(f))
    .map((f) => join(dir, f))
  if (targets.length > 0) {
    run(['bunx', 'biome', 'check', '--write', ...targets], ROOT)
  }
}
/**
 * List a directory's entries, returning [] when it cannot be read
 * (missing dir, permissions, …). Uses the file's regular `node:fs`
 * import instead of the previous ad-hoc `require` round-trip.
 */
function readdirNoThrow(dir: string): string[] {
  try {
    return readdirSync(dir)
  } catch {
    return []
  }
}
/**
 * `--check` mode: regenerate in place, diff against the committed
 * files, then restore the committed state (check is read-only).
 *
 * Fails when any committed generated file differs after regeneration,
 * OR when regeneration produces a .ts file that was never committed —
 * the original implementation missed brand-new outputs and also left
 * them behind on disk after the "readonly" check.
 */
function runCheck(): void {
  const targetDir = resolve(ROOT, GENERATED_DIR)
  // Snapshot current committed state
  const committed: Record<string, string> = {}
  for (const f of readdirNoThrow(targetDir)) {
    if (!f.endsWith('.ts')) continue
    committed[f] = readFileSync(join(targetDir, f), 'utf8')
  }
  // Regenerate in place + format, then diff against the snapshot.
  runGenerators()
  formatGenerated(targetDir)
  const stale: string[] = []
  for (const name of readdirNoThrow(targetDir)) {
    if (!name.endsWith('.ts') || FORMAT_EXCLUDE.has(name)) continue
    const oldContent = committed[name]
    const newContent = readFileSync(join(targetDir, name), 'utf8')
    // Unknown name => newly generated, uncommitted file: also stale.
    if (oldContent === undefined || newContent !== oldContent) stale.push(name)
  }
  // Restore the committed state regardless of outcome (--check is
  // readonly): rewrite snapshotted files, delete newly created strays.
  for (const name of readdirNoThrow(targetDir)) {
    if (!name.endsWith('.ts')) continue
    const content = committed[name]
    if (content === undefined) rmSync(join(targetDir, name), { force: true })
    else writeFileSync(join(targetDir, name), content, 'utf8')
  }
  if (stale.length > 0) {
    console.error(
      `Generated contracts are stale: ${stale.join(', ')}. Run: bun run mship:generate`,
    )
    process.exit(1)
  }
  console.log('All generated contracts up to date.')
}
/** Default mode: run every generator, then format the output directory. */
function runGenerate(): void {
  const outDir = resolve(ROOT, GENERATED_DIR)
  runGenerators()
  formatGenerated(outDir)
  console.log('Generated + formatted mothership contracts.')
}
// CLI dispatch: `--check` verifies without mutating the tree; default regenerates.
const checkOnly = process.argv.includes('--check')
if (checkOnly) runCheck()
else runGenerate()

View File

@@ -0,0 +1,155 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { dirname, resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
/**
* Generate `apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts`
* from the Go-side `contracts/trace-attribute-values-v1.schema.json`
* contract.
*
* Unlike span-names / attribute-keys / event-names (each of which is a
* single enum), this contract carries MULTIPLE enums — one per span
* attribute whose value set is closed. The schema's `$defs` holds one
* definition per enum (e.g. `CopilotRequestCancelReason`,
* `CopilotAbortOutcome`, …). For each $def we emit a TS `as const`
* object named after the Go type, so call sites read as:
*
* span.setAttribute(
* TraceAttr.CopilotRequestCancelReason,
* CopilotRequestCancelReason.ExplicitStop,
* )
*
* Skipped $defs: anything that doesn't have a string-only `enum`
* array. That filters out wrapper structs the reflector adds
* incidentally (e.g. `TraceAttributeValuesV1AllDefs`).
*/
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
// Repo root — this script lives in <root>/scripts/.
const ROOT = resolve(SCRIPT_DIR, '..')
// Go-side contract read by default; override with --input=<path>.
// NOTE(review): assumes the copilot repo is checked out as a sibling
// of this repo — confirm against CI layout.
const DEFAULT_CONTRACT_PATH = resolve(
  ROOT,
  '../copilot/copilot/contracts/trace-attribute-values-v1.schema.json',
)
// Generated TS value-vocabulary file emitted into the Sim app.
const OUTPUT_PATH = resolve(
  ROOT,
  'apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts',
)
interface ExtractedEnum {
  /** The Go type name — becomes the TS const + type name. */
  name: string
  /** The value strings, sorted for diff stability. */
  values: string[]
}

/**
 * Pull every string-only enum out of the schema's `$defs`, sorted by
 * type name. Defs without a string-only `enum` array (e.g. wrapper
 * structs the reflector adds) are skipped.
 */
function extractEnums(schema: Record<string, unknown>): ExtractedEnum[] {
  const defs = (schema.$defs ?? {}) as Record<string, unknown>
  const extracted = Object.entries(defs).flatMap(([name, def]): ExtractedEnum[] => {
    if (!def || typeof def !== 'object') return []
    const candidate = (def as Record<string, unknown>).enum
    if (!Array.isArray(candidate)) return []
    const strings = candidate.filter((v): v is string => typeof v === 'string')
    if (strings.length !== candidate.length) return []
    return [{ name, values: strings.sort() }]
  })
  return extracted.sort((a, b) => a.name.localeCompare(b.name))
}
/**
 * PascalCase identifier for a wire enum value, e.g.
 * `explicit_stop` -> `ExplicitStop`. Mirrors the algorithm in the
 * span-name and attribute-key sync scripts, matching what a reader
 * would guess from Go's exported constants.
 */
function toValueIdent(value: string): string {
  const words = value.split(/[^A-Za-z0-9]+/).filter((w) => w.length > 0)
  if (words.length === 0) {
    throw new Error(`Cannot derive identifier for enum value: ${value}`)
  }
  let ident = ''
  for (const w of words) {
    ident += w.charAt(0).toUpperCase() + w.slice(1).toLowerCase()
  }
  if (/^[0-9]/.test(ident)) {
    throw new Error(
      `Derived identifier "${ident}" for value "${value}" starts with a digit`,
    )
  }
  return ident
}
/**
 * Emit one `as const` object plus key/value helper types for a single
 * extracted enum. Throws when two distinct values collapse to the same
 * PascalCase identifier (a silent override would break type safety).
 */
function renderEnum(e: ExtractedEnum): string {
  const byIdent = new Map<string, string>()
  const entries: string[] = []
  for (const value of e.values) {
    const ident = toValueIdent(value)
    const existing = byIdent.get(ident)
    if (existing && existing !== value) {
      throw new Error(
        `Enum ${e.name}: identifier collision — "${existing}" and "${value}" both map to "${ident}"`,
      )
    }
    byIdent.set(ident, value)
    entries.push(`  ${ident}: ${JSON.stringify(value)},`)
  }
  return `export const ${e.name} = {
${entries.join('\n')}
} as const;
export type ${e.name}Key = keyof typeof ${e.name};
export type ${e.name}Value = (typeof ${e.name})[${e.name}Key];`
}
/** Assemble the generated-file text: banner comment + every enum section. */
function render(enums: ExtractedEnum[]): string {
  const sections: string[] = []
  for (const e of enums) {
    sections.push(renderEnum(e))
  }
  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json
// Regenerate with: bun run trace-attribute-values-contract:generate
//
// Canonical closed-set value vocabularies for mothership OTel
// attributes. Call sites should reference e.g.
// \`CopilotRequestCancelReason.ExplicitStop\` rather than the raw
// string literal, so typos become compile errors and the Go contract
// remains the single source of truth.
${sections.join('\n\n')}
`
}
/**
 * CLI entry. Default: write the generated file. `--check`: render the
 * would-be output and fail when it differs from the committed file.
 * `--input=<path>` overrides the contract location (relative to repo root).
 */
async function main() {
  const argv = process.argv
  const checkOnly = argv.includes('--check')
  const inputFlag = argv.find((a) => a.startsWith('--input='))
  const inputPath = inputFlag
    ? resolve(ROOT, inputFlag.slice('--input='.length))
    : DEFAULT_CONTRACT_PATH

  const schema = JSON.parse(await readFile(inputPath, 'utf8'))
  const enums = extractEnums(schema)
  if (enums.length === 0) {
    throw new Error(
      'No enum $defs found in trace-attribute-values-v1.schema.json — did you add the Go type to TraceAttributeValuesV1AllDefs?',
    )
  }
  const rendered = render(enums)

  if (checkOnly) {
    const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
    if (existing !== rendered) {
      throw new Error(
        'Generated trace attribute values contract is stale. Run: bun run trace-attribute-values-contract:generate',
      )
    }
    console.log('Trace attribute values contract is up to date.')
    return
  }

  await mkdir(dirname(OUTPUT_PATH), { recursive: true })
  await writeFile(OUTPUT_PATH, rendered, 'utf8')
  console.log(`Generated trace attribute values types -> ${OUTPUT_PATH}`)
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})

View File

@@ -0,0 +1,168 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { dirname, resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
/**
* Generate `apps/sim/lib/copilot/generated/trace-attributes-v1.ts`
* from the Go-side `contracts/trace-attributes-v1.schema.json`
* contract.
*
* The contract is a single-enum JSON Schema listing every CUSTOM
* (non-OTel-semconv) span attribute key used in mothership. We emit:
* - A `TraceAttr` const object keyed by PascalCase identifier whose
* values are the exact wire strings, so call sites look like
* `span.setAttribute(TraceAttr.ChatId, …)` instead of the raw
* `span.setAttribute('chat.id', …)`.
* - A `TraceAttrKey` union and a `TraceAttrValue` union type so
* helpers that take an attribute key are well-typed.
* - A sorted `TraceAttrValues` readonly array for tests/enumeration.
*
* This is the attribute-key twin of `sync-trace-spans-contract.ts`
* (span names). The two files share the enum-extraction + identifier
* PascalCase + collision-detection pattern so a reader who understands
* one understands both.
*
* For OTel semantic-convention keys (e.g. `http.request.method`,
* `db.system`, `gen_ai.system`, `messaging.*`, `net.*`,
* `service.name`, `deployment.environment`), import from
* `@opentelemetry/semantic-conventions` directly — they live in the
* upstream package, not in this contract.
*/
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
// Repo root — this script lives in <root>/scripts/.
const ROOT = resolve(SCRIPT_DIR, '..')
// Go-side contract read by default; override with --input=<path>.
const DEFAULT_CONTRACT_PATH = resolve(
  ROOT,
  '../copilot/copilot/contracts/trace-attributes-v1.schema.json',
)
// Generated TS attribute-key vocabulary emitted into the Sim app.
const OUTPUT_PATH = resolve(
  ROOT,
  'apps/sim/lib/copilot/generated/trace-attributes-v1.ts',
)
/**
 * Read the attribute-key list from `$defs.TraceAttributesV1Name.enum`,
 * validating shape loudly, and return it sorted for diff stability.
 */
function extractAttrKeys(schema: Record<string, unknown>): string[] {
  const defs = (schema.$defs ?? {}) as Record<string, unknown>
  const nameDef = defs.TraceAttributesV1Name
  const enumValues =
    nameDef && typeof nameDef === 'object'
      ? (nameDef as Record<string, unknown>).enum
      : undefined
  if (!Array.isArray(enumValues)) {
    throw new Error(
      'trace-attributes-v1.schema.json is missing $defs.TraceAttributesV1Name.enum',
    )
  }
  const keys: string[] = []
  for (const v of enumValues) {
    if (typeof v !== 'string') {
      throw new Error('TraceAttributesV1Name enum must be string-only')
    }
    keys.push(v)
  }
  return keys.sort()
}
/**
 * Wire attribute key -> PascalCase identifier, e.g.
 * `copilot.vfs.input.media_type_claimed` ->
 * `CopilotVfsInputMediaTypeClaimed`.
 *
 * Same algorithm as the span-name sync script so readers can learn one
 * and reuse it.
 */
function toIdentifier(name: string): string {
  const segments = name.split(/[^A-Za-z0-9]+/).filter((s) => s !== '')
  if (segments.length === 0) {
    throw new Error(`Cannot derive identifier for attribute key: ${name}`)
  }
  let ident = ''
  for (const s of segments) {
    ident += s.charAt(0).toUpperCase() + s.slice(1).toLowerCase()
  }
  if (/^[0-9]/.test(ident)) {
    throw new Error(
      `Derived identifier "${ident}" for attribute "${name}" starts with a digit`,
    )
  }
  return ident
}
/**
 * Emit the full generated-file text: banner, `TraceAttr` const, key and
 * value union types, and a sorted values array. Fails loudly when two
 * wire keys collapse to the same PascalCase identifier — a silent
 * override would break type safety.
 */
function render(attrKeys: string[]): string {
  const pairs = attrKeys.map((name) => [toIdentifier(name), name] as const)
  const identToName = new Map<string, string>()
  for (const [ident, name] of pairs) {
    const clash = identToName.get(ident)
    if (clash && clash !== name) {
      throw new Error(
        `Identifier collision: "${clash}" and "${name}" both map to "${ident}"`,
      )
    }
    identToName.set(ident, name)
  }
  const constLines = pairs
    .map(([ident, name]) => `  ${ident}: ${JSON.stringify(name)},`)
    .join('\n')
  const arrayEntries = attrKeys.map((n) => `  ${JSON.stringify(n)},`).join('\n')
  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-attributes-v1.schema.json
// Regenerate with: bun run trace-attributes-contract:generate
//
// Canonical custom mothership OTel span attribute keys. Call sites
// should reference \`TraceAttr.<Identifier>\` (e.g.
// \`TraceAttr.ChatId\`, \`TraceAttr.ToolCallId\`) rather than raw
// string literals, so the Go-side contract is the single source of
// truth and typos become compile errors.
//
// For OTel semantic-convention keys (\`http.*\`, \`db.*\`,
// \`gen_ai.*\`, \`net.*\`, \`messaging.*\`, \`service.*\`,
// \`deployment.environment\`), import from
// \`@opentelemetry/semantic-conventions\` directly — those are owned
// by the upstream OTel spec, not by this contract.
export const TraceAttr = {
${constLines}
} as const;
export type TraceAttrKey = keyof typeof TraceAttr;
export type TraceAttrValue = (typeof TraceAttr)[TraceAttrKey];
/** Readonly sorted list of every canonical custom attribute key. */
export const TraceAttrValues: readonly TraceAttrValue[] = [
${arrayEntries}
] as const;
`
}
/**
 * CLI entry. Default: write the generated file. `--check`: render the
 * would-be output and fail when it differs from the committed file.
 * `--input=<path>` overrides the contract location (relative to repo root).
 */
async function main() {
  const argv = process.argv
  const checkOnly = argv.includes('--check')
  const inputFlag = argv.find((a) => a.startsWith('--input='))
  const inputPath = inputFlag
    ? resolve(ROOT, inputFlag.slice('--input='.length))
    : DEFAULT_CONTRACT_PATH

  const schema = JSON.parse(await readFile(inputPath, 'utf8'))
  const rendered = render(extractAttrKeys(schema))

  if (checkOnly) {
    const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
    if (existing !== rendered) {
      throw new Error(
        'Generated trace attributes contract is stale. Run: bun run trace-attributes-contract:generate',
      )
    }
    console.log('Trace attributes contract is up to date.')
    return
  }

  await mkdir(dirname(OUTPUT_PATH), { recursive: true })
  await writeFile(OUTPUT_PATH, rendered, 'utf8')
  console.log(`Generated trace attributes types -> ${OUTPUT_PATH}`)
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})

View File

@@ -0,0 +1,137 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { dirname, resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
/**
* Generate `apps/sim/lib/copilot/generated/trace-events-v1.ts` from
* the Go-side `contracts/trace-events-v1.schema.json` contract.
*
* Mirrors the span-names + attribute-keys sync scripts exactly — the
* only difference is the $defs key (`TraceEventsV1Name`), the output
* path, and the generated const name (`TraceEvent`). Keeping the
* scripts structurally identical means a reader who understands one
* understands all three, and drift between them gets caught
* immediately in code review.
*/
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
// Repo root — this script lives in <root>/scripts/.
const ROOT = resolve(SCRIPT_DIR, '..')
// Go-side contract read by default; override with --input=<path>.
const DEFAULT_CONTRACT_PATH = resolve(
  ROOT,
  '../copilot/copilot/contracts/trace-events-v1.schema.json',
)
// Generated TS event-name vocabulary emitted into the Sim app.
const OUTPUT_PATH = resolve(
  ROOT,
  'apps/sim/lib/copilot/generated/trace-events-v1.ts',
)
/**
 * Read the event-name list from `$defs.TraceEventsV1Name.enum`,
 * validating shape loudly, and return it sorted for diff stability.
 */
function extractEventNames(schema: Record<string, unknown>): string[] {
  const defs = (schema.$defs ?? {}) as Record<string, unknown>
  const nameDef = defs.TraceEventsV1Name
  const enumValues =
    nameDef && typeof nameDef === 'object'
      ? (nameDef as Record<string, unknown>).enum
      : undefined
  if (!Array.isArray(enumValues)) {
    throw new Error(
      'trace-events-v1.schema.json is missing $defs.TraceEventsV1Name.enum',
    )
  }
  const names: string[] = []
  for (const v of enumValues) {
    if (typeof v !== 'string') {
      throw new Error('TraceEventsV1Name enum must be string-only')
    }
    names.push(v)
  }
  return names.sort()
}
/**
 * Wire event name -> PascalCase identifier (same algorithm as the
 * span-name and attribute-key sync scripts).
 */
function toIdentifier(name: string): string {
  const words = name.split(/[^A-Za-z0-9]+/).filter((w) => w.length > 0)
  if (words.length === 0) {
    throw new Error(`Cannot derive identifier for event name: ${name}`)
  }
  let ident = ''
  for (const w of words) {
    ident += w.charAt(0).toUpperCase() + w.slice(1).toLowerCase()
  }
  if (/^[0-9]/.test(ident)) {
    throw new Error(
      `Derived identifier "${ident}" for event "${name}" starts with a digit`,
    )
  }
  return ident
}
/**
 * Emit the full generated-file text: banner, `TraceEvent` const, key
 * and value union types, and a sorted values array. Fails loudly on
 * PascalCase identifier collisions.
 */
function render(eventNames: string[]): string {
  const pairs = eventNames.map((name) => [toIdentifier(name), name] as const)
  const identToName = new Map<string, string>()
  for (const [ident, name] of pairs) {
    const clash = identToName.get(ident)
    if (clash && clash !== name) {
      throw new Error(
        `Identifier collision: "${clash}" and "${name}" both map to "${ident}"`,
      )
    }
    identToName.set(ident, name)
  }
  const constLines = pairs
    .map(([ident, name]) => `  ${ident}: ${JSON.stringify(name)},`)
    .join('\n')
  const arrayEntries = eventNames.map((n) => `  ${JSON.stringify(n)},`).join('\n')
  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-events-v1.schema.json
// Regenerate with: bun run trace-events-contract:generate
//
// Canonical mothership OTel span event names. Call sites should
// reference \`TraceEvent.<Identifier>\` (e.g.
// \`TraceEvent.RequestCancelled\`) rather than raw string literals,
// so the Go-side contract is the single source of truth and typos
// become compile errors.
export const TraceEvent = {
${constLines}
} as const;
export type TraceEventKey = keyof typeof TraceEvent;
export type TraceEventValue = (typeof TraceEvent)[TraceEventKey];
/** Readonly sorted list of every canonical event name. */
export const TraceEventValues: readonly TraceEventValue[] = [
${arrayEntries}
] as const;
`
}
/**
 * CLI entry. Default: write the generated file. `--check`: render the
 * would-be output and fail when it differs from the committed file.
 * `--input=<path>` overrides the contract location (relative to repo root).
 */
async function main() {
  const argv = process.argv
  const checkOnly = argv.includes('--check')
  const inputFlag = argv.find((a) => a.startsWith('--input='))
  const inputPath = inputFlag
    ? resolve(ROOT, inputFlag.slice('--input='.length))
    : DEFAULT_CONTRACT_PATH

  const schema = JSON.parse(await readFile(inputPath, 'utf8'))
  const rendered = render(extractEventNames(schema))

  if (checkOnly) {
    const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
    if (existing !== rendered) {
      throw new Error(
        'Generated trace events contract is stale. Run: bun run trace-events-contract:generate',
      )
    }
    console.log('Trace events contract is up to date.')
    return
  }

  await mkdir(dirname(OUTPUT_PATH), { recursive: true })
  await writeFile(OUTPUT_PATH, rendered, 'utf8')
  console.log(`Generated trace events types -> ${OUTPUT_PATH}`)
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})

View File

@@ -0,0 +1,155 @@
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { dirname, resolve } from 'node:path'
import { fileURLToPath } from 'node:url'
/**
* Generate `apps/sim/lib/copilot/generated/trace-spans-v1.ts` from the
* Go-side `contracts/trace-spans-v1.schema.json` contract.
*
* The contract is a single-enum JSON Schema. We emit:
* - A `TraceSpansV1Name` const object (key-as-value) for ergonomic
* access: `TraceSpansV1Name['copilot.vfs.read_file']`.
* - A `TraceSpansV1NameValue` union type.
* - A sorted `TraceSpansV1Names` readonly array (useful for tests that
* verify coverage, and for tooling that wants to enumerate names).
*
* We deliberately do NOT pass through `json-schema-to-typescript` —
* it would generate a noisy `TraceSpansV1` object type for the wrapper
* that drives reflection; the wrapper type has no runtime use on the Sim
* side and would obscure the actual enum.
*/
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
// Repo root — this script lives in <root>/scripts/.
const ROOT = resolve(SCRIPT_DIR, '..')
// Go-side contract read by default; override with --input=<path>.
const DEFAULT_CONTRACT_PATH = resolve(
  ROOT,
  '../copilot/copilot/contracts/trace-spans-v1.schema.json',
)
// Generated TS span-name vocabulary emitted into the Sim app.
const OUTPUT_PATH = resolve(
  ROOT,
  'apps/sim/lib/copilot/generated/trace-spans-v1.ts',
)
/**
 * Read the span-name list from `$defs.TraceSpansV1Name.enum`,
 * validating shape loudly, and return it sorted for diff stability.
 */
function extractSpanNames(schema: Record<string, unknown>): string[] {
  const defs = (schema.$defs ?? {}) as Record<string, unknown>
  const nameDef = defs.TraceSpansV1Name
  const enumValues =
    nameDef && typeof nameDef === 'object'
      ? (nameDef as Record<string, unknown>).enum
      : undefined
  if (!Array.isArray(enumValues)) {
    throw new Error(
      'trace-spans-v1.schema.json is missing $defs.TraceSpansV1Name.enum',
    )
  }
  const names: string[] = []
  for (const v of enumValues) {
    if (typeof v !== 'string') {
      throw new Error('TraceSpansV1Name enum must be string-only')
    }
    names.push(v)
  }
  return names.sort()
}
/**
 * Wire span name -> identifier-safe PascalCase key, e.g.
 * "copilot.recovery.check_replay_gap" -> "CopilotRecoveryCheckReplayGap",
 * so call sites read `TraceSpan.CopilotRecoveryCheckReplayGap` instead
 * of `TraceSpan["copilot.recovery.check_replay_gap"]`.
 *
 * Splits on any non-alphanumeric run, capitalizes each part, and
 * collapses. Strict mapping (not a best-effort heuristic): the same
 * input always yields the same identifier.
 */
function toIdentifier(name: string): string {
  const words = name.split(/[^A-Za-z0-9]+/).filter((w) => w.length > 0)
  if (words.length === 0) {
    throw new Error(`Cannot derive identifier for span name: ${name}`)
  }
  const ident = words.reduce(
    (acc, w) => acc + w.charAt(0).toUpperCase() + w.slice(1).toLowerCase(),
    '',
  )
  // Safety: identifiers may not start with a digit.
  if (/^[0-9]/.test(ident)) {
    throw new Error(
      `Derived identifier "${ident}" for span "${name}" starts with a digit`,
    )
  }
  return ident
}
/**
 * Emit the full generated-file text: banner, `TraceSpan` const, key and
 * value union types, and a sorted names array. If two wire names ever
 * collapse to the same PascalCase identifier we want a clear build
 * failure, not a silent override.
 */
function render(spanNames: string[]): string {
  const pairs = spanNames.map((name) => [toIdentifier(name), name] as const)
  const identToName = new Map<string, string>()
  for (const [ident, name] of pairs) {
    const clash = identToName.get(ident)
    if (clash && clash !== name) {
      throw new Error(
        `Identifier collision: "${clash}" and "${name}" both map to "${ident}"`,
      )
    }
    identToName.set(ident, name)
  }
  const constLines = pairs
    .map(([ident, name]) => `  ${ident}: ${JSON.stringify(name)},`)
    .join('\n')
  const arrayEntries = spanNames.map((n) => `  ${JSON.stringify(n)},`).join('\n')
  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-spans-v1.schema.json
// Regenerate with: bun run trace-spans-contract:generate
//
// Canonical mothership OTel span names. Call sites should reference
// \`TraceSpan.<Identifier>\` (e.g. \`TraceSpan.CopilotVfsReadFile\`)
// rather than raw string literals, so the Go-side contract is the
// single source of truth and typos become compile errors.
export const TraceSpan = {
${constLines}
} as const;
export type TraceSpanKey = keyof typeof TraceSpan;
export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey];
/** Readonly sorted list of every canonical span name. */
export const TraceSpanValues: readonly TraceSpanValue[] = [
${arrayEntries}
] as const;
`
}
/**
 * CLI entry. Default: write the generated file. `--check`: render the
 * would-be output and fail when it differs from the committed file.
 * `--input=<path>` overrides the contract location (relative to repo root).
 */
async function main() {
  const argv = process.argv
  const checkOnly = argv.includes('--check')
  const inputFlag = argv.find((a) => a.startsWith('--input='))
  const inputPath = inputFlag
    ? resolve(ROOT, inputFlag.slice('--input='.length))
    : DEFAULT_CONTRACT_PATH

  const schema = JSON.parse(await readFile(inputPath, 'utf8'))
  const rendered = render(extractSpanNames(schema))

  if (checkOnly) {
    const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
    if (existing !== rendered) {
      throw new Error(
        'Generated trace spans contract is stale. Run: bun run trace-spans-contract:generate',
      )
    }
    console.log('Trace spans contract is up to date.')
    return
  }

  await mkdir(dirname(OUTPUT_PATH), { recursive: true })
  await writeFile(OUTPUT_PATH, rendered, 'utf8')
  console.log(`Generated trace spans types -> ${OUTPUT_PATH}`)
}

main().catch((err) => {
  console.error(err)
  process.exit(1)
})