mirror of
https://github.com/simstudioai/sim.git
synced 2026-04-28 03:00:29 -04:00
v0.6.54: mothership tracing, db pool size increase
This commit is contained in:
@@ -5,7 +5,11 @@ import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { recordUsage } from '@/lib/billing/core/usage-log'
|
||||
import { checkAndBillOverageThreshold } from '@/lib/billing/threshold-billing'
|
||||
import { BillingRouteOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { checkInternalApiKey } from '@/lib/copilot/request/http'
|
||||
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
|
||||
import { isBillingEnabled } from '@/lib/core/config/feature-flags'
|
||||
import { type AtomicClaimResult, billingIdempotency } from '@/lib/core/idempotency/service'
|
||||
import { generateRequestId } from '@/lib/core/utils/request'
|
||||
@@ -28,8 +32,28 @@ const UpdateCostSchema = z.object({
|
||||
/**
|
||||
* POST /api/billing/update-cost
|
||||
* Update user cost with a pre-calculated cost value (internal API key auth required)
|
||||
*
|
||||
* Parented under the Go-side `sim.update_cost` span via W3C traceparent
|
||||
* propagation. Every mothership request that bills should therefore show
|
||||
* the Go client span AND this Sim server span sharing one trace, with
|
||||
* the actual usage/overage work nested below.
|
||||
*/
|
||||
export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
export const POST = withRouteHandler((req: NextRequest) =>
|
||||
withIncomingGoSpan(
|
||||
req.headers,
|
||||
TraceSpan.CopilotBillingUpdateCost,
|
||||
{
|
||||
[TraceAttr.HttpMethod]: 'POST',
|
||||
[TraceAttr.HttpRoute]: '/api/billing/update-cost',
|
||||
},
|
||||
async (span) => updateCostInner(req, span)
|
||||
)
|
||||
)
|
||||
|
||||
async function updateCostInner(
|
||||
req: NextRequest,
|
||||
span: import('@opentelemetry/api').Span
|
||||
): Promise<NextResponse> {
|
||||
const requestId = generateRequestId()
|
||||
const startTime = Date.now()
|
||||
let claim: AtomicClaimResult | null = null
|
||||
@@ -39,6 +63,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
logger.info(`[${requestId}] Update cost request started`)
|
||||
|
||||
if (!isBillingEnabled) {
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.BillingDisabled)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 200)
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
message: 'Billing disabled, cost update skipped',
|
||||
@@ -54,6 +80,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
const authResult = checkInternalApiKey(req)
|
||||
if (!authResult.success) {
|
||||
logger.warn(`[${requestId}] Authentication failed: ${authResult.error}`)
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.AuthFailed)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 401)
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
@@ -69,8 +97,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
if (!validation.success) {
|
||||
logger.warn(`[${requestId}] Invalid request body`, {
|
||||
errors: validation.error.issues,
|
||||
body,
|
||||
})
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InvalidBody)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 400)
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
@@ -85,6 +114,17 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
validation.data
|
||||
const isMcp = source === 'mcp_copilot'
|
||||
|
||||
span.setAttributes({
|
||||
[TraceAttr.UserId]: userId,
|
||||
[TraceAttr.GenAiRequestModel]: model,
|
||||
[TraceAttr.BillingSource]: source,
|
||||
[TraceAttr.BillingCostUsd]: cost,
|
||||
[TraceAttr.GenAiUsageInputTokens]: inputTokens,
|
||||
[TraceAttr.GenAiUsageOutputTokens]: outputTokens,
|
||||
[TraceAttr.BillingIsMcp]: isMcp,
|
||||
...(idempotencyKey ? { [TraceAttr.BillingIdempotencyKey]: idempotencyKey } : {}),
|
||||
})
|
||||
|
||||
claim = idempotencyKey
|
||||
? await billingIdempotency.atomicallyClaim('update-cost', idempotencyKey)
|
||||
: null
|
||||
@@ -95,6 +135,8 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
userId,
|
||||
source,
|
||||
})
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.DuplicateIdempotencyKey)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 409)
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
@@ -159,6 +201,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
cost,
|
||||
})
|
||||
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.Billed)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 200)
|
||||
span.setAttribute(TraceAttr.BillingDurationMs, duration)
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
data: {
|
||||
@@ -193,6 +238,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
)
|
||||
}
|
||||
|
||||
span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InternalError)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 500)
|
||||
span.setAttribute(TraceAttr.BillingDurationMs, duration)
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
@@ -202,4 +250,4 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
{ status: 500 }
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { getSession } from '@/lib/auth'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
@@ -33,13 +35,16 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
|
||||
const { name } = validationResult.data
|
||||
|
||||
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/generate`, {
|
||||
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/generate`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
},
|
||||
body: JSON.stringify({ userId, name }),
|
||||
spanName: 'sim → go /api/validate-key/generate',
|
||||
operation: 'generate_api_key',
|
||||
attributes: { [TraceAttr.UserId]: userId },
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
|
||||
@@ -20,6 +20,22 @@ vi.mock('@/lib/core/config/env', () => createEnvMock({ COPILOT_API_KEY: 'test-ap
|
||||
|
||||
import { DELETE, GET } from '@/app/api/copilot/api-keys/route'
|
||||
|
||||
// `fetchGo` reads `response.status` and `response.headers.get('content-length')`
|
||||
// to stamp span attributes, so mock responses need both fields or the call
|
||||
// path throws before the route handler sees the body.
|
||||
function buildMockResponse(init: {
|
||||
ok: boolean
|
||||
status?: number
|
||||
json: () => Promise<unknown>
|
||||
}): Record<string, unknown> {
|
||||
return {
|
||||
ok: init.ok,
|
||||
status: init.status ?? (init.ok ? 200 : 500),
|
||||
headers: new Headers(),
|
||||
json: init.json,
|
||||
}
|
||||
}
|
||||
|
||||
describe('Copilot API Keys API Route', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
@@ -60,10 +76,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
},
|
||||
]
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockApiKeys),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockApiKeys),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -83,10 +101,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve([]),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve([]),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -101,10 +121,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve([]),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve([]),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
await GET(request)
|
||||
@@ -127,11 +149,13 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 503,
|
||||
json: () => Promise.resolve({ error: 'Service unavailable' }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: false,
|
||||
status: 503,
|
||||
json: () => Promise.resolve({ error: 'Service unavailable' }),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -146,10 +170,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ invalid: 'response' }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ invalid: 'response' }),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -189,10 +215,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
},
|
||||
]
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockApiKeys),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve(mockApiKeys),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -207,10 +235,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.reject(new Error('Invalid JSON')),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.reject(new Error('Invalid JSON')),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys')
|
||||
const response = await GET(request)
|
||||
@@ -251,10 +281,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
|
||||
const response = await DELETE(request)
|
||||
@@ -281,11 +313,13 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status: 404,
|
||||
json: () => Promise.resolve({ error: 'Key not found' }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: false,
|
||||
status: 404,
|
||||
json: () => Promise.resolve({ error: 'Key not found' }),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=non-existent')
|
||||
const response = await DELETE(request)
|
||||
@@ -300,10 +334,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: false }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: false }),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
|
||||
const response = await DELETE(request)
|
||||
@@ -333,10 +369,12 @@ describe('Copilot API Keys API Route', () => {
|
||||
user: { id: 'user-123', email: 'test@example.com' },
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.reject(new Error('Invalid JSON')),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.reject(new Error('Invalid JSON')),
|
||||
})
|
||||
)
|
||||
|
||||
const request = new NextRequest('http://localhost:3000/api/copilot/api-keys?id=key-123')
|
||||
const response = await DELETE(request)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { getSession } from '@/lib/auth'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
@@ -13,13 +15,16 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
|
||||
const userId = session.user.id
|
||||
|
||||
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, {
|
||||
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
},
|
||||
body: JSON.stringify({ userId }),
|
||||
spanName: 'sim → go /api/validate-key/get-api-keys',
|
||||
operation: 'get_api_keys',
|
||||
attributes: { [TraceAttr.UserId]: userId },
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -67,13 +72,16 @@ export const DELETE = withRouteHandler(async (request: NextRequest) => {
|
||||
return NextResponse.json({ error: 'id is required' }, { status: 400 })
|
||||
}
|
||||
|
||||
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/delete`, {
|
||||
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/delete`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
},
|
||||
body: JSON.stringify({ userId, apiKeyId: id }),
|
||||
spanName: 'sim → go /api/validate-key/delete',
|
||||
operation: 'delete_api_key',
|
||||
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ApiKeyId]: id },
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
|
||||
@@ -5,7 +5,11 @@ import { eq } from 'drizzle-orm'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { checkServerSideUsageLimits } from '@/lib/billing/calculations/usage-monitor'
|
||||
import { CopilotValidateOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { checkInternalApiKey } from '@/lib/copilot/request/http'
|
||||
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
const logger = createLogger('CopilotApiKeysValidate')
|
||||
@@ -14,55 +18,87 @@ const ValidateApiKeySchema = z.object({
|
||||
userId: z.string().min(1, 'userId is required'),
|
||||
})
|
||||
|
||||
export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
try {
|
||||
const auth = checkInternalApiKey(req)
|
||||
if (!auth.success) {
|
||||
return new NextResponse(null, { status: 401 })
|
||||
// Incoming-from-Go: extracts traceparent so this handler's work shows
|
||||
// up as a child of the Go-side `sim.validate_api_key` span in the same
|
||||
// trace. If there's no traceparent (manual curl / browser), the helper
|
||||
// falls back to a new root span.
|
||||
export const POST = withRouteHandler((req: NextRequest) =>
|
||||
withIncomingGoSpan(
|
||||
req.headers,
|
||||
TraceSpan.CopilotAuthValidateApiKey,
|
||||
{
|
||||
[TraceAttr.HttpMethod]: 'POST',
|
||||
[TraceAttr.HttpRoute]: '/api/copilot/api-keys/validate',
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const auth = checkInternalApiKey(req)
|
||||
if (!auth.success) {
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotValidateOutcome,
|
||||
CopilotValidateOutcome.InternalAuthFailed
|
||||
)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 401)
|
||||
return new NextResponse(null, { status: 401 })
|
||||
}
|
||||
|
||||
const body = await req.json().catch(() => null)
|
||||
const validationResult = ValidateApiKeySchema.safeParse(body)
|
||||
if (!validationResult.success) {
|
||||
logger.warn('Invalid validation request', { errors: validationResult.error.errors })
|
||||
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InvalidBody)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 400)
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: 'userId is required',
|
||||
details: validationResult.error.errors,
|
||||
},
|
||||
{ status: 400 }
|
||||
)
|
||||
}
|
||||
|
||||
const { userId } = validationResult.data
|
||||
span.setAttribute(TraceAttr.UserId, userId)
|
||||
|
||||
const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1)
|
||||
if (!existingUser) {
|
||||
logger.warn('[API VALIDATION] userId does not exist', { userId })
|
||||
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UserNotFound)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 403)
|
||||
return NextResponse.json({ error: 'User not found' }, { status: 403 })
|
||||
}
|
||||
|
||||
logger.info('[API VALIDATION] Validating usage limit', { userId })
|
||||
const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId)
|
||||
span.setAttributes({
|
||||
[TraceAttr.BillingUsageCurrent]: currentUsage,
|
||||
[TraceAttr.BillingUsageLimit]: limit,
|
||||
[TraceAttr.BillingUsageExceeded]: isExceeded,
|
||||
})
|
||||
|
||||
logger.info('[API VALIDATION] Usage limit validated', {
|
||||
userId,
|
||||
currentUsage,
|
||||
limit,
|
||||
isExceeded,
|
||||
})
|
||||
|
||||
if (isExceeded) {
|
||||
logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit })
|
||||
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UsageExceeded)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 402)
|
||||
return new NextResponse(null, { status: 402 })
|
||||
}
|
||||
|
||||
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.Ok)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 200)
|
||||
return new NextResponse(null, { status: 200 })
|
||||
} catch (error) {
|
||||
logger.error('Error validating usage limit', { error })
|
||||
span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InternalError)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, 500)
|
||||
return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 })
|
||||
}
|
||||
}
|
||||
|
||||
const body = await req.json().catch(() => null)
|
||||
|
||||
const validationResult = ValidateApiKeySchema.safeParse(body)
|
||||
|
||||
if (!validationResult.success) {
|
||||
logger.warn('Invalid validation request', { errors: validationResult.error.errors })
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: 'userId is required',
|
||||
details: validationResult.error.errors,
|
||||
},
|
||||
{ status: 400 }
|
||||
)
|
||||
}
|
||||
|
||||
const { userId } = validationResult.data
|
||||
|
||||
const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1)
|
||||
if (!existingUser) {
|
||||
logger.warn('[API VALIDATION] userId does not exist', { userId })
|
||||
return NextResponse.json({ error: 'User not found' }, { status: 403 })
|
||||
}
|
||||
|
||||
logger.info('[API VALIDATION] Validating usage limit', { userId })
|
||||
|
||||
const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId)
|
||||
|
||||
logger.info('[API VALIDATION] Usage limit validated', {
|
||||
userId,
|
||||
currentUsage,
|
||||
limit,
|
||||
isExceeded,
|
||||
})
|
||||
|
||||
if (isExceeded) {
|
||||
logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit })
|
||||
return new NextResponse(null, { status: 402 })
|
||||
}
|
||||
|
||||
return new NextResponse(null, { status: 200 })
|
||||
} catch (error) {
|
||||
logger.error('Error validating usage limit', { error })
|
||||
return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 })
|
||||
}
|
||||
})
|
||||
)
|
||||
)
|
||||
|
||||
@@ -2,6 +2,8 @@ import { createLogger } from '@sim/logger'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { getSession } from '@/lib/auth'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
@@ -31,9 +33,15 @@ export const GET = withRouteHandler(async () => {
|
||||
|
||||
const userId = session.user.id
|
||||
|
||||
const res = await fetch(
|
||||
const res = await fetchGo(
|
||||
`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}`,
|
||||
{ method: 'GET', headers: copilotHeaders() }
|
||||
{
|
||||
method: 'GET',
|
||||
headers: copilotHeaders(),
|
||||
spanName: 'sim → go /api/tool-preferences/auto-allowed',
|
||||
operation: 'list_auto_allowed_tools',
|
||||
attributes: { [TraceAttr.UserId]: userId },
|
||||
}
|
||||
)
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -67,10 +75,13 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
|
||||
return NextResponse.json({ error: 'toolId must be a string' }, { status: 400 })
|
||||
}
|
||||
|
||||
const res = await fetch(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, {
|
||||
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, {
|
||||
method: 'POST',
|
||||
headers: copilotHeaders(),
|
||||
body: JSON.stringify({ userId, toolId: body.toolId }),
|
||||
spanName: 'sim → go /api/tool-preferences/auto-allowed',
|
||||
operation: 'add_auto_allowed_tool',
|
||||
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: body.toolId },
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -108,9 +119,15 @@ export const DELETE = withRouteHandler(async (request: NextRequest) => {
|
||||
return NextResponse.json({ error: 'toolId query parameter is required' }, { status: 400 })
|
||||
}
|
||||
|
||||
const res = await fetch(
|
||||
const res = await fetchGo(
|
||||
`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}&toolId=${encodeURIComponent(toolId)}`,
|
||||
{ method: 'DELETE', headers: copilotHeaders() }
|
||||
{
|
||||
method: 'DELETE',
|
||||
headers: copilotHeaders(),
|
||||
spanName: 'sim → go /api/tool-preferences/auto-allowed',
|
||||
operation: 'remove_auto_allowed_tool',
|
||||
attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: toolId },
|
||||
}
|
||||
)
|
||||
|
||||
if (!res.ok) {
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { NextResponse } from 'next/server'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { CopilotAbortOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
|
||||
import { withCopilotSpan, withIncomingGoSpan } from '@/lib/copilot/request/otel'
|
||||
import { abortActiveStream, waitForPendingChatStream } from '@/lib/copilot/request/session'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
@@ -12,81 +16,136 @@ const logger = createLogger('CopilotChatAbortAPI')
|
||||
const GO_EXPLICIT_ABORT_TIMEOUT_MS = 3000
|
||||
const STREAM_ABORT_SETTLE_TIMEOUT_MS = 8000
|
||||
|
||||
export const POST = withRouteHandler(async (request: Request) => {
|
||||
const { userId: authenticatedUserId, isAuthenticated } =
|
||||
await authenticateCopilotRequestSessionOnly()
|
||||
// POST /api/copilot/chat/abort — fires on user Stop; marks the Go
|
||||
// side aborted then waits for the prior stream to settle.
|
||||
export const POST = withRouteHandler((request: NextRequest) =>
|
||||
withIncomingGoSpan(
|
||||
request.headers,
|
||||
TraceSpan.CopilotChatAbortStream,
|
||||
undefined,
|
||||
async (rootSpan) => {
|
||||
const { userId: authenticatedUserId, isAuthenticated } =
|
||||
await authenticateCopilotRequestSessionOnly()
|
||||
|
||||
if (!isAuthenticated || !authenticatedUserId) {
|
||||
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
|
||||
}
|
||||
if (!isAuthenticated || !authenticatedUserId) {
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Unauthorized)
|
||||
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
|
||||
}
|
||||
|
||||
const body = await request.json().catch((err) => {
|
||||
logger.warn('Abort request body parse failed; continuing with empty object', {
|
||||
error: toError(err).message,
|
||||
})
|
||||
return {}
|
||||
})
|
||||
const streamId = typeof body.streamId === 'string' ? body.streamId : ''
|
||||
let chatId = typeof body.chatId === 'string' ? body.chatId : ''
|
||||
|
||||
if (!streamId) {
|
||||
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
|
||||
}
|
||||
|
||||
if (!chatId) {
|
||||
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
|
||||
logger.warn('getLatestRunForStream failed while resolving chatId for abort', {
|
||||
streamId,
|
||||
error: toError(err).message,
|
||||
const body = await request.json().catch((err) => {
|
||||
logger.warn('Abort request body parse failed; continuing with empty object', {
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return {}
|
||||
})
|
||||
return null
|
||||
})
|
||||
if (run?.chatId) {
|
||||
chatId = run.chatId
|
||||
}
|
||||
}
|
||||
const streamId = typeof body.streamId === 'string' ? body.streamId : ''
|
||||
let chatId = typeof body.chatId === 'string' ? body.chatId : ''
|
||||
|
||||
try {
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
if (env.COPILOT_API_KEY) {
|
||||
headers['x-api-key'] = env.COPILOT_API_KEY
|
||||
}
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(
|
||||
() => controller.abort('timeout:go_explicit_abort_fetch'),
|
||||
GO_EXPLICIT_ABORT_TIMEOUT_MS
|
||||
)
|
||||
const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
body: JSON.stringify({
|
||||
messageId: streamId,
|
||||
userId: authenticatedUserId,
|
||||
...(chatId ? { chatId } : {}),
|
||||
}),
|
||||
}).finally(() => clearTimeout(timeout))
|
||||
if (!response.ok) {
|
||||
throw new Error(`Explicit abort marker request failed: ${response.status}`)
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Explicit abort marker request failed; proceeding with local abort', {
|
||||
streamId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
}
|
||||
if (!streamId) {
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.MissingStreamId)
|
||||
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
|
||||
}
|
||||
rootSpan.setAttributes({
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.UserId]: authenticatedUserId,
|
||||
})
|
||||
|
||||
const aborted = await abortActiveStream(streamId)
|
||||
if (chatId) {
|
||||
const settled = await waitForPendingChatStream(chatId, STREAM_ABORT_SETTLE_TIMEOUT_MS, streamId)
|
||||
if (!settled) {
|
||||
return NextResponse.json(
|
||||
{ error: 'Previous response is still shutting down', aborted, settled: false },
|
||||
{ status: 409 }
|
||||
)
|
||||
}
|
||||
return NextResponse.json({ aborted, settled: true })
|
||||
}
|
||||
if (!chatId) {
|
||||
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
|
||||
logger.warn('getLatestRunForStream failed while resolving chatId for abort', {
|
||||
streamId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return null
|
||||
})
|
||||
if (run?.chatId) {
|
||||
chatId = run.chatId
|
||||
}
|
||||
}
|
||||
if (chatId) rootSpan.setAttribute(TraceAttr.ChatId, chatId)
|
||||
|
||||
return NextResponse.json({ aborted })
|
||||
})
|
||||
// Local abort before Go — lets the lifecycle classifier see
|
||||
// `signal.aborted` with an explicit-stop reason before Go's
|
||||
// context-canceled error propagates back. Go's endpoint runs
|
||||
// second for billing-ledger flush; Go's context is already
|
||||
// cancelled by then.
|
||||
const aborted = await abortActiveStream(streamId)
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortLocalAborted, aborted)
|
||||
|
||||
let goAbortOk = false
|
||||
try {
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
if (env.COPILOT_API_KEY) {
|
||||
headers['x-api-key'] = env.COPILOT_API_KEY
|
||||
}
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(
|
||||
() => controller.abort('timeout:go_explicit_abort_fetch'),
|
||||
GO_EXPLICIT_ABORT_TIMEOUT_MS
|
||||
)
|
||||
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
body: JSON.stringify({
|
||||
messageId: streamId,
|
||||
userId: authenticatedUserId,
|
||||
...(chatId ? { chatId } : {}),
|
||||
}),
|
||||
spanName: 'sim → go /api/streams/explicit-abort',
|
||||
operation: 'explicit_abort',
|
||||
attributes: {
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
...(chatId ? { [TraceAttr.ChatId]: chatId } : {}),
|
||||
},
|
||||
}).finally(() => clearTimeout(timeout))
|
||||
if (!response.ok) {
|
||||
throw new Error(`Explicit abort marker request failed: ${response.status}`)
|
||||
}
|
||||
goAbortOk = true
|
||||
} catch (err) {
|
||||
logger.warn('Explicit abort marker request failed after local abort', {
|
||||
streamId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
}
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortGoMarkerOk, goAbortOk)
|
||||
|
||||
if (chatId) {
|
||||
const settled = await withCopilotSpan(
|
||||
TraceSpan.CopilotChatAbortWaitSettle,
|
||||
{
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.SettleTimeoutMs]: STREAM_ABORT_SETTLE_TIMEOUT_MS,
|
||||
},
|
||||
async (settleSpan) => {
|
||||
const start = Date.now()
|
||||
const ok = await waitForPendingChatStream(
|
||||
chatId,
|
||||
STREAM_ABORT_SETTLE_TIMEOUT_MS,
|
||||
streamId
|
||||
)
|
||||
settleSpan.setAttributes({
|
||||
[TraceAttr.SettleWaitMs]: Date.now() - start,
|
||||
[TraceAttr.SettleCompleted]: ok,
|
||||
})
|
||||
return ok
|
||||
}
|
||||
)
|
||||
if (!settled) {
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.SettleTimeout)
|
||||
return NextResponse.json(
|
||||
{ error: 'Previous response is still shutting down', aborted, settled: false },
|
||||
{ status: 409 }
|
||||
)
|
||||
}
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Settled)
|
||||
return NextResponse.json({ aborted, settled: true })
|
||||
}
|
||||
|
||||
rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.NoChatId)
|
||||
return NextResponse.json({ aborted })
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
@@ -7,6 +7,10 @@ import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { getSession } from '@/lib/auth'
|
||||
import { normalizeMessage, type PersistedMessage } from '@/lib/copilot/chat/persisted-message'
|
||||
import { CopilotStopOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
|
||||
import { taskPubSub } from '@/lib/copilot/tasks'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
@@ -55,95 +59,121 @@ const StopSchema = z.object({
|
||||
streamId: z.string(),
|
||||
content: z.string(),
|
||||
contentBlocks: z.array(ContentBlockSchema).optional(),
|
||||
// Optional for older clients; when present, flows into msg.requestId
|
||||
// so the UI's copy-request-ID button survives a stopped turn.
|
||||
requestId: z.string().optional(),
|
||||
})
|
||||
|
||||
/**
|
||||
* POST /api/copilot/chat/stop
|
||||
* Persists partial assistant content when the user stops a stream mid-response.
|
||||
* Clears conversationId so the server-side onComplete won't duplicate the message.
|
||||
* The chat stream lock is intentionally left alone here; it is released only once
|
||||
* the aborted server stream actually unwinds.
|
||||
*/
|
||||
export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
try {
|
||||
const session = await getSession()
|
||||
if (!session?.user?.id) {
|
||||
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
|
||||
}
|
||||
// POST /api/copilot/chat/stop — persists partial assistant content
|
||||
// when the user stops mid-stream. Lock release is handled by the
|
||||
// aborted server stream unwinding, not this handler.
|
||||
export const POST = withRouteHandler((req: NextRequest) =>
|
||||
withIncomingGoSpan(req.headers, TraceSpan.CopilotChatStopStream, undefined, async (span) => {
|
||||
try {
|
||||
const session = await getSession()
|
||||
if (!session?.user?.id) {
|
||||
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.Unauthorized)
|
||||
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
|
||||
}
|
||||
|
||||
const { chatId, streamId, content, contentBlocks } = StopSchema.parse(await req.json())
|
||||
const [row] = await db
|
||||
.select({
|
||||
workspaceId: copilotChats.workspaceId,
|
||||
messages: copilotChats.messages,
|
||||
const { chatId, streamId, content, contentBlocks, requestId } = StopSchema.parse(
|
||||
await req.json()
|
||||
)
|
||||
span.setAttributes({
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.UserId]: session.user.id,
|
||||
[TraceAttr.CopilotStopContentLength]: content.length,
|
||||
[TraceAttr.CopilotStopBlocksCount]: contentBlocks?.length ?? 0,
|
||||
...(requestId ? { [TraceAttr.RequestId]: requestId } : {}),
|
||||
})
|
||||
.from(copilotChats)
|
||||
.where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id)))
|
||||
.limit(1)
|
||||
|
||||
if (!row) {
|
||||
const [row] = await db
|
||||
.select({
|
||||
workspaceId: copilotChats.workspaceId,
|
||||
messages: copilotChats.messages,
|
||||
})
|
||||
.from(copilotChats)
|
||||
.where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id)))
|
||||
.limit(1)
|
||||
|
||||
if (!row) {
|
||||
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ChatNotFound)
|
||||
return NextResponse.json({ success: true })
|
||||
}
|
||||
|
||||
const messages: Record<string, unknown>[] = Array.isArray(row.messages) ? row.messages : []
|
||||
const userIdx = messages.findIndex((message) => message.id === streamId)
|
||||
const alreadyHasResponse =
|
||||
userIdx >= 0 &&
|
||||
userIdx + 1 < messages.length &&
|
||||
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
|
||||
const canAppendAssistant =
|
||||
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
|
||||
|
||||
const updateWhere = and(
|
||||
eq(copilotChats.id, chatId),
|
||||
eq(copilotChats.userId, session.user.id),
|
||||
eq(copilotChats.conversationId, streamId)
|
||||
)
|
||||
|
||||
const setClause: Record<string, unknown> = {
|
||||
conversationId: null,
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
|
||||
const hasContent = content.trim().length > 0
|
||||
const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0
|
||||
const synthesizedStoppedBlocks = hasBlocks
|
||||
? contentBlocks
|
||||
: hasContent
|
||||
? [{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }]
|
||||
: [{ type: 'stopped' }]
|
||||
if (canAppendAssistant) {
|
||||
const normalized = normalizeMessage({
|
||||
id: generateId(),
|
||||
role: 'assistant',
|
||||
content,
|
||||
timestamp: new Date().toISOString(),
|
||||
contentBlocks: synthesizedStoppedBlocks,
|
||||
// Persist so the UI copy-request-id button survives refetch.
|
||||
...(requestId ? { requestId } : {}),
|
||||
})
|
||||
const assistantMessage: PersistedMessage = normalized
|
||||
setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`
|
||||
}
|
||||
span.setAttribute(TraceAttr.CopilotStopAppendedAssistant, canAppendAssistant)
|
||||
|
||||
const [updated] = await db
|
||||
.update(copilotChats)
|
||||
.set(setClause)
|
||||
.where(updateWhere)
|
||||
.returning({ workspaceId: copilotChats.workspaceId })
|
||||
|
||||
if (updated?.workspaceId) {
|
||||
taskPubSub?.publishStatusChanged({
|
||||
workspaceId: updated.workspaceId,
|
||||
chatId,
|
||||
type: 'completed',
|
||||
})
|
||||
}
|
||||
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotStopOutcome,
|
||||
updated ? CopilotStopOutcome.Persisted : CopilotStopOutcome.NoMatchingRow
|
||||
)
|
||||
return NextResponse.json({ success: true })
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ValidationError)
|
||||
return NextResponse.json(
|
||||
{ error: 'Invalid request data', details: error.errors },
|
||||
{ status: 400 }
|
||||
)
|
||||
}
|
||||
logger.error('Error stopping chat stream:', error)
|
||||
span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.InternalError)
|
||||
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
|
||||
}
|
||||
|
||||
const messages: Record<string, unknown>[] = Array.isArray(row.messages) ? row.messages : []
|
||||
const userIdx = messages.findIndex((message) => message.id === streamId)
|
||||
const alreadyHasResponse =
|
||||
userIdx >= 0 &&
|
||||
userIdx + 1 < messages.length &&
|
||||
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
|
||||
const canAppendAssistant =
|
||||
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
|
||||
|
||||
const updateWhere = and(
|
||||
eq(copilotChats.id, chatId),
|
||||
eq(copilotChats.userId, session.user.id),
|
||||
eq(copilotChats.conversationId, streamId)
|
||||
)
|
||||
|
||||
const setClause: Record<string, unknown> = {
|
||||
conversationId: null,
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
|
||||
const hasContent = content.trim().length > 0
|
||||
const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0
|
||||
const synthesizedStoppedBlocks = hasBlocks
|
||||
? contentBlocks
|
||||
: hasContent
|
||||
? [{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }]
|
||||
: [{ type: 'stopped' }]
|
||||
if (canAppendAssistant) {
|
||||
const normalized = normalizeMessage({
|
||||
id: generateId(),
|
||||
role: 'assistant',
|
||||
content,
|
||||
timestamp: new Date().toISOString(),
|
||||
contentBlocks: synthesizedStoppedBlocks,
|
||||
})
|
||||
const assistantMessage: PersistedMessage = normalized
|
||||
setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`
|
||||
}
|
||||
|
||||
const [updated] = await db
|
||||
.update(copilotChats)
|
||||
.set(setClause)
|
||||
.where(updateWhere)
|
||||
.returning({ workspaceId: copilotChats.workspaceId })
|
||||
|
||||
if (updated?.workspaceId) {
|
||||
taskPubSub?.publishStatusChanged({
|
||||
workspaceId: updated.workspaceId,
|
||||
chatId,
|
||||
type: 'completed',
|
||||
})
|
||||
}
|
||||
|
||||
return NextResponse.json({ success: true })
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
return NextResponse.json({ error: 'Invalid request' }, { status: 400 })
|
||||
}
|
||||
logger.error('Error stopping chat stream:', error)
|
||||
return NextResponse.json({ error: 'Internal server error' }, { status: 500 })
|
||||
}
|
||||
})
|
||||
})
|
||||
)
|
||||
|
||||
@@ -160,4 +160,42 @@ describe('copilot chat stream replay route', () => {
|
||||
expect(body).toContain('"code":"resume_run_unavailable"')
|
||||
expect(body).toContain(`"type":"${MothershipStreamV1EventType.complete}"`)
|
||||
})
|
||||
|
||||
it('uses the latest live request id for synthetic terminal replay events', async () => {
|
||||
getLatestRunForStream
|
||||
.mockResolvedValueOnce({
|
||||
status: 'active',
|
||||
executionId: 'exec-1',
|
||||
id: 'run-1',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
status: 'cancelled',
|
||||
executionId: 'exec-1',
|
||||
id: 'run-1',
|
||||
})
|
||||
readEvents
|
||||
.mockResolvedValueOnce([
|
||||
{
|
||||
stream: { streamId: 'stream-1', cursor: '1' },
|
||||
seq: 1,
|
||||
trace: { requestId: 'req-live-123' },
|
||||
type: MothershipStreamV1EventType.text,
|
||||
payload: {
|
||||
channel: 'assistant',
|
||||
text: 'hello',
|
||||
},
|
||||
},
|
||||
])
|
||||
.mockResolvedValueOnce([])
|
||||
|
||||
const response = await GET(
|
||||
new NextRequest('http://localhost:3000/api/copilot/chat/stream?streamId=stream-1&after=0')
|
||||
)
|
||||
|
||||
const chunks = await readAllChunks(response)
|
||||
const terminalChunk = chunks[chunks.length - 1] ?? ''
|
||||
expect(terminalChunk).toContain(`"type":"${MothershipStreamV1EventType.complete}"`)
|
||||
expect(terminalChunk).toContain('"requestId":"req-live-123"')
|
||||
expect(terminalChunk).toContain('"status":"cancelled"')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
import { context as otelContext, trace } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { sleep } from '@sim/utils/helpers'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository'
|
||||
import {
|
||||
MothershipStreamV1CompletionStatus,
|
||||
MothershipStreamV1EventType,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import {
|
||||
CopilotResumeOutcome,
|
||||
CopilotTransport,
|
||||
} from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation'
|
||||
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
|
||||
import { getCopilotTracer, markSpanForError } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
checkForReplayGap,
|
||||
createEvent,
|
||||
@@ -25,6 +32,25 @@ const logger = createLogger('CopilotChatStreamAPI')
|
||||
const POLL_INTERVAL_MS = 250
|
||||
const MAX_STREAM_MS = 60 * 60 * 1000
|
||||
|
||||
function extractCanonicalRequestId(value: unknown): string {
|
||||
return typeof value === 'string' && value.length > 0 ? value : ''
|
||||
}
|
||||
|
||||
function extractRunRequestId(run: { requestContext?: unknown } | null | undefined): string {
|
||||
if (!run || typeof run.requestContext !== 'object' || run.requestContext === null) {
|
||||
return ''
|
||||
}
|
||||
const requestContext = run.requestContext as Record<string, unknown>
|
||||
return (
|
||||
extractCanonicalRequestId(requestContext.requestId) ||
|
||||
extractCanonicalRequestId(requestContext.simRequestId)
|
||||
)
|
||||
}
|
||||
|
||||
function extractEnvelopeRequestId(envelope: { trace?: { requestId?: unknown } }): string {
|
||||
return extractCanonicalRequestId(envelope.trace?.requestId)
|
||||
}
|
||||
|
||||
function isTerminalStatus(
|
||||
status: string | null | undefined
|
||||
): status is MothershipStreamV1CompletionStatus {
|
||||
@@ -42,10 +68,12 @@ function buildResumeTerminalEnvelopes(options: {
|
||||
message?: string
|
||||
code: string
|
||||
reason?: string
|
||||
requestId?: string
|
||||
}) {
|
||||
const baseSeq = Number(options.afterCursor || '0')
|
||||
const seq = Number.isFinite(baseSeq) ? baseSeq : 0
|
||||
const envelopes: ReturnType<typeof createEvent>[] = []
|
||||
const rid = options.requestId ?? ''
|
||||
|
||||
if (options.status === MothershipStreamV1CompletionStatus.error) {
|
||||
envelopes.push(
|
||||
@@ -53,7 +81,7 @@ function buildResumeTerminalEnvelopes(options: {
|
||||
streamId: options.streamId,
|
||||
cursor: String(seq + 1),
|
||||
seq: seq + 1,
|
||||
requestId: '',
|
||||
requestId: rid,
|
||||
type: MothershipStreamV1EventType.error,
|
||||
payload: {
|
||||
message: options.message || 'Stream recovery failed before completion.',
|
||||
@@ -68,7 +96,7 @@ function buildResumeTerminalEnvelopes(options: {
|
||||
streamId: options.streamId,
|
||||
cursor: String(seq + envelopes.length + 1),
|
||||
seq: seq + envelopes.length + 1,
|
||||
requestId: '',
|
||||
requestId: rid,
|
||||
type: MothershipStreamV1EventType.complete,
|
||||
payload: {
|
||||
status: options.status,
|
||||
@@ -97,10 +125,77 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
return NextResponse.json({ error: 'streamId is required' }, { status: 400 })
|
||||
}
|
||||
|
||||
// Root span for the whole resume/reconnect request. In stream mode the
|
||||
// work happens inside `ReadableStream.start`, which the Node runtime
|
||||
// invokes after this function returns and OUTSIDE the AsyncLocalStorage
|
||||
// scope installed by `startActiveSpan`. We therefore start the span
|
||||
// manually, capture its context, and re-enter that context inside the
|
||||
// stream callback so every nested `withCopilotSpan` / `withDbSpan` call
|
||||
// attaches to this root.
|
||||
//
|
||||
// `contextFromRequestHeaders` extracts the W3C `traceparent` the
|
||||
// client echoed (set via `streamTraceparentRef` on Sim's chat POST
|
||||
// response), so the resume span becomes a child of the original
|
||||
// chat's `gen_ai.agent.execute` trace instead of a disconnected
|
||||
// new root. On reconnects after page reload (client ref was wiped)
|
||||
// the header is absent and extraction leaves the ambient context
|
||||
// alone → the resume span becomes its own root. Same as pre-
|
||||
// linking behavior; no regression.
|
||||
const incomingContext = contextFromRequestHeaders(request.headers)
|
||||
const rootSpan = getCopilotTracer().startSpan(
|
||||
TraceSpan.CopilotResumeRequest,
|
||||
{
|
||||
attributes: {
|
||||
[TraceAttr.CopilotTransport]: batchMode ? CopilotTransport.Batch : CopilotTransport.Stream,
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.UserId]: authenticatedUserId,
|
||||
[TraceAttr.CopilotResumeAfterCursor]: afterCursor || '0',
|
||||
},
|
||||
},
|
||||
incomingContext
|
||||
)
|
||||
const rootContext = trace.setSpan(incomingContext, rootSpan)
|
||||
|
||||
try {
|
||||
return await otelContext.with(rootContext, () =>
|
||||
handleResumeRequestBody({
|
||||
request,
|
||||
streamId,
|
||||
afterCursor,
|
||||
batchMode,
|
||||
authenticatedUserId,
|
||||
rootSpan,
|
||||
rootContext,
|
||||
})
|
||||
)
|
||||
} catch (err) {
|
||||
markSpanForError(rootSpan, err)
|
||||
rootSpan.end()
|
||||
throw err
|
||||
}
|
||||
})
|
||||
|
||||
async function handleResumeRequestBody({
|
||||
request,
|
||||
streamId,
|
||||
afterCursor,
|
||||
batchMode,
|
||||
authenticatedUserId,
|
||||
rootSpan,
|
||||
rootContext,
|
||||
}: {
|
||||
request: NextRequest
|
||||
streamId: string
|
||||
afterCursor: string
|
||||
batchMode: boolean
|
||||
authenticatedUserId: string
|
||||
rootSpan: import('@opentelemetry/api').Span
|
||||
rootContext: import('@opentelemetry/api').Context
|
||||
}) {
|
||||
const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => {
|
||||
logger.warn('Failed to fetch latest run for stream', {
|
||||
streamId,
|
||||
error: toError(err).message,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return null
|
||||
})
|
||||
@@ -112,8 +207,11 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
runStatus: run?.status,
|
||||
})
|
||||
if (!run) {
|
||||
rootSpan.setAttribute(TraceAttr.CopilotResumeOutcome, CopilotResumeOutcome.StreamNotFound)
|
||||
rootSpan.end()
|
||||
return NextResponse.json({ error: 'Stream not found' }, { status: 404 })
|
||||
}
|
||||
rootSpan.setAttribute(TraceAttr.CopilotRunStatus, run.status)
|
||||
|
||||
if (batchMode) {
|
||||
const afterSeq = afterCursor || '0'
|
||||
@@ -122,7 +220,7 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
readFilePreviewSessions(streamId).catch((error) => {
|
||||
logger.warn('Failed to read preview sessions for stream batch', {
|
||||
streamId,
|
||||
error: toError(error).message,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
return []
|
||||
}),
|
||||
@@ -135,6 +233,12 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
previewSessionCount: previewSessions.length,
|
||||
runStatus: run.status,
|
||||
})
|
||||
rootSpan.setAttributes({
|
||||
[TraceAttr.CopilotResumeOutcome]: CopilotResumeOutcome.BatchDelivered,
|
||||
[TraceAttr.CopilotResumeEventCount]: batchEvents.length,
|
||||
[TraceAttr.CopilotResumePreviewSessionCount]: previewSessions.length,
|
||||
})
|
||||
rootSpan.end()
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
events: batchEvents,
|
||||
@@ -144,165 +248,203 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
|
||||
}
|
||||
|
||||
const startTime = Date.now()
|
||||
let totalEventsFlushed = 0
|
||||
let pollIterations = 0
|
||||
|
||||
const stream = new ReadableStream({
|
||||
async start(controller) {
|
||||
let cursor = afterCursor || '0'
|
||||
let controllerClosed = false
|
||||
let sawTerminalEvent = false
|
||||
|
||||
const closeController = () => {
|
||||
if (controllerClosed) return
|
||||
controllerClosed = true
|
||||
try {
|
||||
controller.close()
|
||||
} catch {
|
||||
// Controller already closed by runtime/client
|
||||
}
|
||||
}
|
||||
|
||||
const enqueueEvent = (payload: unknown) => {
|
||||
if (controllerClosed) return false
|
||||
try {
|
||||
controller.enqueue(encodeSSEEnvelope(payload))
|
||||
return true
|
||||
} catch {
|
||||
controllerClosed = true
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
const abortListener = () => {
|
||||
controllerClosed = true
|
||||
}
|
||||
request.signal.addEventListener('abort', abortListener, { once: true })
|
||||
|
||||
const flushEvents = async () => {
|
||||
const events = await readEvents(streamId, cursor)
|
||||
if (events.length > 0) {
|
||||
logger.info('[Resume] Flushing events', {
|
||||
streamId,
|
||||
afterCursor: cursor,
|
||||
eventCount: events.length,
|
||||
})
|
||||
}
|
||||
for (const envelope of events) {
|
||||
cursor = envelope.stream.cursor ?? String(envelope.seq)
|
||||
if (envelope.type === MothershipStreamV1EventType.complete) {
|
||||
sawTerminalEvent = true
|
||||
}
|
||||
if (!enqueueEvent(envelope)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const emitTerminalIfMissing = (
|
||||
status: MothershipStreamV1CompletionStatus,
|
||||
options?: { message?: string; code: string; reason?: string }
|
||||
) => {
|
||||
if (controllerClosed || sawTerminalEvent) {
|
||||
return
|
||||
}
|
||||
for (const envelope of buildResumeTerminalEnvelopes({
|
||||
streamId,
|
||||
afterCursor: cursor,
|
||||
status,
|
||||
message: options?.message,
|
||||
code: options?.code ?? 'resume_terminal',
|
||||
reason: options?.reason,
|
||||
})) {
|
||||
cursor = envelope.stream.cursor ?? String(envelope.seq)
|
||||
if (envelope.type === MothershipStreamV1EventType.complete) {
|
||||
sawTerminalEvent = true
|
||||
}
|
||||
if (!enqueueEvent(envelope)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const gap = await checkForReplayGap(streamId, afterCursor)
|
||||
if (gap) {
|
||||
for (const envelope of gap.envelopes) {
|
||||
enqueueEvent(envelope)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
await flushEvents()
|
||||
|
||||
while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) {
|
||||
const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch(
|
||||
(err) => {
|
||||
logger.warn('Failed to poll latest run for stream', {
|
||||
streamId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return null
|
||||
}
|
||||
)
|
||||
if (!currentRun) {
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream could not be recovered because its run metadata is unavailable.',
|
||||
code: 'resume_run_unavailable',
|
||||
reason: 'run_unavailable',
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
await flushEvents()
|
||||
|
||||
if (controllerClosed) {
|
||||
break
|
||||
}
|
||||
if (isTerminalStatus(currentRun.status)) {
|
||||
emitTerminalIfMissing(currentRun.status, {
|
||||
message:
|
||||
currentRun.status === MothershipStreamV1CompletionStatus.error
|
||||
? typeof currentRun.error === 'string'
|
||||
? currentRun.error
|
||||
: 'The recovered stream ended with an error.'
|
||||
: undefined,
|
||||
code: 'resume_terminal_status',
|
||||
reason: 'terminal_status',
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
if (request.signal.aborted) {
|
||||
controllerClosed = true
|
||||
break
|
||||
}
|
||||
|
||||
await sleep(POLL_INTERVAL_MS)
|
||||
}
|
||||
if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) {
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream recovery timed out before completion.',
|
||||
code: 'resume_timeout',
|
||||
reason: 'timeout',
|
||||
})
|
||||
}
|
||||
} catch (error) {
|
||||
if (!controllerClosed && !request.signal.aborted) {
|
||||
logger.warn('Stream replay failed', {
|
||||
streamId,
|
||||
error: toError(error).message,
|
||||
})
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream replay failed before completion.',
|
||||
code: 'resume_internal',
|
||||
reason: 'stream_replay_failed',
|
||||
})
|
||||
}
|
||||
} finally {
|
||||
request.signal.removeEventListener('abort', abortListener)
|
||||
closeController()
|
||||
}
|
||||
// Re-enter the root OTel context so any `withCopilotSpan` call below
|
||||
// (inside flushEvents/checkForReplayGap/etc.) parents under
|
||||
// copilot.resume.request instead of becoming an orphan.
|
||||
return otelContext.with(rootContext, () => startInner(controller))
|
||||
},
|
||||
})
|
||||
|
||||
async function startInner(controller: ReadableStreamDefaultController) {
|
||||
let cursor = afterCursor || '0'
|
||||
let controllerClosed = false
|
||||
let sawTerminalEvent = false
|
||||
let currentRequestId = extractRunRequestId(run)
|
||||
// Stamp the logical request id + chat id on the resume root as soon
|
||||
// as we resolve them from the run row, so TraceQL joins work on
|
||||
// resume legs the same way they do on the original POST.
|
||||
if (currentRequestId) {
|
||||
rootSpan.setAttribute(TraceAttr.RequestId, currentRequestId)
|
||||
rootSpan.setAttribute(TraceAttr.SimRequestId, currentRequestId)
|
||||
}
|
||||
if (run?.chatId) {
|
||||
rootSpan.setAttribute(TraceAttr.ChatId, run.chatId)
|
||||
}
|
||||
|
||||
const closeController = () => {
|
||||
if (controllerClosed) return
|
||||
controllerClosed = true
|
||||
try {
|
||||
controller.close()
|
||||
} catch {
|
||||
// Controller already closed by runtime/client
|
||||
}
|
||||
}
|
||||
|
||||
const enqueueEvent = (payload: unknown) => {
|
||||
if (controllerClosed) return false
|
||||
try {
|
||||
controller.enqueue(encodeSSEEnvelope(payload))
|
||||
return true
|
||||
} catch {
|
||||
controllerClosed = true
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
const abortListener = () => {
|
||||
controllerClosed = true
|
||||
}
|
||||
request.signal.addEventListener('abort', abortListener, { once: true })
|
||||
|
||||
const flushEvents = async () => {
|
||||
const events = await readEvents(streamId, cursor)
|
||||
if (events.length > 0) {
|
||||
totalEventsFlushed += events.length
|
||||
logger.debug('[Resume] Flushing events', {
|
||||
streamId,
|
||||
afterCursor: cursor,
|
||||
eventCount: events.length,
|
||||
})
|
||||
}
|
||||
for (const envelope of events) {
|
||||
cursor = envelope.stream.cursor ?? String(envelope.seq)
|
||||
currentRequestId = extractEnvelopeRequestId(envelope) || currentRequestId
|
||||
if (envelope.type === MothershipStreamV1EventType.complete) {
|
||||
sawTerminalEvent = true
|
||||
}
|
||||
if (!enqueueEvent(envelope)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const emitTerminalIfMissing = (
|
||||
status: MothershipStreamV1CompletionStatus,
|
||||
options?: { message?: string; code: string; reason?: string }
|
||||
) => {
|
||||
if (controllerClosed || sawTerminalEvent) {
|
||||
return
|
||||
}
|
||||
for (const envelope of buildResumeTerminalEnvelopes({
|
||||
streamId,
|
||||
afterCursor: cursor,
|
||||
status,
|
||||
message: options?.message,
|
||||
code: options?.code ?? 'resume_terminal',
|
||||
reason: options?.reason,
|
||||
requestId: currentRequestId,
|
||||
})) {
|
||||
cursor = envelope.stream.cursor ?? String(envelope.seq)
|
||||
if (envelope.type === MothershipStreamV1EventType.complete) {
|
||||
sawTerminalEvent = true
|
||||
}
|
||||
if (!enqueueEvent(envelope)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const gap = await checkForReplayGap(streamId, afterCursor, currentRequestId)
|
||||
if (gap) {
|
||||
for (const envelope of gap.envelopes) {
|
||||
enqueueEvent(envelope)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
await flushEvents()
|
||||
|
||||
while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) {
|
||||
pollIterations += 1
|
||||
const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch(
|
||||
(err) => {
|
||||
logger.warn('Failed to poll latest run for stream', {
|
||||
streamId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return null
|
||||
}
|
||||
)
|
||||
if (!currentRun) {
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream could not be recovered because its run metadata is unavailable.',
|
||||
code: 'resume_run_unavailable',
|
||||
reason: 'run_unavailable',
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
currentRequestId = extractRunRequestId(currentRun) || currentRequestId
|
||||
|
||||
await flushEvents()
|
||||
|
||||
if (controllerClosed) {
|
||||
break
|
||||
}
|
||||
if (isTerminalStatus(currentRun.status)) {
|
||||
emitTerminalIfMissing(currentRun.status, {
|
||||
message:
|
||||
currentRun.status === MothershipStreamV1CompletionStatus.error
|
||||
? typeof currentRun.error === 'string'
|
||||
? currentRun.error
|
||||
: 'The recovered stream ended with an error.'
|
||||
: undefined,
|
||||
code: 'resume_terminal_status',
|
||||
reason: 'terminal_status',
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
if (request.signal.aborted) {
|
||||
controllerClosed = true
|
||||
break
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
|
||||
}
|
||||
if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) {
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream recovery timed out before completion.',
|
||||
code: 'resume_timeout',
|
||||
reason: 'timeout',
|
||||
})
|
||||
}
|
||||
} catch (error) {
|
||||
if (!controllerClosed && !request.signal.aborted) {
|
||||
logger.warn('Stream replay failed', {
|
||||
streamId,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, {
|
||||
message: 'The stream replay failed before completion.',
|
||||
code: 'resume_internal',
|
||||
reason: 'stream_replay_failed',
|
||||
})
|
||||
}
|
||||
markSpanForError(rootSpan, error)
|
||||
} finally {
|
||||
request.signal.removeEventListener('abort', abortListener)
|
||||
closeController()
|
||||
rootSpan.setAttributes({
|
||||
[TraceAttr.CopilotResumeOutcome]: sawTerminalEvent
|
||||
? CopilotResumeOutcome.TerminalDelivered
|
||||
: controllerClosed
|
||||
? CopilotResumeOutcome.ClientDisconnected
|
||||
: CopilotResumeOutcome.EndedWithoutTerminal,
|
||||
[TraceAttr.CopilotResumeEventCount]: totalEventsFlushed,
|
||||
[TraceAttr.CopilotResumePollIterations]: pollIterations,
|
||||
[TraceAttr.CopilotResumeDurationMs]: Date.now() - startTime,
|
||||
})
|
||||
rootSpan.end()
|
||||
}
|
||||
}
|
||||
|
||||
return new Response(stream, { headers: SSE_RESPONSE_HEADERS })
|
||||
})
|
||||
}
|
||||
|
||||
@@ -206,7 +206,7 @@ describe('Copilot Confirm API Route', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('returns 400 when the durable write fails before publish', async () => {
|
||||
it('returns 500 when the durable write fails before publish', async () => {
|
||||
completeAsyncToolCall.mockRejectedValueOnce(new Error('db down'))
|
||||
|
||||
const response = await POST(
|
||||
@@ -216,7 +216,7 @@ describe('Copilot Confirm API Route', () => {
|
||||
})
|
||||
)
|
||||
|
||||
expect(response.status).toBe(400)
|
||||
expect(response.status).toBe(500)
|
||||
expect(publishToolConfirmation).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -14,6 +14,9 @@ import {
|
||||
getRunSegment,
|
||||
upsertAsyncToolCall,
|
||||
} from '@/lib/copilot/async-runs/repository'
|
||||
import { CopilotConfirmOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm'
|
||||
import {
|
||||
authenticateCopilotRequestSessionOnly,
|
||||
@@ -23,6 +26,7 @@ import {
|
||||
createRequestTracker,
|
||||
createUnauthorizedResponse,
|
||||
} from '@/lib/copilot/request/http'
|
||||
import { withIncomingGoSpan } from '@/lib/copilot/request/otel'
|
||||
import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
|
||||
|
||||
const logger = createLogger('CopilotConfirmAPI')
|
||||
@@ -114,93 +118,112 @@ async function updateToolCallStatus(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* POST /api/copilot/confirm
|
||||
* Accept client tool completion or detach confirmations.
|
||||
*/
|
||||
export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
// POST /api/copilot/confirm — delivery path for client-executed tool
|
||||
// results. Correlate via `toolCallId` when the awaiting chat stream
|
||||
// stalls.
|
||||
export const POST = withRouteHandler((req: NextRequest) => {
|
||||
const tracker = createRequestTracker()
|
||||
|
||||
try {
|
||||
// Authenticate user using consolidated helper
|
||||
const { userId: authenticatedUserId, isAuthenticated } =
|
||||
await authenticateCopilotRequestSessionOnly()
|
||||
return withIncomingGoSpan(
|
||||
req.headers,
|
||||
TraceSpan.CopilotConfirmToolResult,
|
||||
{ [TraceAttr.RequestId]: tracker.requestId },
|
||||
async (span) => {
|
||||
try {
|
||||
const { userId: authenticatedUserId, isAuthenticated } =
|
||||
await authenticateCopilotRequestSessionOnly()
|
||||
|
||||
if (!isAuthenticated) {
|
||||
return createUnauthorizedResponse()
|
||||
if (!isAuthenticated || !authenticatedUserId) {
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Unauthorized)
|
||||
return createUnauthorizedResponse()
|
||||
}
|
||||
|
||||
const body = await req.json()
|
||||
const { toolCallId, status, message, data } = ConfirmationSchema.parse(body)
|
||||
span.setAttributes({
|
||||
[TraceAttr.ToolCallId]: toolCallId,
|
||||
[TraceAttr.ToolConfirmationStatus]: status,
|
||||
[TraceAttr.UserId]: authenticatedUserId,
|
||||
})
|
||||
|
||||
const existing = await getAsyncToolCall(toolCallId).catch((err) => {
|
||||
logger.warn('Failed to fetch async tool call', {
|
||||
toolCallId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return null
|
||||
})
|
||||
|
||||
if (!existing) {
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ToolCallNotFound)
|
||||
return createNotFoundResponse('Tool call not found')
|
||||
}
|
||||
if (existing.toolName) span.setAttribute(TraceAttr.ToolName, existing.toolName)
|
||||
if (existing.runId) span.setAttribute(TraceAttr.RunId, existing.runId)
|
||||
|
||||
const run = await getRunSegment(existing.runId).catch((err) => {
|
||||
logger.warn('Failed to fetch run segment', {
|
||||
runId: existing.runId,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
return null
|
||||
})
|
||||
if (!run) {
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.RunNotFound)
|
||||
return createNotFoundResponse('Tool call run not found')
|
||||
}
|
||||
if (run.userId !== authenticatedUserId) {
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Forbidden)
|
||||
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
|
||||
}
|
||||
|
||||
const updated = await updateToolCallStatus(existing, status, message, data)
|
||||
|
||||
if (!updated) {
|
||||
logger.error(`[${tracker.requestId}] Failed to update tool call status`, {
|
||||
userId: authenticatedUserId,
|
||||
toolCallId,
|
||||
status,
|
||||
internalStatus: status,
|
||||
message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.UpdateFailed)
|
||||
// DB write failed — 500, not 400. 400 is a client-shape error.
|
||||
return createInternalServerErrorResponse('Failed to update tool call status')
|
||||
}
|
||||
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Delivered)
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`,
|
||||
toolCallId,
|
||||
status,
|
||||
})
|
||||
} catch (error) {
|
||||
const duration = tracker.getDuration()
|
||||
|
||||
if (error instanceof z.ZodError) {
|
||||
logger.error(`[${tracker.requestId}] Request validation error:`, {
|
||||
duration,
|
||||
errors: error.errors,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ValidationError)
|
||||
return createBadRequestResponse(
|
||||
`Invalid request data: ${error.errors.map((e) => e.message).join(', ')}`
|
||||
)
|
||||
}
|
||||
|
||||
logger.error(`[${tracker.requestId}] Unexpected error:`, {
|
||||
duration,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
stack: error instanceof Error ? error.stack : undefined,
|
||||
})
|
||||
|
||||
span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.InternalError)
|
||||
return createInternalServerErrorResponse(
|
||||
error instanceof Error ? error.message : 'Internal server error'
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const body = await req.json()
|
||||
const { toolCallId, status, message, data } = ConfirmationSchema.parse(body)
|
||||
const existing = await getAsyncToolCall(toolCallId).catch((err) => {
|
||||
logger.warn('Failed to fetch async tool call', {
|
||||
toolCallId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return null
|
||||
})
|
||||
|
||||
if (!existing) {
|
||||
return createNotFoundResponse('Tool call not found')
|
||||
}
|
||||
|
||||
const run = await getRunSegment(existing.runId).catch((err) => {
|
||||
logger.warn('Failed to fetch run segment', {
|
||||
runId: existing.runId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return null
|
||||
})
|
||||
if (!run) {
|
||||
return createNotFoundResponse('Tool call run not found')
|
||||
}
|
||||
if (run.userId !== authenticatedUserId) {
|
||||
return NextResponse.json({ error: 'Forbidden' }, { status: 403 })
|
||||
}
|
||||
|
||||
// Update the durable tool call status and wake any waiters.
|
||||
const updated = await updateToolCallStatus(existing, status, message, data)
|
||||
|
||||
if (!updated) {
|
||||
logger.error(`[${tracker.requestId}] Failed to update tool call status`, {
|
||||
userId: authenticatedUserId,
|
||||
toolCallId,
|
||||
status,
|
||||
internalStatus: status,
|
||||
message,
|
||||
})
|
||||
return createBadRequestResponse('Failed to update tool call status or tool call not found')
|
||||
}
|
||||
|
||||
const duration = tracker.getDuration()
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`,
|
||||
toolCallId,
|
||||
status,
|
||||
})
|
||||
} catch (error) {
|
||||
const duration = tracker.getDuration()
|
||||
|
||||
if (error instanceof z.ZodError) {
|
||||
logger.error(`[${tracker.requestId}] Request validation error:`, {
|
||||
duration,
|
||||
errors: error.errors,
|
||||
})
|
||||
return createBadRequestResponse(
|
||||
`Invalid request data: ${error.errors.map((e) => e.message).join(', ')}`
|
||||
)
|
||||
}
|
||||
|
||||
logger.error(`[${tracker.requestId}] Unexpected error:`, {
|
||||
duration,
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
stack: error instanceof Error ? error.stack : undefined,
|
||||
})
|
||||
|
||||
return createInternalServerErrorResponse(
|
||||
error instanceof Error ? error.message : 'Internal server error'
|
||||
)
|
||||
}
|
||||
)
|
||||
})
|
||||
|
||||
@@ -2,6 +2,7 @@ import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http'
|
||||
|
||||
interface AvailableModel {
|
||||
@@ -45,10 +46,12 @@ export const GET = withRouteHandler(async (_req: NextRequest) => {
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(`${SIM_AGENT_API_URL}/api/get-available-models`, {
|
||||
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/get-available-models`, {
|
||||
method: 'GET',
|
||||
headers,
|
||||
cache: 'no-store',
|
||||
spanName: 'sim → go /api/get-available-models',
|
||||
operation: 'get_available_models',
|
||||
})
|
||||
|
||||
const payload = await response.json().catch(() => ({}))
|
||||
|
||||
@@ -22,6 +22,22 @@ vi.mock('@/lib/core/config/env', () => createEnvMock({ COPILOT_API_KEY: 'test-ap
|
||||
|
||||
import { POST } from '@/app/api/copilot/stats/route'
|
||||
|
||||
// `fetchGo` reads `response.status` and `response.headers.get('content-length')`
|
||||
// to stamp span attributes, so mock responses need both fields or the call
|
||||
// path throws before the route handler sees the body.
|
||||
function buildMockResponse(init: {
|
||||
ok: boolean
|
||||
status?: number
|
||||
json: () => Promise<unknown>
|
||||
}): Record<string, unknown> {
|
||||
return {
|
||||
ok: init.ok,
|
||||
status: init.status ?? (init.ok ? 200 : 500),
|
||||
headers: new Headers(),
|
||||
json: init.json,
|
||||
}
|
||||
}
|
||||
|
||||
describe('Copilot Stats API Route', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
@@ -58,10 +74,12 @@ describe('Copilot Stats API Route', () => {
|
||||
isAuthenticated: true,
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
)
|
||||
|
||||
const req = createMockRequest('POST', {
|
||||
messageId: 'message-123',
|
||||
@@ -152,10 +170,12 @@ describe('Copilot Stats API Route', () => {
|
||||
isAuthenticated: true,
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
json: () => Promise.resolve({ error: 'Invalid message ID' }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: false,
|
||||
json: () => Promise.resolve({ error: 'Invalid message ID' }),
|
||||
})
|
||||
)
|
||||
|
||||
const req = createMockRequest('POST', {
|
||||
messageId: 'invalid-message',
|
||||
@@ -176,10 +196,12 @@ describe('Copilot Stats API Route', () => {
|
||||
isAuthenticated: true,
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
json: () => Promise.resolve({ message: 'Rate limit exceeded' }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: false,
|
||||
json: () => Promise.resolve({ message: 'Rate limit exceeded' }),
|
||||
})
|
||||
)
|
||||
|
||||
const req = createMockRequest('POST', {
|
||||
messageId: 'message-123',
|
||||
@@ -200,10 +222,12 @@ describe('Copilot Stats API Route', () => {
|
||||
isAuthenticated: true,
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
json: () => Promise.reject(new Error('Not JSON')),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: false,
|
||||
json: () => Promise.reject(new Error('Not JSON')),
|
||||
})
|
||||
)
|
||||
|
||||
const req = createMockRequest('POST', {
|
||||
messageId: 'message-123',
|
||||
@@ -266,10 +290,12 @@ describe('Copilot Stats API Route', () => {
|
||||
isAuthenticated: true,
|
||||
})
|
||||
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
mockFetch.mockResolvedValueOnce(
|
||||
buildMockResponse({
|
||||
ok: true,
|
||||
json: () => Promise.resolve({ success: true }),
|
||||
})
|
||||
)
|
||||
|
||||
const req = createMockRequest('POST', {
|
||||
messageId: 'message-456',
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import {
|
||||
authenticateCopilotRequestSessionOnly,
|
||||
createBadRequestResponse,
|
||||
@@ -40,13 +41,15 @@ export const POST = withRouteHandler(async (req: NextRequest) => {
|
||||
diffAccepted,
|
||||
}
|
||||
|
||||
const agentRes = await fetch(`${SIM_AGENT_API_URL}/api/stats`, {
|
||||
const agentRes = await fetchGo(`${SIM_AGENT_API_URL}/api/stats`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
spanName: 'sim → go /api/stats',
|
||||
operation: 'stats_ingest',
|
||||
})
|
||||
|
||||
// Prefer not to block clients; still relay status
|
||||
|
||||
@@ -21,6 +21,7 @@ import { validateOAuthAccessToken } from '@/lib/auth/oauth-token'
|
||||
import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription'
|
||||
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
|
||||
import { ORCHESTRATION_TIMEOUT_MS, SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { createRequestId } from '@/lib/copilot/request/http'
|
||||
import { runHeadlessCopilotLifecycle } from '@/lib/copilot/request/lifecycle/headless'
|
||||
import { orchestrateSubagentStream } from '@/lib/copilot/request/subagent'
|
||||
import { ensureHandlersRegistered, executeTool } from '@/lib/copilot/tool-executor'
|
||||
@@ -61,7 +62,8 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
|
||||
return { success: false, error: 'Server configuration error' }
|
||||
}
|
||||
|
||||
const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key`, {
|
||||
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
|
||||
const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
@@ -69,6 +71,8 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
|
||||
},
|
||||
body: JSON.stringify({ targetApiKey: apiKey }),
|
||||
signal: AbortSignal.timeout(10_000),
|
||||
spanName: 'sim → go /api/validate-key (mcp)',
|
||||
operation: 'mcp_validate_key',
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -89,7 +93,10 @@ async function authenticateCopilotApiKey(apiKey: string): Promise<CopilotKeyAuth
|
||||
}
|
||||
}
|
||||
|
||||
return { success: false, error: String(upstream ?? 'Copilot API key validation failed') }
|
||||
return {
|
||||
success: false,
|
||||
error: String(upstream ?? 'Copilot API key validation failed'),
|
||||
}
|
||||
}
|
||||
|
||||
const data = (await res.json()) as { ok?: boolean; userId?: string }
|
||||
@@ -696,7 +703,11 @@ async function handleBuildToolCall(
|
||||
resolvedWorkflowName = authorization.workflow?.name || undefined
|
||||
resolvedWorkspaceId = authorization.workflow?.workspaceId || undefined
|
||||
return authorization.allowed
|
||||
? { status: 'resolved' as const, workflowId, workflowName: resolvedWorkflowName }
|
||||
? {
|
||||
status: 'resolved' as const,
|
||||
workflowId,
|
||||
workflowName: resolvedWorkflowName,
|
||||
}
|
||||
: {
|
||||
status: 'not_found' as const,
|
||||
message: 'workflowId is required for build. Call create_workflow first.',
|
||||
@@ -815,6 +826,7 @@ async function handleSubagentToolCall(
|
||||
(args.message as string) ||
|
||||
(args.error as string) ||
|
||||
JSON.stringify(args)
|
||||
const simRequestId = createRequestId()
|
||||
|
||||
const context = (args.context as Record<string, unknown>) || {}
|
||||
if (args.plan && !context.plan) {
|
||||
@@ -836,6 +848,7 @@ async function handleSubagentToolCall(
|
||||
userId,
|
||||
workflowId: args.workflowId as string | undefined,
|
||||
workspaceId: args.workspaceId as string | undefined,
|
||||
simRequestId,
|
||||
abortSignal,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -140,35 +140,60 @@ export const MessageActions = memo(function MessageActions({
|
||||
}
|
||||
}
|
||||
|
||||
if (!content) return null
|
||||
const hasContent = Boolean(content)
|
||||
const canSubmitFeedback = Boolean(chatId && userQuery)
|
||||
if (!hasContent && !canSubmitFeedback) return null
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className='flex items-center gap-0.5'>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Copy message'
|
||||
onClick={copyToClipboard}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
{copied ? <Check className={ICON_CLASS} /> : <Copy className={ICON_CLASS} />}
|
||||
</button>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Like'
|
||||
onClick={() => handleFeedbackClick('up')}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
<ThumbsUp className={ICON_CLASS} />
|
||||
</button>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Dislike'
|
||||
onClick={() => handleFeedbackClick('down')}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
<ThumbsDown className={ICON_CLASS} />
|
||||
</button>
|
||||
{hasContent && (
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger asChild>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Copy message'
|
||||
onClick={copyToClipboard}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
{copied ? <Check className={ICON_CLASS} /> : <Copy className={ICON_CLASS} />}
|
||||
</button>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content side='top'>
|
||||
{copied ? 'Copied message' : 'Copy message'}
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
)}
|
||||
{canSubmitFeedback && (
|
||||
<>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger asChild>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Like'
|
||||
onClick={() => handleFeedbackClick('up')}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
<ThumbsUp className={ICON_CLASS} />
|
||||
</button>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content side='top'>Good response</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger asChild>
|
||||
<button
|
||||
type='button'
|
||||
aria-label='Dislike'
|
||||
onClick={() => handleFeedbackClick('down')}
|
||||
className={BUTTON_CLASS}
|
||||
>
|
||||
<ThumbsDown className={ICON_CLASS} />
|
||||
</button>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content side='top'>Bad response</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Modal open={pendingFeedback !== null} onOpenChange={handleModalClose}>
|
||||
|
||||
@@ -28,6 +28,7 @@ import {
|
||||
MothershipStreamV1SpanPayloadKind,
|
||||
MothershipStreamV1ToolOutcome,
|
||||
MothershipStreamV1ToolPhase,
|
||||
MothershipStreamV1ToolStatus,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import {
|
||||
CrawlWebsite,
|
||||
@@ -88,6 +89,7 @@ import {
|
||||
markRunToolManuallyStopped,
|
||||
reportManualRunToolStop,
|
||||
} from '@/lib/copilot/tools/client/run-tool-execution'
|
||||
import { setCurrentChatTraceparent } from '@/lib/copilot/tools/client/trace-context'
|
||||
import { isWorkflowToolName } from '@/lib/copilot/tools/workflow-tools'
|
||||
import { getNextWorkflowColor } from '@/lib/workflows/colors'
|
||||
import { getQueryClient } from '@/app/_shell/providers/get-query-client'
|
||||
@@ -1273,6 +1275,14 @@ export function useChat(
|
||||
const activeTurnRef = useRef<ActiveTurn | null>(null)
|
||||
const pendingUserMsgRef = useRef<PersistedMessage | null>(null)
|
||||
const streamIdRef = useRef<string | undefined>(undefined)
|
||||
// W3C traceparent from the chat POST response; echoed on
|
||||
// abort/stop/confirm/replay so side-channel calls join the same
|
||||
// trace instead of becoming disconnected roots.
|
||||
const streamTraceparentRef = useRef<string | undefined>(undefined)
|
||||
// The `request.id` from the active stream's trace events. Forwarded
|
||||
// to /chat/stop so the persisted aborted message carries it (keeps
|
||||
// the copy-request-ID button functional after refetch).
|
||||
const streamRequestIdRef = useRef<string | undefined>(undefined)
|
||||
const locallyTerminalStreamIdRef = useRef<string | undefined>(undefined)
|
||||
const lastCursorRef = useRef('0')
|
||||
const sendingRef = useRef(false)
|
||||
@@ -1311,6 +1321,9 @@ export function useChat(
|
||||
activeTurnRef.current = null
|
||||
pendingUserMsgRef.current = null
|
||||
streamIdRef.current = undefined
|
||||
streamRequestIdRef.current = undefined
|
||||
streamTraceparentRef.current = undefined
|
||||
setCurrentChatTraceparent(undefined)
|
||||
lastCursorRef.current = '0'
|
||||
resetStreamingBuffers()
|
||||
}, [resetStreamingBuffers])
|
||||
@@ -1810,8 +1823,10 @@ export function useChat(
|
||||
try {
|
||||
const pendingLines: string[] = []
|
||||
|
||||
readLoop: while (true) {
|
||||
while (true) {
|
||||
if (pendingLines.length === 0) {
|
||||
// Don't read another chunk after `complete` has drained.
|
||||
if (sawCompleteEvent) break
|
||||
const { done, value } = await reader.read()
|
||||
if (done) break
|
||||
if (isStale()) continue
|
||||
@@ -1851,6 +1866,7 @@ export function useChat(
|
||||
|
||||
if (parsed.trace?.requestId && parsed.trace.requestId !== streamRequestId) {
|
||||
streamRequestId = parsed.trace.requestId
|
||||
streamRequestIdRef.current = streamRequestId
|
||||
flush()
|
||||
}
|
||||
if (parsed.stream?.streamId) {
|
||||
@@ -2245,7 +2261,9 @@ export function useChat(
|
||||
}
|
||||
|
||||
const name = payload.toolName
|
||||
const isPartial = payload.partial === true
|
||||
const isPartial =
|
||||
payload.partial === true ||
|
||||
payload.status === MothershipStreamV1ToolStatus.generating
|
||||
if (name === ToolSearchToolRegex.id || isToolHiddenInUi(name)) {
|
||||
break
|
||||
}
|
||||
@@ -2467,9 +2485,12 @@ export function useChat(
|
||||
}
|
||||
case MothershipStreamV1EventType.complete: {
|
||||
sawCompleteEvent = true
|
||||
// `complete` is terminal for this stream, even if the transport takes a moment
|
||||
// longer to close.
|
||||
break readLoop
|
||||
// `complete` is the end-of-turn marker; drain whatever
|
||||
// else arrived in the same TCP chunk (trailing text,
|
||||
// followups, run metadata) before stopping. Do NOT
|
||||
// await another read — events after `complete` would
|
||||
// be a server bug.
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2530,7 +2551,12 @@ export function useChat(
|
||||
): Promise<StreamBatchResponse> => {
|
||||
const response = await fetch(
|
||||
`/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(afterCursor)}&batch=true`,
|
||||
{ signal }
|
||||
{
|
||||
signal,
|
||||
...(streamTraceparentRef.current
|
||||
? { headers: { traceparent: streamTraceparentRef.current } }
|
||||
: {}),
|
||||
}
|
||||
)
|
||||
if (!response.ok) {
|
||||
throw new Error(`Stream resume batch failed: ${response.status}`)
|
||||
@@ -2601,7 +2627,12 @@ export function useChat(
|
||||
|
||||
const sseRes = await fetch(
|
||||
`/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(latestCursor)}`,
|
||||
{ signal: activeAbort.signal }
|
||||
{
|
||||
signal: activeAbort.signal,
|
||||
...(streamTraceparentRef.current
|
||||
? { headers: { traceparent: streamTraceparentRef.current } }
|
||||
: {}),
|
||||
}
|
||||
)
|
||||
if (!sseRes.ok || !sseRes.body) {
|
||||
throw new Error(RECONNECT_TAIL_ERROR)
|
||||
@@ -2842,12 +2873,18 @@ export function useChat(
|
||||
streamId?: string
|
||||
content?: string
|
||||
blocks?: ContentBlock[]
|
||||
// `stopGeneration` must snapshot these BEFORE clearActiveTurn()
|
||||
// nulls the refs, or the fetch sees undefined.
|
||||
requestId?: string
|
||||
traceparent?: string
|
||||
}) => {
|
||||
const chatId = overrides?.chatId ?? chatIdRef.current
|
||||
const streamId = overrides?.streamId ?? streamIdRef.current
|
||||
if (!chatId || !streamId) return
|
||||
|
||||
const content = overrides?.content ?? streamingContentRef.current
|
||||
const requestId = overrides?.requestId ?? streamRequestIdRef.current
|
||||
const traceparent = overrides?.traceparent ?? streamTraceparentRef.current
|
||||
|
||||
const sourceBlocks = overrides?.blocks ?? streamingBlocksRef.current
|
||||
const storedBlocks = sourceBlocks.map((block) => {
|
||||
@@ -2880,12 +2917,16 @@ export function useChat(
|
||||
try {
|
||||
const res = await fetch(stopPathRef.current, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(traceparent ? { traceparent } : {}),
|
||||
},
|
||||
body: JSON.stringify({
|
||||
chatId,
|
||||
streamId,
|
||||
content,
|
||||
...(storedBlocks.length > 0 && { contentBlocks: storedBlocks }),
|
||||
...(requestId ? { requestId } : {}),
|
||||
}),
|
||||
})
|
||||
if (!res.ok) {
|
||||
@@ -2924,9 +2965,36 @@ export function useChat(
|
||||
const messagesRef = useRef(messages)
|
||||
messagesRef.current = messages
|
||||
|
||||
/**
|
||||
* Notify downstream consumers that a turn has ended and, if a
|
||||
* follow-up message is queued, kick the dispatcher. Safe to call
|
||||
* from both the normal-completion path (`finalize`) and the
|
||||
* abort/stop path (`stopGeneration`), which previously short-
|
||||
* circuited without notifying — queued messages then sat until the
|
||||
* user manually re-sent. Idempotent w.r.t. `onStreamEnd` (one call
|
||||
* per terminal transition); the dispatcher itself de-dupes.
|
||||
*/
|
||||
const notifyTurnEnded = useCallback(
|
||||
(options: { error: boolean; skipQueueDispatch?: boolean }) => {
|
||||
const hasQueuedFollowUp = !options.error && messageQueueRef.current.length > 0
|
||||
if (!options.error) {
|
||||
const cid = chatIdRef.current
|
||||
if (cid && onStreamEndRef.current) {
|
||||
onStreamEndRef.current(cid, messagesRef.current)
|
||||
}
|
||||
}
|
||||
if (!options.error && !options.skipQueueDispatch && hasQueuedFollowUp) {
|
||||
void enqueueQueueDispatchRef.current({ type: 'send_head' })
|
||||
}
|
||||
return hasQueuedFollowUp
|
||||
},
|
||||
[]
|
||||
)
|
||||
|
||||
const finalize = useCallback(
|
||||
(options?: { error?: boolean }) => {
|
||||
const hasQueuedFollowUp = !options?.error && messageQueueRef.current.length > 0
|
||||
const isError = !!options?.error
|
||||
const hasQueuedFollowUp = !isError && messageQueueRef.current.length > 0
|
||||
reconcileTerminalPreviewSessions()
|
||||
locallyTerminalStreamIdRef.current =
|
||||
streamIdRef.current ?? activeTurnRef.current?.userMessageId ?? undefined
|
||||
@@ -2934,23 +3002,15 @@ export function useChat(
|
||||
setTransportIdle()
|
||||
abortControllerRef.current = null
|
||||
invalidateChatQueries({ includeDetail: !hasQueuedFollowUp })
|
||||
|
||||
if (!options?.error) {
|
||||
const cid = chatIdRef.current
|
||||
if (cid && onStreamEndRef.current) {
|
||||
onStreamEndRef.current(cid, messagesRef.current)
|
||||
}
|
||||
}
|
||||
|
||||
if (options?.error) {
|
||||
return
|
||||
}
|
||||
|
||||
if (hasQueuedFollowUp) {
|
||||
void enqueueQueueDispatchRef.current({ type: 'send_head' })
|
||||
}
|
||||
notifyTurnEnded({ error: isError })
|
||||
},
|
||||
[clearActiveTurn, invalidateChatQueries, reconcileTerminalPreviewSessions, setTransportIdle]
|
||||
[
|
||||
clearActiveTurn,
|
||||
invalidateChatQueries,
|
||||
notifyTurnEnded,
|
||||
reconcileTerminalPreviewSessions,
|
||||
setTransportIdle,
|
||||
]
|
||||
)
|
||||
finalizeRef.current = finalize
|
||||
|
||||
@@ -3162,6 +3222,14 @@ export function useChat(
|
||||
signal: abortController.signal,
|
||||
})
|
||||
|
||||
// Capture for propagation on side-channel calls + non-React
|
||||
// tool-completion callbacks (via trace-context singleton).
|
||||
const traceparent = response.headers.get('traceparent')
|
||||
if (traceparent) {
|
||||
streamTraceparentRef.current = traceparent
|
||||
setCurrentChatTraceparent(traceparent)
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}))
|
||||
if (response.status === 409) {
|
||||
@@ -3403,6 +3471,12 @@ export function useChat(
|
||||
...(block.options ? { options: [...block.options] } : {}),
|
||||
...(block.toolCall ? { toolCall: { ...block.toolCall } } : {}),
|
||||
}))
|
||||
// Snapshot BEFORE clearActiveTurn() nulls the refs. Both
|
||||
// persistPartialResponse and the abort/stop fetches run inside
|
||||
// stopBarrier below, after several awaits — the refs are long
|
||||
// gone by the time the fetches serialize their headers.
|
||||
const stopRequestIdSnapshot = streamRequestIdRef.current
|
||||
const stopTraceparentSnapshot = streamTraceparentRef.current
|
||||
|
||||
locallyTerminalStreamIdRef.current = sid
|
||||
streamGenRef.current++
|
||||
@@ -3462,7 +3536,10 @@ export function useChat(
|
||||
? (async () => {
|
||||
const res = await fetch('/api/mothership/chat/abort', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(stopTraceparentSnapshot ? { traceparent: stopTraceparentSnapshot } : {}),
|
||||
},
|
||||
body: JSON.stringify({
|
||||
streamId: sid,
|
||||
...(resolvedChatId ? { chatId: resolvedChatId } : {}),
|
||||
@@ -3485,6 +3562,8 @@ export function useChat(
|
||||
streamId: sid,
|
||||
content: stopContentSnapshot,
|
||||
blocks: stopBlocksSnapshot,
|
||||
requestId: stopRequestIdSnapshot,
|
||||
traceparent: stopTraceparentSnapshot,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -3498,6 +3577,8 @@ export function useChat(
|
||||
pendingStopPromiseRef.current = stopBarrier
|
||||
try {
|
||||
await stopBarrier
|
||||
// Dispatch queued follow-ups after Stop resolves.
|
||||
notifyTurnEnded({ error: false })
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Failed to stop the previous response')
|
||||
throw err
|
||||
@@ -3509,6 +3590,7 @@ export function useChat(
|
||||
}, [
|
||||
cancelActiveWorkflowExecutions,
|
||||
invalidateChatQueries,
|
||||
notifyTurnEnded,
|
||||
persistPartialResponse,
|
||||
queryClient,
|
||||
resetEphemeralPreviewState,
|
||||
|
||||
@@ -1,20 +1,32 @@
|
||||
/**
|
||||
* Sim OpenTelemetry - Server-side Instrumentation
|
||||
*/
|
||||
// Sim OTel bootstrap. Filter by `mothership.origin` or span-name
|
||||
// prefix (`sim-mothership:` / `go-mothership:`) to separate the two
|
||||
// halves of a mothership trace in the OTLP backend.
|
||||
|
||||
import type { Attributes, Context, Link, SpanKind } from '@opentelemetry/api'
|
||||
import { DiagConsoleLogger, DiagLogLevel, diag } from '@opentelemetry/api'
|
||||
import type { Sampler, SamplingResult } from '@opentelemetry/sdk-trace-base'
|
||||
import { DiagConsoleLogger, DiagLogLevel, diag, TraceFlags, trace } from '@opentelemetry/api'
|
||||
import type {
|
||||
ReadableSpan,
|
||||
Sampler,
|
||||
SamplingResult,
|
||||
Span,
|
||||
SpanProcessor,
|
||||
} from '@opentelemetry/sdk-trace-base'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { env } from './lib/core/config/env'
|
||||
|
||||
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR)
|
||||
|
||||
const logger = createLogger('OTelInstrumentation')
|
||||
|
||||
const MOTHERSHIP_ORIGIN = 'sim-mothership' as const
|
||||
const SPAN_NAME_PREFIX = `${MOTHERSHIP_ORIGIN}: `
|
||||
|
||||
const SERVICE_INSTANCE_SLUG = 'sim' as const
|
||||
|
||||
const DEFAULT_TELEMETRY_CONFIG = {
|
||||
endpoint: env.TELEMETRY_ENDPOINT || 'https://telemetry.simstudio.ai/v1/traces',
|
||||
serviceName: 'sim-studio',
|
||||
serviceName: 'mothership',
|
||||
serviceVersion: '0.1.0',
|
||||
serverSide: { enabled: true },
|
||||
batchSettings: {
|
||||
@@ -25,29 +37,95 @@ const DEFAULT_TELEMETRY_CONFIG = {
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
* Span name prefixes we want to KEEP
|
||||
*/
|
||||
const ALLOWED_SPAN_PREFIXES = [
|
||||
'platform.', // Our platform events
|
||||
'gen_ai.', // GenAI semantic convention spans
|
||||
'workflow.', // Workflow execution spans
|
||||
'block.', // Block execution spans
|
||||
'http.client.', // Our API block HTTP calls
|
||||
'function.', // Function block execution
|
||||
'router.', // Router block evaluation
|
||||
'condition.', // Condition block evaluation
|
||||
'loop.', // Loop block execution
|
||||
'parallel.', // Parallel block execution
|
||||
]
|
||||
// Allowlist of span-name prefixes exported from this process.
|
||||
// Non-mothership code (workflow executor, block runtime, framework
|
||||
// noise) is dropped. Broaden carefully — `http.` etc. would reopen
|
||||
// the firehose.
|
||||
const ALLOWED_SPAN_PREFIXES = ['gen_ai.', 'copilot.', 'sim →', 'sim.', 'tool.execute']
|
||||
|
||||
function isBusinessSpan(spanName: string): boolean {
|
||||
return ALLOWED_SPAN_PREFIXES.some((prefix) => spanName.startsWith(prefix))
|
||||
}
|
||||
|
||||
// Parse `OTEL_EXPORTER_OTLP_HEADERS`: `key1=value1,key2=value2`
|
||||
// (URL-encoded values, whitespace tolerated).
|
||||
function parseOtlpHeadersEnv(raw: string): Record<string, string> {
|
||||
const out: Record<string, string> = {}
|
||||
if (!raw) return out
|
||||
for (const part of raw.split(',')) {
|
||||
const trimmed = part.trim()
|
||||
if (!trimmed) continue
|
||||
const eq = trimmed.indexOf('=')
|
||||
if (eq <= 0) continue
|
||||
const key = trimmed.slice(0, eq).trim()
|
||||
const rawVal = trimmed.slice(eq + 1).trim()
|
||||
let val = rawVal
|
||||
try {
|
||||
val = decodeURIComponent(rawVal)
|
||||
} catch {
|
||||
// value wasn't URL-encoded; keep as-is.
|
||||
}
|
||||
if (key) out[key] = val
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Append `/v1/traces` to the OTLP base URL unless already present.
|
||||
// The HTTP exporter doesn't auto-suffix the signal path even though
|
||||
// the spec says the env var is a base URL.
|
||||
function normalizeOtlpTracesUrl(url: string): string {
|
||||
if (!url) return url
|
||||
try {
|
||||
const u = new URL(url)
|
||||
if (u.pathname.endsWith('/v1/traces')) return url
|
||||
const base = url.replace(/\/$/, '')
|
||||
return `${base}/v1/traces`
|
||||
} catch {
|
||||
return url
|
||||
}
|
||||
}
|
||||
|
||||
// Sampling ratio from env (mirrors Go's `samplerFromEnv`); fallback
|
||||
// is 100% everywhere. Retention caps cost, not sampling.
|
||||
function resolveSamplingRatio(_isLocalEndpoint: boolean): number {
|
||||
const raw = process.env.TELEMETRY_SAMPLING_RATIO || process.env.OTEL_TRACES_SAMPLER_ARG || ''
|
||||
if (raw) {
|
||||
const parsed = Number.parseFloat(raw)
|
||||
if (Number.isFinite(parsed)) {
|
||||
if (parsed <= 0) return 0
|
||||
if (parsed >= 1) return 1
|
||||
return parsed
|
||||
}
|
||||
}
|
||||
return 1.0
|
||||
}
|
||||
|
||||
// Tags allowed spans with `mothership.origin` and prepends
|
||||
// `sim-mothership:` to the span name so backends can visually split
|
||||
// the two halves even when service.name is shared.
|
||||
class MothershipOriginSpanProcessor implements SpanProcessor {
|
||||
onStart(span: Span): void {
|
||||
const name = span.name
|
||||
if (!isBusinessSpan(name)) {
|
||||
return
|
||||
}
|
||||
span.setAttribute(TraceAttr.MothershipOrigin, MOTHERSHIP_ORIGIN)
|
||||
if (!name.startsWith(SPAN_NAME_PREFIX)) {
|
||||
span.updateName(`${SPAN_NAME_PREFIX}${name}`)
|
||||
}
|
||||
}
|
||||
onEnd(_span: ReadableSpan): void {}
|
||||
shutdown(): Promise<void> {
|
||||
return Promise.resolve()
|
||||
}
|
||||
forceFlush(): Promise<void> {
|
||||
return Promise.resolve()
|
||||
}
|
||||
}
|
||||
|
||||
async function initializeOpenTelemetry() {
|
||||
try {
|
||||
if (env.NEXT_TELEMETRY_DISABLED === '1') {
|
||||
if (env.NEXT_TELEMETRY_DISABLED === '1' || process.env.NEXT_TELEMETRY_DISABLED === '1') {
|
||||
logger.info('OpenTelemetry disabled via NEXT_TELEMETRY_DISABLED=1')
|
||||
return
|
||||
}
|
||||
@@ -59,11 +137,29 @@ async function initializeOpenTelemetry() {
|
||||
telemetryConfig = DEFAULT_TELEMETRY_CONFIG
|
||||
}
|
||||
|
||||
// Prefer the OTel spec env var, fall back to legacy TELEMETRY_ENDPOINT.
|
||||
const resolvedEndpoint =
|
||||
process.env.OTEL_EXPORTER_OTLP_ENDPOINT ||
|
||||
process.env.TELEMETRY_ENDPOINT ||
|
||||
env.TELEMETRY_ENDPOINT ||
|
||||
telemetryConfig.endpoint
|
||||
telemetryConfig = {
|
||||
...telemetryConfig,
|
||||
endpoint: resolvedEndpoint,
|
||||
serviceName: 'mothership',
|
||||
}
|
||||
|
||||
if (telemetryConfig.serverSide?.enabled === false) {
|
||||
logger.info('Server-side OpenTelemetry disabled in config')
|
||||
return
|
||||
}
|
||||
|
||||
logger.info('OpenTelemetry init', {
|
||||
endpoint: telemetryConfig.endpoint,
|
||||
serviceName: telemetryConfig.serviceName,
|
||||
origin: MOTHERSHIP_ORIGIN,
|
||||
})
|
||||
|
||||
const { NodeSDK } = await import('@opentelemetry/sdk-node')
|
||||
const { defaultResource, resourceFromAttributes } = await import('@opentelemetry/resources')
|
||||
const { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, ATTR_DEPLOYMENT_ENVIRONMENT } = await import(
|
||||
@@ -71,11 +167,14 @@ async function initializeOpenTelemetry() {
|
||||
)
|
||||
const { OTLPTraceExporter } = await import('@opentelemetry/exporter-trace-otlp-http')
|
||||
const { BatchSpanProcessor } = await import('@opentelemetry/sdk-trace-node')
|
||||
const { ParentBasedSampler, TraceIdRatioBasedSampler, SamplingDecision } = await import(
|
||||
const { TraceIdRatioBasedSampler, SamplingDecision } = await import(
|
||||
'@opentelemetry/sdk-trace-base'
|
||||
)
|
||||
|
||||
const createBusinessSpanSampler = (baseSampler: Sampler): Sampler => ({
|
||||
// Drops Next framework spans, inherits SAMPLED from business
|
||||
// parents, and re-samples business roots fresh (don't delegate to
|
||||
// ParentBased — its unsampled-parent path is AlwaysOff by default).
|
||||
const createBusinessSpanSampler = (rootRatioSampler: Sampler): Sampler => ({
|
||||
shouldSample(
|
||||
context: Context,
|
||||
traceId: string,
|
||||
@@ -88,25 +187,60 @@ async function initializeOpenTelemetry() {
|
||||
return { decision: SamplingDecision.NOT_RECORD }
|
||||
}
|
||||
|
||||
const parentSpanContext = trace.getSpanContext(context)
|
||||
const parentIsSampled =
|
||||
!!parentSpanContext &&
|
||||
(parentSpanContext.traceFlags & TraceFlags.SAMPLED) === TraceFlags.SAMPLED
|
||||
|
||||
if (parentIsSampled) {
|
||||
return { decision: SamplingDecision.RECORD_AND_SAMPLED }
|
||||
}
|
||||
|
||||
if (isBusinessSpan(spanName)) {
|
||||
return baseSampler.shouldSample(context, traceId, spanName, spanKind, attributes, links)
|
||||
return rootRatioSampler.shouldSample(
|
||||
context,
|
||||
traceId,
|
||||
spanName,
|
||||
spanKind,
|
||||
attributes,
|
||||
links
|
||||
)
|
||||
}
|
||||
|
||||
return { decision: SamplingDecision.NOT_RECORD }
|
||||
},
|
||||
|
||||
toString(): string {
|
||||
return `BusinessSpanSampler{baseSampler=${baseSampler.toString()}}`
|
||||
return `BusinessSpanSampler{rootSampler=${rootRatioSampler.toString()}}`
|
||||
},
|
||||
})
|
||||
|
||||
const otlpHeaders = parseOtlpHeadersEnv(process.env.OTEL_EXPORTER_OTLP_HEADERS || '')
|
||||
const exporterUrl = normalizeOtlpTracesUrl(telemetryConfig.endpoint)
|
||||
|
||||
const exporter = new OTLPTraceExporter({
|
||||
url: telemetryConfig.endpoint,
|
||||
headers: {},
|
||||
url: exporterUrl,
|
||||
headers: otlpHeaders,
|
||||
timeoutMillis: Math.min(telemetryConfig.batchSettings.exportTimeoutMillis, 10000),
|
||||
keepAlive: false,
|
||||
})
|
||||
|
||||
// Surface export failures (BatchSpanProcessor swallows them otherwise).
|
||||
const origExport = exporter.export.bind(exporter)
|
||||
exporter.export = (spans, resultCallback) => {
|
||||
origExport(spans, (result) => {
|
||||
if (result?.code !== 0) {
|
||||
// eslint-disable-next-line no-console
|
||||
console.error('[OTEL] exporter export failed', {
|
||||
endpoint: telemetryConfig.endpoint,
|
||||
resultCode: result?.code,
|
||||
error: result?.error?.message,
|
||||
spanCount: spans.length,
|
||||
})
|
||||
}
|
||||
resultCallback(result)
|
||||
})
|
||||
}
|
||||
|
||||
const batchProcessor = new BatchSpanProcessor(exporter, {
|
||||
maxQueueSize: telemetryConfig.batchSettings.maxQueueSize,
|
||||
maxExportBatchSize: telemetryConfig.batchSettings.maxExportBatchSize,
|
||||
@@ -114,28 +248,48 @@ async function initializeOpenTelemetry() {
|
||||
exportTimeoutMillis: telemetryConfig.batchSettings.exportTimeoutMillis,
|
||||
})
|
||||
|
||||
// Unique instance id per origin keeps Jaeger's clock-skew adjuster
|
||||
// from grouping Sim+Go spans together (they'd see multi-second
|
||||
// drift as intra-service and emit spurious warnings).
|
||||
const serviceInstanceId = `${telemetryConfig.serviceName}-${SERVICE_INSTANCE_SLUG}`
|
||||
const resource = defaultResource().merge(
|
||||
resourceFromAttributes({
|
||||
[ATTR_SERVICE_NAME]: telemetryConfig.serviceName,
|
||||
[ATTR_SERVICE_VERSION]: telemetryConfig.serviceVersion,
|
||||
[ATTR_DEPLOYMENT_ENVIRONMENT]: env.NODE_ENV || 'development',
|
||||
'service.namespace': 'sim-ai-platform',
|
||||
// OTEL_ → DEPLOYMENT_ENVIRONMENT → NODE_ENV; matches Go's
|
||||
// `resourceEnvFromEnv()` so both halves tag the same value.
|
||||
[ATTR_DEPLOYMENT_ENVIRONMENT]:
|
||||
process.env.OTEL_DEPLOYMENT_ENVIRONMENT ||
|
||||
process.env.DEPLOYMENT_ENVIRONMENT ||
|
||||
env.NODE_ENV ||
|
||||
'development',
|
||||
'service.namespace': 'mothership',
|
||||
'service.instance.id': serviceInstanceId,
|
||||
'mothership.origin': MOTHERSHIP_ORIGIN,
|
||||
'telemetry.sdk.name': 'opentelemetry',
|
||||
'telemetry.sdk.language': 'nodejs',
|
||||
'telemetry.sdk.version': '1.0.0',
|
||||
})
|
||||
)
|
||||
|
||||
const baseSampler = new ParentBasedSampler({
|
||||
root: new TraceIdRatioBasedSampler(0.1),
|
||||
const isLocalEndpoint = /localhost|127\.0\.0\.1/i.test(telemetryConfig.endpoint)
|
||||
const samplingRatio = resolveSamplingRatio(isLocalEndpoint)
|
||||
const rootRatioSampler = new TraceIdRatioBasedSampler(samplingRatio)
|
||||
const sampler = createBusinessSpanSampler(rootRatioSampler)
|
||||
|
||||
logger.info('OpenTelemetry sampler configured', {
|
||||
samplingRatio,
|
||||
endpoint: telemetryConfig.endpoint,
|
||||
origin: MOTHERSHIP_ORIGIN,
|
||||
})
|
||||
const sampler = createBusinessSpanSampler(baseSampler)
|
||||
|
||||
// Origin-prefix must run before batch so the rename/attr is captured.
|
||||
const spanProcessors: SpanProcessor[] = [new MothershipOriginSpanProcessor(), batchProcessor]
|
||||
|
||||
const sdk = new NodeSDK({
|
||||
resource,
|
||||
spanProcessor: batchProcessor,
|
||||
spanProcessors,
|
||||
sampler,
|
||||
traceExporter: exporter,
|
||||
})
|
||||
|
||||
sdk.start()
|
||||
@@ -152,7 +306,11 @@ async function initializeOpenTelemetry() {
|
||||
process.on('SIGTERM', shutdownOtel)
|
||||
process.on('SIGINT', shutdownOtel)
|
||||
|
||||
logger.info('OpenTelemetry instrumentation initialized with business span filtering')
|
||||
logger.info('OpenTelemetry instrumentation initialized', {
|
||||
serviceName: telemetryConfig.serviceName,
|
||||
origin: MOTHERSHIP_ORIGIN,
|
||||
samplingRatio,
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('Failed to initialize OpenTelemetry instrumentation', error)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { trace } from '@opentelemetry/api'
|
||||
import { db } from '@sim/db'
|
||||
import {
|
||||
type CopilotAsyncToolStatus,
|
||||
@@ -8,6 +9,9 @@ import {
|
||||
} from '@sim/db/schema'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { and, desc, eq, inArray, isNull } from 'drizzle-orm'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { markSpanForError } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
ASYNC_TOOL_STATUS,
|
||||
type AsyncCompletionData,
|
||||
@@ -16,6 +20,38 @@ import {
|
||||
} from './lifecycle'
|
||||
|
||||
const logger = createLogger('CopilotAsyncRunsRepo')
|
||||
// Resolve the tracer lazily per-call to avoid capturing the NoOp tracer
|
||||
// before NodeSDK installs the global TracerProvider (Next.js 16/Turbopack
|
||||
// can evaluate modules before instrumentation-node.ts finishes).
|
||||
const getAsyncRunsTracer = () => trace.getTracer('sim-copilot-async-runs', '1.0.0')
|
||||
|
||||
// Wrap an async DB op in a client-kind span with canonical `db.*` attrs.
|
||||
// Cancellation is routed through `markSpanForError` so aborts record the
|
||||
// exception event but don't paint spans red.
|
||||
async function withDbSpan<T>(
|
||||
name: string,
|
||||
op: string,
|
||||
table: string,
|
||||
attrs: Record<string, string | number | boolean | undefined>,
|
||||
fn: () => Promise<T>
|
||||
): Promise<T> {
|
||||
const span = getAsyncRunsTracer().startSpan(name, {
|
||||
attributes: {
|
||||
[TraceAttr.DbSystem]: 'postgresql',
|
||||
[TraceAttr.DbOperation]: op,
|
||||
[TraceAttr.DbSqlTable]: table,
|
||||
...Object.fromEntries(Object.entries(attrs).filter(([, v]) => v !== undefined)),
|
||||
},
|
||||
})
|
||||
try {
|
||||
return await fn()
|
||||
} catch (error) {
|
||||
markSpanForError(span, error)
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
|
||||
export interface CreateRunSegmentInput {
|
||||
id?: string
|
||||
@@ -34,26 +70,43 @@ export interface CreateRunSegmentInput {
|
||||
}
|
||||
|
||||
export async function createRunSegment(input: CreateRunSegmentInput) {
|
||||
const [run] = await db
|
||||
.insert(copilotRuns)
|
||||
.values({
|
||||
...(input.id ? { id: input.id } : {}),
|
||||
executionId: input.executionId,
|
||||
parentRunId: input.parentRunId ?? null,
|
||||
chatId: input.chatId,
|
||||
userId: input.userId,
|
||||
workflowId: input.workflowId ?? null,
|
||||
workspaceId: input.workspaceId ?? null,
|
||||
streamId: input.streamId,
|
||||
agent: input.agent ?? null,
|
||||
model: input.model ?? null,
|
||||
provider: input.provider ?? null,
|
||||
requestContext: input.requestContext ?? {},
|
||||
status: input.status ?? 'active',
|
||||
})
|
||||
.returning()
|
||||
|
||||
return run
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsCreateRunSegment,
|
||||
'INSERT',
|
||||
'copilot_runs',
|
||||
{
|
||||
[TraceAttr.CopilotExecutionId]: input.executionId,
|
||||
[TraceAttr.ChatId]: input.chatId,
|
||||
[TraceAttr.StreamId]: input.streamId,
|
||||
[TraceAttr.UserId]: input.userId,
|
||||
[TraceAttr.CopilotRunParentId]: input.parentRunId ?? undefined,
|
||||
[TraceAttr.CopilotRunAgent]: input.agent ?? undefined,
|
||||
[TraceAttr.CopilotRunModel]: input.model ?? undefined,
|
||||
[TraceAttr.CopilotRunProvider]: input.provider ?? undefined,
|
||||
[TraceAttr.CopilotRunStatus]: input.status ?? 'active',
|
||||
},
|
||||
async () => {
|
||||
const [run] = await db
|
||||
.insert(copilotRuns)
|
||||
.values({
|
||||
...(input.id ? { id: input.id } : {}),
|
||||
executionId: input.executionId,
|
||||
parentRunId: input.parentRunId ?? null,
|
||||
chatId: input.chatId,
|
||||
userId: input.userId,
|
||||
workflowId: input.workflowId ?? null,
|
||||
workspaceId: input.workspaceId ?? null,
|
||||
streamId: input.streamId,
|
||||
agent: input.agent ?? null,
|
||||
model: input.model ?? null,
|
||||
provider: input.provider ?? null,
|
||||
requestContext: input.requestContext ?? {},
|
||||
status: input.status ?? 'active',
|
||||
})
|
||||
.returning()
|
||||
return run
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function updateRunStatus(
|
||||
@@ -65,32 +118,53 @@ export async function updateRunStatus(
|
||||
requestContext?: Record<string, unknown>
|
||||
} = {}
|
||||
) {
|
||||
const [run] = await db
|
||||
.update(copilotRuns)
|
||||
.set({
|
||||
status,
|
||||
completedAt: updates.completedAt,
|
||||
error: updates.error,
|
||||
requestContext: updates.requestContext,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotRuns.id, runId))
|
||||
.returning()
|
||||
|
||||
return run ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsUpdateRunStatus,
|
||||
'UPDATE',
|
||||
'copilot_runs',
|
||||
{
|
||||
[TraceAttr.RunId]: runId,
|
||||
[TraceAttr.CopilotRunStatus]: status,
|
||||
[TraceAttr.CopilotRunHasError]: !!updates.error,
|
||||
[TraceAttr.CopilotRunHasCompletedAt]: !!updates.completedAt,
|
||||
},
|
||||
async () => {
|
||||
const [run] = await db
|
||||
.update(copilotRuns)
|
||||
.set({
|
||||
status,
|
||||
completedAt: updates.completedAt,
|
||||
error: updates.error,
|
||||
requestContext: updates.requestContext,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotRuns.id, runId))
|
||||
.returning()
|
||||
return run ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function getLatestRunForExecution(executionId: string) {
|
||||
const [run] = await db
|
||||
.select()
|
||||
.from(copilotRuns)
|
||||
.where(eq(copilotRuns.executionId, executionId))
|
||||
.orderBy(desc(copilotRuns.startedAt))
|
||||
.limit(1)
|
||||
|
||||
return run ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsGetLatestForExecution,
|
||||
'SELECT',
|
||||
'copilot_runs',
|
||||
{ [TraceAttr.CopilotExecutionId]: executionId },
|
||||
async () => {
|
||||
const [run] = await db
|
||||
.select()
|
||||
.from(copilotRuns)
|
||||
.where(eq(copilotRuns.executionId, executionId))
|
||||
.orderBy(desc(copilotRuns.startedAt))
|
||||
.limit(1)
|
||||
return run ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
// Un-instrumented: called from a 4 Hz resume poll; per-call spans
|
||||
// swamped traces. Use Prom histograms if latency visibility is needed.
|
||||
export async function getLatestRunForStream(streamId: string, userId?: string) {
|
||||
const conditions = userId
|
||||
? and(eq(copilotRuns.streamId, streamId), eq(copilotRuns.userId, userId))
|
||||
@@ -101,13 +175,20 @@ export async function getLatestRunForStream(streamId: string, userId?: string) {
|
||||
.where(conditions)
|
||||
.orderBy(desc(copilotRuns.startedAt))
|
||||
.limit(1)
|
||||
|
||||
return run ?? null
|
||||
}
|
||||
|
||||
export async function getRunSegment(runId: string) {
|
||||
const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1)
|
||||
return run ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsGetRunSegment,
|
||||
'SELECT',
|
||||
'copilot_runs',
|
||||
{ [TraceAttr.RunId]: runId },
|
||||
async () => {
|
||||
const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1)
|
||||
return run ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function createRunCheckpoint(input: {
|
||||
@@ -117,18 +198,29 @@ export async function createRunCheckpoint(input: {
|
||||
agentState: Record<string, unknown>
|
||||
providerRequest: Record<string, unknown>
|
||||
}) {
|
||||
const [checkpoint] = await db
|
||||
.insert(copilotRunCheckpoints)
|
||||
.values({
|
||||
runId: input.runId,
|
||||
pendingToolCallId: input.pendingToolCallId,
|
||||
conversationSnapshot: input.conversationSnapshot,
|
||||
agentState: input.agentState,
|
||||
providerRequest: input.providerRequest,
|
||||
})
|
||||
.returning()
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsCreateRunCheckpoint,
|
||||
'INSERT',
|
||||
'copilot_run_checkpoints',
|
||||
{
|
||||
[TraceAttr.RunId]: input.runId,
|
||||
[TraceAttr.CopilotCheckpointPendingToolCallId]: input.pendingToolCallId,
|
||||
},
|
||||
async () => {
|
||||
const [checkpoint] = await db
|
||||
.insert(copilotRunCheckpoints)
|
||||
.values({
|
||||
runId: input.runId,
|
||||
pendingToolCallId: input.pendingToolCallId,
|
||||
conversationSnapshot: input.conversationSnapshot,
|
||||
agentState: input.agentState,
|
||||
providerRequest: input.providerRequest,
|
||||
})
|
||||
.returning()
|
||||
|
||||
return checkpoint
|
||||
return checkpoint
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function upsertAsyncToolCall(input: {
|
||||
@@ -139,67 +231,87 @@ export async function upsertAsyncToolCall(input: {
|
||||
args?: Record<string, unknown>
|
||||
status?: CopilotAsyncToolStatus
|
||||
}) {
|
||||
const existing = await getAsyncToolCall(input.toolCallId)
|
||||
const incomingStatus = input.status ?? 'pending'
|
||||
if (
|
||||
existing &&
|
||||
(isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) &&
|
||||
!isTerminalAsyncStatus(incomingStatus) &&
|
||||
!isDeliveredAsyncStatus(incomingStatus)
|
||||
) {
|
||||
logger.info('Ignoring async tool upsert that would downgrade terminal state', {
|
||||
toolCallId: input.toolCallId,
|
||||
existingStatus: existing.status,
|
||||
incomingStatus,
|
||||
})
|
||||
return existing
|
||||
}
|
||||
const effectiveRunId = input.runId ?? existing?.runId ?? null
|
||||
if (!effectiveRunId) {
|
||||
logger.warn('upsertAsyncToolCall missing runId and no existing row', {
|
||||
toolCallId: input.toolCallId,
|
||||
toolName: input.toolName,
|
||||
status: input.status ?? 'pending',
|
||||
})
|
||||
return null
|
||||
}
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsUpsertAsyncToolCall,
|
||||
'UPSERT',
|
||||
'copilot_async_tool_calls',
|
||||
{
|
||||
[TraceAttr.ToolCallId]: input.toolCallId,
|
||||
[TraceAttr.ToolName]: input.toolName,
|
||||
[TraceAttr.CopilotAsyncToolStatus]: input.status ?? 'pending',
|
||||
[TraceAttr.RunId]: input.runId ?? undefined,
|
||||
},
|
||||
async () => {
|
||||
const existing = await getAsyncToolCall(input.toolCallId)
|
||||
const incomingStatus = input.status ?? 'pending'
|
||||
if (
|
||||
existing &&
|
||||
(isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) &&
|
||||
!isTerminalAsyncStatus(incomingStatus) &&
|
||||
!isDeliveredAsyncStatus(incomingStatus)
|
||||
) {
|
||||
logger.info('Ignoring async tool upsert that would downgrade terminal state', {
|
||||
toolCallId: input.toolCallId,
|
||||
existingStatus: existing.status,
|
||||
incomingStatus,
|
||||
})
|
||||
return existing
|
||||
}
|
||||
const effectiveRunId = input.runId ?? existing?.runId ?? null
|
||||
if (!effectiveRunId) {
|
||||
logger.warn('upsertAsyncToolCall missing runId and no existing row', {
|
||||
toolCallId: input.toolCallId,
|
||||
toolName: input.toolName,
|
||||
status: input.status ?? 'pending',
|
||||
})
|
||||
return null
|
||||
}
|
||||
|
||||
const now = new Date()
|
||||
const [row] = await db
|
||||
.insert(copilotAsyncToolCalls)
|
||||
.values({
|
||||
runId: effectiveRunId,
|
||||
checkpointId: input.checkpointId ?? null,
|
||||
toolCallId: input.toolCallId,
|
||||
toolName: input.toolName,
|
||||
args: input.args ?? {},
|
||||
status: incomingStatus,
|
||||
updatedAt: now,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: copilotAsyncToolCalls.toolCallId,
|
||||
set: {
|
||||
runId: effectiveRunId,
|
||||
checkpointId: input.checkpointId ?? null,
|
||||
toolName: input.toolName,
|
||||
args: input.args ?? {},
|
||||
status: incomingStatus,
|
||||
updatedAt: now,
|
||||
},
|
||||
})
|
||||
.returning()
|
||||
const now = new Date()
|
||||
const [row] = await db
|
||||
.insert(copilotAsyncToolCalls)
|
||||
.values({
|
||||
runId: effectiveRunId,
|
||||
checkpointId: input.checkpointId ?? null,
|
||||
toolCallId: input.toolCallId,
|
||||
toolName: input.toolName,
|
||||
args: input.args ?? {},
|
||||
status: incomingStatus,
|
||||
updatedAt: now,
|
||||
})
|
||||
.onConflictDoUpdate({
|
||||
target: copilotAsyncToolCalls.toolCallId,
|
||||
set: {
|
||||
runId: effectiveRunId,
|
||||
checkpointId: input.checkpointId ?? null,
|
||||
toolName: input.toolName,
|
||||
args: input.args ?? {},
|
||||
status: incomingStatus,
|
||||
updatedAt: now,
|
||||
},
|
||||
})
|
||||
.returning()
|
||||
|
||||
return row
|
||||
return row
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function getAsyncToolCall(toolCallId: string) {
|
||||
const [row] = await db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
|
||||
.limit(1)
|
||||
|
||||
return row ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsGetAsyncToolCall,
|
||||
'SELECT',
|
||||
'copilot_async_tool_calls',
|
||||
{ [TraceAttr.ToolCallId]: toolCallId },
|
||||
async () => {
|
||||
const [row] = await db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
|
||||
.limit(1)
|
||||
return row ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function markAsyncToolStatus(
|
||||
@@ -213,28 +325,41 @@ export async function markAsyncToolStatus(
|
||||
completedAt?: Date | null
|
||||
} = {}
|
||||
) {
|
||||
const claimedAt =
|
||||
updates.claimedAt !== undefined
|
||||
? updates.claimedAt
|
||||
: status === 'running' && updates.claimedBy
|
||||
? new Date()
|
||||
: undefined
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsMarkAsyncToolStatus,
|
||||
'UPDATE',
|
||||
'copilot_async_tool_calls',
|
||||
{
|
||||
[TraceAttr.ToolCallId]: toolCallId,
|
||||
[TraceAttr.CopilotAsyncToolStatus]: status,
|
||||
[TraceAttr.CopilotAsyncToolHasError]: !!updates.error,
|
||||
[TraceAttr.CopilotAsyncToolClaimedBy]: updates.claimedBy ?? undefined,
|
||||
},
|
||||
async () => {
|
||||
const claimedAt =
|
||||
updates.claimedAt !== undefined
|
||||
? updates.claimedAt
|
||||
: status === 'running' && updates.claimedBy
|
||||
? new Date()
|
||||
: undefined
|
||||
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
status,
|
||||
claimedBy: updates.claimedBy,
|
||||
claimedAt,
|
||||
result: updates.result,
|
||||
error: updates.error,
|
||||
completedAt: updates.completedAt,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
|
||||
.returning()
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
status,
|
||||
claimedBy: updates.claimedBy,
|
||||
claimedAt,
|
||||
result: updates.result,
|
||||
error: updates.error,
|
||||
completedAt: updates.completedAt,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotAsyncToolCalls.toolCallId, toolCallId))
|
||||
.returning()
|
||||
|
||||
return row ?? null
|
||||
return row ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function markAsyncToolRunning(toolCallId: string, claimedBy: string) {
|
||||
@@ -278,57 +403,91 @@ export async function markAsyncToolDelivered(toolCallId: string) {
|
||||
}
|
||||
|
||||
export async function listAsyncToolCallsForRun(runId: string) {
|
||||
return db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(eq(copilotAsyncToolCalls.runId, runId))
|
||||
.orderBy(desc(copilotAsyncToolCalls.createdAt))
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsListForRun,
|
||||
'SELECT',
|
||||
'copilot_async_tool_calls',
|
||||
{ [TraceAttr.RunId]: runId },
|
||||
async () =>
|
||||
db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(eq(copilotAsyncToolCalls.runId, runId))
|
||||
.orderBy(desc(copilotAsyncToolCalls.createdAt))
|
||||
)
|
||||
}
|
||||
|
||||
export async function getAsyncToolCalls(toolCallIds: string[]) {
|
||||
if (toolCallIds.length === 0) return []
|
||||
return db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds))
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsGetMany,
|
||||
'SELECT',
|
||||
'copilot_async_tool_calls',
|
||||
{ [TraceAttr.CopilotAsyncToolIdsCount]: toolCallIds.length },
|
||||
async () =>
|
||||
db
|
||||
.select()
|
||||
.from(copilotAsyncToolCalls)
|
||||
.where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds))
|
||||
)
|
||||
}
|
||||
|
||||
export async function claimCompletedAsyncToolCall(toolCallId: string, workerId: string) {
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
claimedBy: workerId,
|
||||
claimedAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
|
||||
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
|
||||
isNull(copilotAsyncToolCalls.claimedBy)
|
||||
)
|
||||
)
|
||||
.returning()
|
||||
|
||||
return row ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsClaimCompleted,
|
||||
'UPDATE',
|
||||
'copilot_async_tool_calls',
|
||||
{
|
||||
[TraceAttr.ToolCallId]: toolCallId,
|
||||
[TraceAttr.CopilotAsyncToolWorkerId]: workerId,
|
||||
},
|
||||
async () => {
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
claimedBy: workerId,
|
||||
claimedAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
|
||||
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
|
||||
isNull(copilotAsyncToolCalls.claimedBy)
|
||||
)
|
||||
)
|
||||
.returning()
|
||||
return row ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function releaseCompletedAsyncToolClaim(toolCallId: string, workerId: string) {
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
claimedBy: null,
|
||||
claimedAt: null,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
|
||||
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
|
||||
eq(copilotAsyncToolCalls.claimedBy, workerId)
|
||||
)
|
||||
)
|
||||
.returning()
|
||||
|
||||
return row ?? null
|
||||
return withDbSpan(
|
||||
TraceSpan.CopilotAsyncRunsReleaseClaim,
|
||||
'UPDATE',
|
||||
'copilot_async_tool_calls',
|
||||
{
|
||||
[TraceAttr.ToolCallId]: toolCallId,
|
||||
[TraceAttr.CopilotAsyncToolWorkerId]: workerId,
|
||||
},
|
||||
async () => {
|
||||
const [row] = await db
|
||||
.update(copilotAsyncToolCalls)
|
||||
.set({
|
||||
claimedBy: null,
|
||||
claimedAt: null,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(copilotAsyncToolCalls.toolCallId, toolCallId),
|
||||
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']),
|
||||
eq(copilotAsyncToolCalls.claimedBy, workerId)
|
||||
)
|
||||
)
|
||||
.returning()
|
||||
return row ?? null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { type Context as OtelContext, context as otelContextApi } from '@opentelemetry/api'
|
||||
import { db } from '@sim/db'
|
||||
import { copilotChats } from '@sim/db/schema'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { eq, sql } from 'drizzle-orm'
|
||||
import { type NextRequest, NextResponse } from 'next/server'
|
||||
import { z } from 'zod'
|
||||
@@ -20,11 +20,14 @@ import { finalizeAssistantTurn } from '@/lib/copilot/chat/terminal-state'
|
||||
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
|
||||
import { COPILOT_REQUEST_MODES } from '@/lib/copilot/constants'
|
||||
import {
|
||||
createBadRequestResponse,
|
||||
createRequestTracker,
|
||||
createUnauthorizedResponse,
|
||||
} from '@/lib/copilot/request/http'
|
||||
CopilotChatPersistOutcome,
|
||||
CopilotTransport,
|
||||
} from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { createBadRequestResponse, createUnauthorizedResponse } from '@/lib/copilot/request/http'
|
||||
import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/request/lifecycle/start'
|
||||
import { startCopilotOtelRoot, withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
acquirePendingChatStream,
|
||||
getPendingChatStreamId,
|
||||
@@ -258,6 +261,15 @@ async function persistUserMessage(params: {
|
||||
contexts?: UnifiedChatRequest['contexts']
|
||||
workspaceId?: string
|
||||
notifyWorkspaceStatus: boolean
|
||||
/**
|
||||
* Root context for the mothership request. When present the persist
|
||||
* span is created explicitly under it, which avoids relying on
|
||||
* AsyncLocalStorage propagation — some upstream awaits (Next.js
|
||||
* framework frames, Turbopack-instrumented I/O) can swap the active
|
||||
* store out from under us in dev, which would otherwise leave this
|
||||
* span parented to the about-to-be-dropped Next.js HTTP span.
|
||||
*/
|
||||
parentOtelContext?: OtelContext
|
||||
}): Promise<unknown[] | undefined> {
|
||||
const {
|
||||
chatId,
|
||||
@@ -267,31 +279,60 @@ async function persistUserMessage(params: {
|
||||
contexts,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus,
|
||||
parentOtelContext,
|
||||
} = params
|
||||
if (!chatId) return undefined
|
||||
|
||||
const userMsg = buildPersistedUserMessage({
|
||||
id: userMessageId,
|
||||
content: message,
|
||||
fileAttachments,
|
||||
contexts,
|
||||
})
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotChatPersistUserMessage,
|
||||
{
|
||||
[TraceAttr.DbSystem]: 'postgresql',
|
||||
[TraceAttr.DbSqlTable]: 'copilot_chats',
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
[TraceAttr.ChatUserMessageId]: userMessageId,
|
||||
[TraceAttr.ChatMessageBytes]: message.length,
|
||||
[TraceAttr.ChatFileAttachmentCount]: fileAttachments?.length ?? 0,
|
||||
[TraceAttr.ChatContextCount]: contexts?.length ?? 0,
|
||||
...(workspaceId ? { [TraceAttr.WorkspaceId]: workspaceId } : {}),
|
||||
},
|
||||
async (span) => {
|
||||
const userMsg = buildPersistedUserMessage({
|
||||
id: userMessageId,
|
||||
content: message,
|
||||
fileAttachments,
|
||||
contexts,
|
||||
})
|
||||
|
||||
const [updated] = await db
|
||||
.update(copilotChats)
|
||||
.set({
|
||||
messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`,
|
||||
conversationId: userMessageId,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotChats.id, chatId))
|
||||
.returning({ messages: copilotChats.messages })
|
||||
const [updated] = await db
|
||||
.update(copilotChats)
|
||||
.set({
|
||||
messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`,
|
||||
conversationId: userMessageId,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(copilotChats.id, chatId))
|
||||
.returning({ messages: copilotChats.messages })
|
||||
|
||||
if (notifyWorkspaceStatus && updated && workspaceId) {
|
||||
taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'started' })
|
||||
}
|
||||
const messagesAfter = Array.isArray(updated?.messages) ? updated.messages : undefined
|
||||
span.setAttributes({
|
||||
[TraceAttr.ChatPersistOutcome]: updated
|
||||
? CopilotChatPersistOutcome.Appended
|
||||
: CopilotChatPersistOutcome.ChatNotFound,
|
||||
[TraceAttr.ChatMessagesAfter]: messagesAfter?.length ?? 0,
|
||||
})
|
||||
|
||||
return Array.isArray(updated?.messages) ? updated.messages : undefined
|
||||
if (notifyWorkspaceStatus && updated && workspaceId) {
|
||||
taskPubSub?.publishStatusChanged({
|
||||
workspaceId,
|
||||
chatId,
|
||||
type: 'started',
|
||||
})
|
||||
}
|
||||
|
||||
return messagesAfter
|
||||
},
|
||||
parentOtelContext
|
||||
)
|
||||
}
|
||||
|
||||
async function buildInitialExecutionContext(params: {
|
||||
@@ -336,12 +377,42 @@ function buildOnComplete(params: {
|
||||
requestId: string
|
||||
workspaceId?: string
|
||||
notifyWorkspaceStatus: boolean
|
||||
/**
|
||||
* Root agent span for this request. When present, the final
|
||||
* assistant message + invoked tool calls are recorded as
|
||||
* `gen_ai.output.messages` on it before persistence runs. Keeps
|
||||
* the Honeycomb Gen AI view complete across both the Sim root
|
||||
* span and the Go-side `llm.stream` spans.
|
||||
*/
|
||||
otelRoot?: {
|
||||
setOutputMessages: (output: {
|
||||
assistantText?: string
|
||||
toolCalls?: Array<{ id: string; name: string; arguments?: Record<string, unknown> }>
|
||||
}) => void
|
||||
}
|
||||
}) {
|
||||
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus } = params
|
||||
const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus, otelRoot } = params
|
||||
|
||||
return async (result: OrchestratorResult) => {
|
||||
if (otelRoot && result.success) {
|
||||
otelRoot.setOutputMessages({
|
||||
assistantText: result.content,
|
||||
toolCalls: result.toolCalls?.map((tc) => ({
|
||||
id: tc.id,
|
||||
name: tc.name,
|
||||
arguments: tc.params,
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
if (!chatId) return
|
||||
|
||||
// On cancel, /chat/stop is the sole DB writer — it persists
|
||||
// partial content AND clears conversationId in one UPDATE. If we
|
||||
// finalize here first the filter misses and content vanishes.
|
||||
// Real errors still finalize so the stream marker clears.
|
||||
if (result.cancelled) return
|
||||
|
||||
try {
|
||||
await finalizeAssistantTurn({
|
||||
chatId,
|
||||
@@ -529,10 +600,23 @@ async function resolveBranch(params: {
|
||||
}
|
||||
|
||||
export async function handleUnifiedChatPost(req: NextRequest) {
|
||||
const tracker = createRequestTracker(false)
|
||||
let actualChatId: string | undefined
|
||||
let userMessageId = ''
|
||||
let chatStreamLockAcquired = false
|
||||
// Started once we've parsed the body (need userMessageId to stamp as
|
||||
// streamId). Every subsequent span (persistUserMessage,
|
||||
// createRunSegment, the whole SSE stream, etc.) nests under this
|
||||
// root via AsyncLocalStorage / explicit propagation, and the stream's
|
||||
// terminal code path calls finish() when the request actually ends.
|
||||
// Errors thrown from the handler before the stream starts are
|
||||
// finished here in the catch below.
|
||||
let otelRoot: ReturnType<typeof startCopilotOtelRoot> | undefined
|
||||
// Canonical logical ID; assigned from otelRoot.requestId (the OTel
|
||||
// trace ID) as soon as startCopilotOtelRoot runs. Empty only in the
|
||||
// narrow pre-otelRoot window where errors don't correlate anyway.
|
||||
let requestId = ''
|
||||
const executionId = crypto.randomUUID()
|
||||
const runId = crypto.randomUUID()
|
||||
|
||||
try {
|
||||
const session = await getSession()
|
||||
@@ -540,212 +624,359 @@ export async function handleUnifiedChatPost(req: NextRequest) {
|
||||
return createUnauthorizedResponse()
|
||||
}
|
||||
const authenticatedUserId = session.user.id
|
||||
const authenticatedUserEmail = session.user.email
|
||||
|
||||
const body = ChatMessageSchema.parse(await req.json())
|
||||
const normalizedContexts = normalizeContexts(body.contexts)
|
||||
const normalizedContexts = normalizeContexts(body.contexts) ?? []
|
||||
userMessageId = body.userMessageId || crypto.randomUUID()
|
||||
|
||||
const branch = await resolveBranch({
|
||||
authenticatedUserId,
|
||||
workflowId: body.workflowId,
|
||||
workflowName: body.workflowName,
|
||||
workspaceId: body.workspaceId,
|
||||
model: body.model,
|
||||
mode: body.mode,
|
||||
provider: body.provider,
|
||||
})
|
||||
if (branch instanceof NextResponse) {
|
||||
return branch
|
||||
}
|
||||
|
||||
let currentChat: ChatLoadResult['chat'] = null
|
||||
let conversationHistory: unknown[] = []
|
||||
let chatIsNew = false
|
||||
actualChatId = body.chatId
|
||||
|
||||
if (body.chatId || body.createNewChat) {
|
||||
const chatResult = await resolveOrCreateChat({
|
||||
chatId: body.chatId,
|
||||
userId: authenticatedUserId,
|
||||
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
|
||||
workspaceId: branch.workspaceId,
|
||||
model: branch.titleModel,
|
||||
type: branch.kind === 'workflow' ? 'copilot' : 'mothership',
|
||||
})
|
||||
currentChat = chatResult.chat
|
||||
actualChatId = chatResult.chatId || body.chatId
|
||||
chatIsNew = chatResult.isNew
|
||||
conversationHistory = Array.isArray(chatResult.conversationHistory)
|
||||
? chatResult.conversationHistory
|
||||
: []
|
||||
|
||||
if (body.chatId && !currentChat) {
|
||||
return NextResponse.json({ error: 'Chat not found' }, { status: 404 })
|
||||
}
|
||||
}
|
||||
|
||||
if (chatIsNew && actualChatId && body.resourceAttachments?.length) {
|
||||
await persistChatResources(
|
||||
actualChatId,
|
||||
body.resourceAttachments.map((r) => ({
|
||||
type: r.type,
|
||||
id: r.id,
|
||||
title: r.title ?? GENERIC_RESOURCE_TITLE[r.type],
|
||||
}))
|
||||
)
|
||||
}
|
||||
|
||||
if (actualChatId) {
|
||||
chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId)
|
||||
if (!chatStreamLockAcquired) {
|
||||
const activeStreamId = await getPendingChatStreamId(actualChatId)
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: 'A response is already in progress for this chat.',
|
||||
...(activeStreamId ? { activeStreamId } : {}),
|
||||
},
|
||||
{ status: 409 }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const workspaceId = branch.workspaceId
|
||||
const userPermissionPromise = workspaceId
|
||||
? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => {
|
||||
logger.warn('Failed to load user permissions', {
|
||||
error: toError(error).message,
|
||||
workspaceId,
|
||||
})
|
||||
return null
|
||||
})
|
||||
: Promise.resolve(null)
|
||||
const workspaceContextPromise =
|
||||
branch.kind === 'workspace'
|
||||
? generateWorkspaceContext(branch.workspaceId, authenticatedUserId)
|
||||
: Promise.resolve(undefined)
|
||||
const agentContextsPromise = resolveAgentContexts({
|
||||
contexts: normalizedContexts,
|
||||
resourceAttachments: body.resourceAttachments,
|
||||
userId: authenticatedUserId,
|
||||
message: body.message,
|
||||
workspaceId,
|
||||
chatId: actualChatId,
|
||||
requestId: tracker.requestId,
|
||||
})
|
||||
const persistedMessagesPromise = persistUserMessage({
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
message: body.message,
|
||||
fileAttachments: body.fileAttachments,
|
||||
contexts: normalizedContexts,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
})
|
||||
const executionContextPromise = branch.buildExecutionContext({
|
||||
userId: authenticatedUserId,
|
||||
chatId: actualChatId,
|
||||
userTimezone: body.userTimezone,
|
||||
messageId: userMessageId,
|
||||
})
|
||||
|
||||
const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] =
|
||||
await Promise.all([
|
||||
agentContextsPromise,
|
||||
userPermissionPromise,
|
||||
workspaceContextPromise,
|
||||
persistedMessagesPromise,
|
||||
executionContextPromise,
|
||||
])
|
||||
|
||||
if (persistedMessages) {
|
||||
conversationHistory = persistedMessages.filter((message) => {
|
||||
const record = message as Record<string, unknown>
|
||||
return record.id !== userMessageId
|
||||
})
|
||||
}
|
||||
|
||||
const requestPayload =
|
||||
branch.kind === 'workflow'
|
||||
? await branch.buildPayload({
|
||||
message: body.message,
|
||||
userId: authenticatedUserId,
|
||||
userMessageId,
|
||||
chatId: actualChatId,
|
||||
contexts: agentContexts,
|
||||
fileAttachments: body.fileAttachments,
|
||||
userPermission: userPermission ?? undefined,
|
||||
userTimezone: body.userTimezone,
|
||||
workflowId: branch.workflowId,
|
||||
workflowName: branch.workflowName,
|
||||
workspaceId: branch.workspaceId,
|
||||
mode: branch.mode,
|
||||
provider: branch.provider,
|
||||
commands: body.commands,
|
||||
prefetch: body.prefetch,
|
||||
implicitFeedback: body.implicitFeedback,
|
||||
})
|
||||
: await branch.buildPayload({
|
||||
message: body.message,
|
||||
userId: authenticatedUserId,
|
||||
userMessageId,
|
||||
chatId: actualChatId,
|
||||
contexts: agentContexts,
|
||||
fileAttachments: body.fileAttachments,
|
||||
userPermission: userPermission ?? undefined,
|
||||
userTimezone: body.userTimezone,
|
||||
workspaceContext,
|
||||
})
|
||||
|
||||
const executionId = crypto.randomUUID()
|
||||
const runId = crypto.randomUUID()
|
||||
|
||||
const stream = createSSEStream({
|
||||
requestPayload,
|
||||
userId: authenticatedUserId,
|
||||
otelRoot = startCopilotOtelRoot({
|
||||
streamId: userMessageId,
|
||||
executionId,
|
||||
runId,
|
||||
chatId: actualChatId,
|
||||
currentChat,
|
||||
isNewChat: conversationHistory.length === 0,
|
||||
message: body.message,
|
||||
titleModel: branch.titleModel,
|
||||
...(branch.titleProvider ? { titleProvider: branch.titleProvider } : {}),
|
||||
requestId: tracker.requestId,
|
||||
workspaceId,
|
||||
orchestrateOptions: {
|
||||
userId: authenticatedUserId,
|
||||
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
|
||||
...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}),
|
||||
transport: CopilotTransport.Stream,
|
||||
userMessagePreview: body.message,
|
||||
})
|
||||
if (otelRoot.requestId) {
|
||||
requestId = otelRoot.requestId
|
||||
}
|
||||
// Identity stamp — Go already stamps `user.id` on spans from the
|
||||
// validated API-key path, but Sim is the only side of the wire
|
||||
// that knows the human-facing email. Stamping both on the Sim
|
||||
// root (so they show up on `rootAttrs` in Tempo search) saves
|
||||
// the "turn user.id into a real person" round-trip to the DB
|
||||
// for every ad-hoc investigation.
|
||||
otelRoot.span.setAttribute(TraceAttr.UserId, authenticatedUserId)
|
||||
if (authenticatedUserEmail) {
|
||||
otelRoot.span.setAttribute(TraceAttr.UserEmail, authenticatedUserEmail)
|
||||
}
|
||||
// `setInputMessages` is internally gated on
|
||||
// OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT; safe to call.
|
||||
otelRoot.setInputMessages({ userMessage: body.message })
|
||||
|
||||
// Wrap the rest of the handler so nested spans attach to the
|
||||
// root via AsyncLocalStorage (otherwise they orphan into new traces).
|
||||
const activeOtelRoot = otelRoot
|
||||
return await otelContextApi.with(activeOtelRoot.context, async () => {
|
||||
const branch = await withCopilotSpan(
|
||||
TraceSpan.CopilotChatResolveBranch,
|
||||
{
|
||||
[TraceAttr.WorkflowId]: body.workflowId ?? '',
|
||||
[TraceAttr.WorkspaceId]: body.workspaceId ?? '',
|
||||
},
|
||||
() =>
|
||||
resolveBranch({
|
||||
authenticatedUserId,
|
||||
workflowId: body.workflowId,
|
||||
workflowName: body.workflowName,
|
||||
workspaceId: body.workspaceId,
|
||||
model: body.model,
|
||||
mode: body.mode,
|
||||
provider: body.provider,
|
||||
}),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
if (branch instanceof NextResponse) {
|
||||
// Non-actionable 4xx (400 bad-request from resolveBranch): stamp
|
||||
// outcome=error for dashboards but leave span status UNSET so
|
||||
// error alerts don't fire on normal validation rejections.
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, branch.status)
|
||||
activeOtelRoot.finish('error')
|
||||
return branch
|
||||
}
|
||||
|
||||
let currentChat: ChatLoadResult['chat'] = null
|
||||
let conversationHistory: unknown[] = []
|
||||
let chatIsNew = false
|
||||
actualChatId = body.chatId
|
||||
|
||||
if (body.chatId || body.createNewChat) {
|
||||
const chatResult = await withCopilotSpan(
|
||||
TraceSpan.CopilotChatResolveOrCreateChat,
|
||||
{
|
||||
[TraceAttr.ChatPreexisting]: !!body.chatId,
|
||||
[TraceAttr.CopilotChatIsNew]: !!body.createNewChat,
|
||||
},
|
||||
() =>
|
||||
resolveOrCreateChat({
|
||||
chatId: body.chatId,
|
||||
userId: authenticatedUserId,
|
||||
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
|
||||
workspaceId: branch.workspaceId,
|
||||
model: branch.titleModel,
|
||||
type: branch.kind === 'workflow' ? 'copilot' : 'mothership',
|
||||
}),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
currentChat = chatResult.chat
|
||||
actualChatId = chatResult.chatId || body.chatId
|
||||
chatIsNew = chatResult.isNew
|
||||
conversationHistory = Array.isArray(chatResult.conversationHistory)
|
||||
? chatResult.conversationHistory
|
||||
: []
|
||||
|
||||
if (body.chatId && !currentChat) {
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, 404)
|
||||
activeOtelRoot.finish('error')
|
||||
return NextResponse.json({ error: 'Chat not found' }, { status: 404 })
|
||||
}
|
||||
}
|
||||
|
||||
if (chatIsNew && actualChatId && body.resourceAttachments?.length) {
|
||||
await persistChatResources(
|
||||
actualChatId,
|
||||
body.resourceAttachments.map((r) => ({
|
||||
type: r.type,
|
||||
id: r.id,
|
||||
title: r.title ?? GENERIC_RESOURCE_TITLE[r.type],
|
||||
}))
|
||||
)
|
||||
}
|
||||
|
||||
let pendingStreamWaitMs = 0
|
||||
if (actualChatId) {
|
||||
const lockStart = Date.now()
|
||||
chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId)
|
||||
pendingStreamWaitMs = Date.now() - lockStart
|
||||
if (!chatStreamLockAcquired) {
|
||||
const activeStreamId = await getPendingChatStreamId(actualChatId)
|
||||
// 409 is in the actionable set (see `isActionableErrorStatus`);
|
||||
// pass a synthesized Error so the span escalates to ERROR status
|
||||
// and surfaces on pending-stream-collision dashboards.
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.HttpStatusCode, 409)
|
||||
activeOtelRoot.finish(
|
||||
'error',
|
||||
new Error('A response is already in progress for this chat.')
|
||||
)
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: 'A response is already in progress for this chat.',
|
||||
...(activeStreamId ? { activeStreamId } : {}),
|
||||
},
|
||||
{ status: 409 }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Stamp request-shape metadata on the root `gen_ai.agent.execute`
|
||||
// span now that `branch`, attachment counts, and the pending-stream
|
||||
// wait are all known. This turns dashboard slicing by
|
||||
// `copilot.surface` / `copilot.mode` / `copilot.interrupted_prior_stream`
|
||||
// into a simple TraceQL filter.
|
||||
activeOtelRoot.setRequestShape({
|
||||
branchKind: branch.kind,
|
||||
mode: body.mode,
|
||||
model: body.model,
|
||||
provider: body.provider,
|
||||
createNewChat: body.createNewChat,
|
||||
prefetch: body.prefetch,
|
||||
fileAttachmentsCount: body.fileAttachments?.length ?? 0,
|
||||
resourceAttachmentsCount: body.resourceAttachments?.length ?? 0,
|
||||
contextsCount: normalizedContexts.length,
|
||||
commandsCount: body.commands?.length ?? 0,
|
||||
pendingStreamWaitMs,
|
||||
})
|
||||
|
||||
const workspaceId = branch.workspaceId
|
||||
const userPermissionPromise = workspaceId
|
||||
? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => {
|
||||
logger.warn('Failed to load user permissions', {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
workspaceId,
|
||||
})
|
||||
return null
|
||||
})
|
||||
: Promise.resolve(null)
|
||||
// Wrap the pre-LLM prep work in spans so the trace waterfall shows
|
||||
// where time is going between "request received" and "llm.stream
|
||||
// opens". Previously these ran bare under the root and inflated the
|
||||
// apparent "gap" before the model call. Each promise is its own
|
||||
// span; they run concurrently under Promise.all below.
|
||||
const workspaceContextPromise =
|
||||
branch.kind === 'workspace'
|
||||
? withCopilotSpan(
|
||||
TraceSpan.CopilotChatBuildWorkspaceContext,
|
||||
{ [TraceAttr.WorkspaceId]: branch.workspaceId },
|
||||
() => generateWorkspaceContext(branch.workspaceId, authenticatedUserId),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
: Promise.resolve(undefined)
|
||||
const agentContextsPromise = withCopilotSpan(
|
||||
TraceSpan.CopilotChatResolveAgentContexts,
|
||||
{
|
||||
[TraceAttr.CopilotContextsCount]: normalizedContexts.length,
|
||||
[TraceAttr.CopilotResourceAttachmentsCount]: body.resourceAttachments?.length ?? 0,
|
||||
},
|
||||
() =>
|
||||
resolveAgentContexts({
|
||||
contexts: normalizedContexts,
|
||||
resourceAttachments: body.resourceAttachments,
|
||||
userId: authenticatedUserId,
|
||||
message: body.message,
|
||||
workspaceId,
|
||||
chatId: actualChatId,
|
||||
requestId,
|
||||
}),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
const persistedMessagesPromise = persistUserMessage({
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
message: body.message,
|
||||
fileAttachments: body.fileAttachments,
|
||||
contexts: normalizedContexts,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
parentOtelContext: activeOtelRoot.context,
|
||||
})
|
||||
const executionContextPromise = withCopilotSpan(
|
||||
TraceSpan.CopilotChatBuildExecutionContext,
|
||||
{ [TraceAttr.CopilotBranchKind]: branch.kind },
|
||||
() =>
|
||||
branch.buildExecutionContext({
|
||||
userId: authenticatedUserId,
|
||||
chatId: actualChatId,
|
||||
userTimezone: body.userTimezone,
|
||||
messageId: userMessageId,
|
||||
}),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
|
||||
const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] =
|
||||
await Promise.all([
|
||||
agentContextsPromise,
|
||||
userPermissionPromise,
|
||||
workspaceContextPromise,
|
||||
persistedMessagesPromise,
|
||||
executionContextPromise,
|
||||
])
|
||||
|
||||
if (persistedMessages) {
|
||||
conversationHistory = persistedMessages.filter((message) => {
|
||||
const record = message as Record<string, unknown>
|
||||
return record.id !== userMessageId
|
||||
})
|
||||
}
|
||||
|
||||
// buildPayload is the last synchronous step before the outbound
|
||||
// Sim → Go HTTP call. It runs per-tool schema generation (subscription
|
||||
// lookup + registry iteration, cached 30s) and file upload tracking
|
||||
// per attachment. Wrapping it so we can see how much of the
|
||||
// "before llm.stream" gap lives here vs elsewhere.
|
||||
const requestPayload = await withCopilotSpan(
|
||||
TraceSpan.CopilotChatBuildPayload,
|
||||
{
|
||||
[TraceAttr.CopilotBranchKind]: branch.kind,
|
||||
[TraceAttr.CopilotFileAttachmentsCount]: body.fileAttachments?.length ?? 0,
|
||||
[TraceAttr.CopilotContextsCount]: normalizedContexts.length,
|
||||
},
|
||||
() =>
|
||||
branch.kind === 'workflow'
|
||||
? branch.buildPayload({
|
||||
message: body.message,
|
||||
userId: authenticatedUserId,
|
||||
userMessageId,
|
||||
chatId: actualChatId,
|
||||
contexts: agentContexts,
|
||||
fileAttachments: body.fileAttachments,
|
||||
userPermission: userPermission ?? undefined,
|
||||
userTimezone: body.userTimezone,
|
||||
workflowId: branch.workflowId,
|
||||
workflowName: branch.workflowName,
|
||||
workspaceId: branch.workspaceId,
|
||||
mode: branch.mode,
|
||||
provider: branch.provider,
|
||||
commands: body.commands,
|
||||
prefetch: body.prefetch,
|
||||
implicitFeedback: body.implicitFeedback,
|
||||
})
|
||||
: branch.buildPayload({
|
||||
message: body.message,
|
||||
userId: authenticatedUserId,
|
||||
userMessageId,
|
||||
chatId: actualChatId,
|
||||
contexts: agentContexts,
|
||||
fileAttachments: body.fileAttachments,
|
||||
userPermission: userPermission ?? undefined,
|
||||
userTimezone: body.userTimezone,
|
||||
workspaceContext,
|
||||
}),
|
||||
activeOtelRoot.context
|
||||
)
|
||||
|
||||
if (actualChatId) {
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.ChatId, actualChatId)
|
||||
}
|
||||
if (workspaceId) {
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.WorkspaceId, workspaceId)
|
||||
}
|
||||
|
||||
const stream = createSSEStream({
|
||||
requestPayload,
|
||||
userId: authenticatedUserId,
|
||||
streamId: userMessageId,
|
||||
executionId,
|
||||
runId,
|
||||
goRoute: branch.goRoute,
|
||||
autoExecuteTools: true,
|
||||
interactive: true,
|
||||
executionContext,
|
||||
onComplete: buildOnComplete({
|
||||
chatId: actualChatId,
|
||||
currentChat,
|
||||
isNewChat: conversationHistory.length === 0,
|
||||
message: body.message,
|
||||
titleModel: branch.titleModel,
|
||||
...(branch.titleProvider ? { titleProvider: branch.titleProvider } : {}),
|
||||
requestId,
|
||||
workspaceId,
|
||||
otelRoot: activeOtelRoot,
|
||||
orchestrateOptions: {
|
||||
userId: authenticatedUserId,
|
||||
...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}),
|
||||
...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}),
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
requestId: tracker.requestId,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
}),
|
||||
onError: buildOnError({
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
requestId: tracker.requestId,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
}),
|
||||
},
|
||||
})
|
||||
executionId,
|
||||
runId,
|
||||
goRoute: branch.goRoute,
|
||||
autoExecuteTools: true,
|
||||
interactive: true,
|
||||
executionContext,
|
||||
onComplete: buildOnComplete({
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
requestId,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
otelRoot,
|
||||
}),
|
||||
onError: buildOnError({
|
||||
chatId: actualChatId,
|
||||
userMessageId,
|
||||
requestId,
|
||||
workspaceId,
|
||||
notifyWorkspaceStatus: branch.notifyWorkspaceStatus,
|
||||
}),
|
||||
},
|
||||
})
|
||||
|
||||
return new Response(stream, { headers: SSE_RESPONSE_HEADERS })
|
||||
// Expose the root gen_ai.agent.execute span's trace identity to
|
||||
// the browser so subsequent HTTP calls (stop, abort, confirm,
|
||||
// SSE reconnect) can echo it back as `traceparent` — making
|
||||
// all side-channel work on this request appear as child spans
|
||||
// of this same trace in Tempo instead of disconnected roots.
|
||||
// W3C traceparent format: `00-<trace-id>-<parent-id>-<flags>`.
|
||||
const rootCtx = activeOtelRoot.span.spanContext()
|
||||
const rootTraceparent = `00-${rootCtx.traceId}-${rootCtx.spanId}-${
|
||||
(rootCtx.traceFlags & 0x1) === 0x1 ? '01' : '00'
|
||||
}`
|
||||
return new Response(stream, {
|
||||
headers: {
|
||||
...SSE_RESPONSE_HEADERS,
|
||||
traceparent: rootTraceparent,
|
||||
},
|
||||
})
|
||||
}) // end otelContextApi.with
|
||||
} catch (error) {
|
||||
if (chatStreamLockAcquired && actualChatId && userMessageId) {
|
||||
await releasePendingChatStream(actualChatId, userMessageId)
|
||||
}
|
||||
otelRoot?.finish('error', error)
|
||||
|
||||
if (error instanceof z.ZodError) {
|
||||
return NextResponse.json(
|
||||
@@ -754,13 +985,15 @@ export async function handleUnifiedChatPost(req: NextRequest) {
|
||||
)
|
||||
}
|
||||
|
||||
logger.error(`[${tracker.requestId}] Error handling unified chat request`, {
|
||||
logger.error(`[${requestId}] Error handling unified chat request`, {
|
||||
error: error instanceof Error ? error.message : 'Unknown error',
|
||||
stack: error instanceof Error ? error.stack : undefined,
|
||||
})
|
||||
|
||||
return NextResponse.json(
|
||||
{ error: error instanceof Error ? error.message : 'Internal server error' },
|
||||
{
|
||||
error: error instanceof Error ? error.message : 'Internal server error',
|
||||
},
|
||||
{ status: 500 }
|
||||
)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,10 @@ import { db } from '@sim/db'
|
||||
import { copilotChats } from '@sim/db/schema'
|
||||
import { and, eq, sql } from 'drizzle-orm'
|
||||
import type { PersistedMessage } from '@/lib/copilot/chat/persisted-message'
|
||||
import { CopilotChatFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
|
||||
interface FinalizeAssistantTurnParams {
|
||||
chatId: string
|
||||
@@ -19,39 +23,65 @@ export async function finalizeAssistantTurn({
|
||||
userMessageId,
|
||||
assistantMessage,
|
||||
}: FinalizeAssistantTurnParams): Promise<void> {
|
||||
const [row] = await db
|
||||
.select({ messages: copilotChats.messages })
|
||||
.from(copilotChats)
|
||||
.where(eq(copilotChats.id, chatId))
|
||||
.limit(1)
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotChatFinalizeAssistantTurn,
|
||||
{
|
||||
[TraceAttr.DbSystem]: 'postgresql',
|
||||
[TraceAttr.DbSqlTable]: 'copilot_chats',
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
[TraceAttr.ChatUserMessageId]: userMessageId,
|
||||
[TraceAttr.ChatHasAssistantMessage]: !!assistantMessage,
|
||||
},
|
||||
async (span) => {
|
||||
const [row] = await db
|
||||
.select({ messages: copilotChats.messages })
|
||||
.from(copilotChats)
|
||||
.where(eq(copilotChats.id, chatId))
|
||||
.limit(1)
|
||||
|
||||
const messages: Record<string, unknown>[] = Array.isArray(row?.messages) ? row.messages : []
|
||||
const userIdx = messages.findIndex((message) => message.id === userMessageId)
|
||||
const alreadyHasResponse =
|
||||
userIdx >= 0 &&
|
||||
userIdx + 1 < messages.length &&
|
||||
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
|
||||
const canAppendAssistant = userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
|
||||
const updateWhere = and(
|
||||
eq(copilotChats.id, chatId),
|
||||
eq(copilotChats.conversationId, userMessageId)
|
||||
const messages: Record<string, unknown>[] = Array.isArray(row?.messages) ? row.messages : []
|
||||
span.setAttribute(TraceAttr.ChatExistingMessageCount, messages.length)
|
||||
const userIdx = messages.findIndex((message) => message.id === userMessageId)
|
||||
const alreadyHasResponse =
|
||||
userIdx >= 0 &&
|
||||
userIdx + 1 < messages.length &&
|
||||
(messages[userIdx + 1] as Record<string, unknown>)?.role === 'assistant'
|
||||
const canAppendAssistant =
|
||||
userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse
|
||||
const updateWhere = and(
|
||||
eq(copilotChats.id, chatId),
|
||||
eq(copilotChats.conversationId, userMessageId)
|
||||
)
|
||||
|
||||
const baseUpdate = {
|
||||
conversationId: null,
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
|
||||
if (assistantMessage && canAppendAssistant) {
|
||||
await db
|
||||
.update(copilotChats)
|
||||
.set({
|
||||
...baseUpdate,
|
||||
messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`,
|
||||
})
|
||||
.where(updateWhere)
|
||||
span.setAttribute(
|
||||
TraceAttr.ChatFinalizeOutcome,
|
||||
CopilotChatFinalizeOutcome.AppendedAssistant
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
await db.update(copilotChats).set(baseUpdate).where(updateWhere)
|
||||
span.setAttribute(
|
||||
TraceAttr.ChatFinalizeOutcome,
|
||||
assistantMessage
|
||||
? alreadyHasResponse
|
||||
? 'assistant_already_persisted'
|
||||
: 'stale_user_message'
|
||||
: 'cleared_stream_marker_only'
|
||||
)
|
||||
}
|
||||
)
|
||||
|
||||
const baseUpdate = {
|
||||
conversationId: null,
|
||||
updatedAt: new Date(),
|
||||
}
|
||||
|
||||
if (assistantMessage && canAppendAssistant) {
|
||||
await db
|
||||
.update(copilotChats)
|
||||
.set({
|
||||
...baseUpdate,
|
||||
messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`,
|
||||
})
|
||||
.where(updateWhere)
|
||||
return
|
||||
}
|
||||
|
||||
await db.update(copilotChats).set(baseUpdate).where(updateWhere)
|
||||
}
|
||||
|
||||
@@ -34,9 +34,6 @@ export const STREAM_STORAGE_KEY = 'copilot_active_stream'
|
||||
/** POST — send a chat message through the unified mothership chat surface. */
|
||||
export const MOTHERSHIP_CHAT_API_PATH = '/api/mothership/chat'
|
||||
|
||||
/** Backwards-compatible alias while remaining callers migrate. */
|
||||
export const COPILOT_CHAT_API_PATH = MOTHERSHIP_CHAT_API_PATH
|
||||
|
||||
/** POST — confirm or reject a tool call. */
|
||||
export const COPILOT_CONFIRM_API_PATH = '/api/copilot/confirm'
|
||||
|
||||
|
||||
@@ -1316,6 +1316,11 @@ export const MOTHERSHIP_STREAM_V1_SCHEMA: JsonSchema = {
|
||||
MothershipStreamV1Trace: {
|
||||
additionalProperties: false,
|
||||
properties: {
|
||||
goTraceId: {
|
||||
description:
|
||||
'OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity.',
|
||||
type: 'string',
|
||||
},
|
||||
requestId: {
|
||||
type: 'string',
|
||||
},
|
||||
|
||||
@@ -66,6 +66,10 @@ export interface MothershipStreamV1StreamRef {
|
||||
streamId: string
|
||||
}
|
||||
export interface MothershipStreamV1Trace {
|
||||
/**
|
||||
* OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity.
|
||||
*/
|
||||
goTraceId?: string
|
||||
requestId: string
|
||||
spanId?: string
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ export interface RequestTraceV1SimReport {
|
||||
startMs: number
|
||||
streamId?: string
|
||||
usage?: RequestTraceV1UsageSummary
|
||||
userMessage?: string
|
||||
}
|
||||
/**
|
||||
* This interface was referenced by `RequestTraceV1SimReport`'s JSON-Schema
|
||||
@@ -112,6 +113,7 @@ export interface RequestTraceV1SimReport1 {
|
||||
startMs: number
|
||||
streamId?: string
|
||||
usage?: RequestTraceV1UsageSummary
|
||||
userMessage?: string
|
||||
}
|
||||
|
||||
export const RequestTraceV1Outcome = {
|
||||
|
||||
372
apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts
Normal file
372
apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts
Normal file
@@ -0,0 +1,372 @@
|
||||
// AUTO-GENERATED FILE. DO NOT EDIT.
|
||||
//
|
||||
// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json
|
||||
// Regenerate with: bun run trace-attribute-values-contract:generate
|
||||
//
|
||||
// Canonical closed-set value vocabularies for mothership OTel
|
||||
// attributes. Call sites should reference e.g.
|
||||
// `CopilotRequestCancelReason.ExplicitStop` rather than the raw
|
||||
// string literal, so typos become compile errors and the Go contract
|
||||
// remains the single source of truth.
|
||||
|
||||
export const AbortBackend = {
|
||||
InProcess: 'in_process',
|
||||
Redis: 'redis',
|
||||
} as const
|
||||
|
||||
export type AbortBackendKey = keyof typeof AbortBackend
|
||||
export type AbortBackendValue = (typeof AbortBackend)[AbortBackendKey]
|
||||
|
||||
export const AbortRedisResult = {
|
||||
Error: 'error',
|
||||
Ok: 'ok',
|
||||
Slow: 'slow',
|
||||
} as const
|
||||
|
||||
export type AbortRedisResultKey = keyof typeof AbortRedisResult
|
||||
export type AbortRedisResultValue = (typeof AbortRedisResult)[AbortRedisResultKey]
|
||||
|
||||
export const AuthKeyMatch = {
|
||||
Enterprise: 'enterprise',
|
||||
None: 'none',
|
||||
User: 'user',
|
||||
} as const
|
||||
|
||||
export type AuthKeyMatchKey = keyof typeof AuthKeyMatch
|
||||
export type AuthKeyMatchValue = (typeof AuthKeyMatch)[AuthKeyMatchKey]
|
||||
|
||||
export const BillingAnalyticsOutcome = {
|
||||
Duplicate: 'duplicate',
|
||||
RetriesExhausted: 'retries_exhausted',
|
||||
Success: 'success',
|
||||
Unknown: 'unknown',
|
||||
} as const
|
||||
|
||||
export type BillingAnalyticsOutcomeKey = keyof typeof BillingAnalyticsOutcome
|
||||
export type BillingAnalyticsOutcomeValue =
|
||||
(typeof BillingAnalyticsOutcome)[BillingAnalyticsOutcomeKey]
|
||||
|
||||
export const BillingFlushOutcome = {
|
||||
CheckpointAlreadyClaimed: 'checkpoint_already_claimed',
|
||||
CheckpointLoadFailed: 'checkpoint_load_failed',
|
||||
Flushed: 'flushed',
|
||||
NoCheckpoint: 'no_checkpoint',
|
||||
NoSnapshot: 'no_snapshot',
|
||||
SkippedUnconfigured: 'skipped_unconfigured',
|
||||
} as const
|
||||
|
||||
export type BillingFlushOutcomeKey = keyof typeof BillingFlushOutcome
|
||||
export type BillingFlushOutcomeValue = (typeof BillingFlushOutcome)[BillingFlushOutcomeKey]
|
||||
|
||||
export const BillingRouteOutcome = {
|
||||
AuthFailed: 'auth_failed',
|
||||
Billed: 'billed',
|
||||
BillingDisabled: 'billing_disabled',
|
||||
DuplicateIdempotencyKey: 'duplicate_idempotency_key',
|
||||
InternalError: 'internal_error',
|
||||
InvalidBody: 'invalid_body',
|
||||
} as const
|
||||
|
||||
export type BillingRouteOutcomeKey = keyof typeof BillingRouteOutcome
|
||||
export type BillingRouteOutcomeValue = (typeof BillingRouteOutcome)[BillingRouteOutcomeKey]
|
||||
|
||||
export const CopilotAbortOutcome = {
|
||||
BadRequest: 'bad_request',
|
||||
FallbackPersistFailed: 'fallback_persist_failed',
|
||||
MissingMessageId: 'missing_message_id',
|
||||
MissingStreamId: 'missing_stream_id',
|
||||
NoChatId: 'no_chat_id',
|
||||
Ok: 'ok',
|
||||
SettleTimeout: 'settle_timeout',
|
||||
Settled: 'settled',
|
||||
Unauthorized: 'unauthorized',
|
||||
} as const
|
||||
|
||||
export type CopilotAbortOutcomeKey = keyof typeof CopilotAbortOutcome
|
||||
export type CopilotAbortOutcomeValue = (typeof CopilotAbortOutcome)[CopilotAbortOutcomeKey]
|
||||
|
||||
export const CopilotBranchKind = {
|
||||
Workflow: 'workflow',
|
||||
Workspace: 'workspace',
|
||||
} as const
|
||||
|
||||
export type CopilotBranchKindKey = keyof typeof CopilotBranchKind
|
||||
export type CopilotBranchKindValue = (typeof CopilotBranchKind)[CopilotBranchKindKey]
|
||||
|
||||
export const CopilotChatFinalizeOutcome = {
|
||||
AppendedAssistant: 'appended_assistant',
|
||||
AssistantAlreadyPersisted: 'assistant_already_persisted',
|
||||
ClearedStreamMarkerOnly: 'cleared_stream_marker_only',
|
||||
StaleUserMessage: 'stale_user_message',
|
||||
} as const
|
||||
|
||||
export type CopilotChatFinalizeOutcomeKey = keyof typeof CopilotChatFinalizeOutcome
|
||||
export type CopilotChatFinalizeOutcomeValue =
|
||||
(typeof CopilotChatFinalizeOutcome)[CopilotChatFinalizeOutcomeKey]
|
||||
|
||||
export const CopilotChatPersistOutcome = {
|
||||
Appended: 'appended',
|
||||
ChatNotFound: 'chat_not_found',
|
||||
} as const
|
||||
|
||||
export type CopilotChatPersistOutcomeKey = keyof typeof CopilotChatPersistOutcome
|
||||
export type CopilotChatPersistOutcomeValue =
|
||||
(typeof CopilotChatPersistOutcome)[CopilotChatPersistOutcomeKey]
|
||||
|
||||
export const CopilotConfirmOutcome = {
|
||||
Delivered: 'delivered',
|
||||
Forbidden: 'forbidden',
|
||||
InternalError: 'internal_error',
|
||||
RunNotFound: 'run_not_found',
|
||||
ToolCallNotFound: 'tool_call_not_found',
|
||||
Unauthorized: 'unauthorized',
|
||||
UpdateFailed: 'update_failed',
|
||||
ValidationError: 'validation_error',
|
||||
} as const
|
||||
|
||||
export type CopilotConfirmOutcomeKey = keyof typeof CopilotConfirmOutcome
|
||||
export type CopilotConfirmOutcomeValue = (typeof CopilotConfirmOutcome)[CopilotConfirmOutcomeKey]
|
||||
|
||||
export const CopilotFinalizeOutcome = {
|
||||
Aborted: 'aborted',
|
||||
Error: 'error',
|
||||
Success: 'success',
|
||||
} as const
|
||||
|
||||
export type CopilotFinalizeOutcomeKey = keyof typeof CopilotFinalizeOutcome
|
||||
export type CopilotFinalizeOutcomeValue = (typeof CopilotFinalizeOutcome)[CopilotFinalizeOutcomeKey]
|
||||
|
||||
export const CopilotLeg = {
|
||||
SimToGo: 'sim_to_go',
|
||||
} as const
|
||||
|
||||
export type CopilotLegKey = keyof typeof CopilotLeg
|
||||
export type CopilotLegValue = (typeof CopilotLeg)[CopilotLegKey]
|
||||
|
||||
export const CopilotOutputFileOutcome = {
|
||||
Failed: 'failed',
|
||||
Uploaded: 'uploaded',
|
||||
} as const
|
||||
|
||||
export type CopilotOutputFileOutcomeKey = keyof typeof CopilotOutputFileOutcome
|
||||
export type CopilotOutputFileOutcomeValue =
|
||||
(typeof CopilotOutputFileOutcome)[CopilotOutputFileOutcomeKey]
|
||||
|
||||
export const CopilotRecoveryOutcome = {
|
||||
GapDetected: 'gap_detected',
|
||||
InRange: 'in_range',
|
||||
} as const
|
||||
|
||||
export type CopilotRecoveryOutcomeKey = keyof typeof CopilotRecoveryOutcome
|
||||
export type CopilotRecoveryOutcomeValue = (typeof CopilotRecoveryOutcome)[CopilotRecoveryOutcomeKey]
|
||||
|
||||
export const CopilotRequestCancelReason = {
|
||||
ClientDisconnect: 'client_disconnect',
|
||||
ExplicitStop: 'explicit_stop',
|
||||
Timeout: 'timeout',
|
||||
Unknown: 'unknown',
|
||||
} as const
|
||||
|
||||
export type CopilotRequestCancelReasonKey = keyof typeof CopilotRequestCancelReason
|
||||
export type CopilotRequestCancelReasonValue =
|
||||
(typeof CopilotRequestCancelReason)[CopilotRequestCancelReasonKey]
|
||||
|
||||
export const CopilotResourcesOp = {
|
||||
Delete: 'delete',
|
||||
None: 'none',
|
||||
Upsert: 'upsert',
|
||||
} as const
|
||||
|
||||
export type CopilotResourcesOpKey = keyof typeof CopilotResourcesOp
|
||||
export type CopilotResourcesOpValue = (typeof CopilotResourcesOp)[CopilotResourcesOpKey]
|
||||
|
||||
export const CopilotResumeOutcome = {
|
||||
BatchDelivered: 'batch_delivered',
|
||||
ClientDisconnected: 'client_disconnected',
|
||||
EndedWithoutTerminal: 'ended_without_terminal',
|
||||
StreamNotFound: 'stream_not_found',
|
||||
TerminalDelivered: 'terminal_delivered',
|
||||
} as const
|
||||
|
||||
export type CopilotResumeOutcomeKey = keyof typeof CopilotResumeOutcome
|
||||
export type CopilotResumeOutcomeValue = (typeof CopilotResumeOutcome)[CopilotResumeOutcomeKey]
|
||||
|
||||
export const CopilotSseCloseReason = {
|
||||
Aborted: 'aborted',
|
||||
BackendError: 'backend_error',
|
||||
BillingLimit: 'billing_limit',
|
||||
ClosedNoTerminal: 'closed_no_terminal',
|
||||
Error: 'error',
|
||||
Terminal: 'terminal',
|
||||
Timeout: 'timeout',
|
||||
} as const
|
||||
|
||||
export type CopilotSseCloseReasonKey = keyof typeof CopilotSseCloseReason
|
||||
export type CopilotSseCloseReasonValue = (typeof CopilotSseCloseReason)[CopilotSseCloseReasonKey]
|
||||
|
||||
export const CopilotStopOutcome = {
|
||||
ChatNotFound: 'chat_not_found',
|
||||
InternalError: 'internal_error',
|
||||
NoMatchingRow: 'no_matching_row',
|
||||
Persisted: 'persisted',
|
||||
Unauthorized: 'unauthorized',
|
||||
ValidationError: 'validation_error',
|
||||
} as const
|
||||
|
||||
export type CopilotStopOutcomeKey = keyof typeof CopilotStopOutcome
|
||||
export type CopilotStopOutcomeValue = (typeof CopilotStopOutcome)[CopilotStopOutcomeKey]
|
||||
|
||||
export const CopilotSurface = {
|
||||
Copilot: 'copilot',
|
||||
Mothership: 'mothership',
|
||||
} as const
|
||||
|
||||
export type CopilotSurfaceKey = keyof typeof CopilotSurface
|
||||
export type CopilotSurfaceValue = (typeof CopilotSurface)[CopilotSurfaceKey]
|
||||
|
||||
export const CopilotTableOutcome = {
|
||||
EmptyContent: 'empty_content',
|
||||
EmptyRows: 'empty_rows',
|
||||
Failed: 'failed',
|
||||
Imported: 'imported',
|
||||
InvalidJsonShape: 'invalid_json_shape',
|
||||
InvalidShape: 'invalid_shape',
|
||||
RowLimitExceeded: 'row_limit_exceeded',
|
||||
TableNotFound: 'table_not_found',
|
||||
Wrote: 'wrote',
|
||||
} as const
|
||||
|
||||
export type CopilotTableOutcomeKey = keyof typeof CopilotTableOutcome
|
||||
export type CopilotTableOutcomeValue = (typeof CopilotTableOutcome)[CopilotTableOutcomeKey]
|
||||
|
||||
export const CopilotTableSourceFormat = {
|
||||
Csv: 'csv',
|
||||
Json: 'json',
|
||||
} as const
|
||||
|
||||
export type CopilotTableSourceFormatKey = keyof typeof CopilotTableSourceFormat
|
||||
export type CopilotTableSourceFormatValue =
|
||||
(typeof CopilotTableSourceFormat)[CopilotTableSourceFormatKey]
|
||||
|
||||
export const CopilotTransport = {
|
||||
Batch: 'batch',
|
||||
Headless: 'headless',
|
||||
Stream: 'stream',
|
||||
} as const
|
||||
|
||||
export type CopilotTransportKey = keyof typeof CopilotTransport
|
||||
export type CopilotTransportValue = (typeof CopilotTransport)[CopilotTransportKey]
|
||||
|
||||
export const CopilotValidateOutcome = {
|
||||
InternalAuthFailed: 'internal_auth_failed',
|
||||
InternalError: 'internal_error',
|
||||
InvalidBody: 'invalid_body',
|
||||
Ok: 'ok',
|
||||
UsageExceeded: 'usage_exceeded',
|
||||
UserNotFound: 'user_not_found',
|
||||
} as const
|
||||
|
||||
export type CopilotValidateOutcomeKey = keyof typeof CopilotValidateOutcome
|
||||
export type CopilotValidateOutcomeValue = (typeof CopilotValidateOutcome)[CopilotValidateOutcomeKey]
|
||||
|
||||
export const CopilotVfsOutcome = {
|
||||
PassthroughFitsBudget: 'passthrough_fits_budget',
|
||||
PassthroughNoMetadata: 'passthrough_no_metadata',
|
||||
PassthroughNoSharp: 'passthrough_no_sharp',
|
||||
RejectedNoMetadata: 'rejected_no_metadata',
|
||||
RejectedNoSharp: 'rejected_no_sharp',
|
||||
RejectedTooLargeAfterResize: 'rejected_too_large_after_resize',
|
||||
Resized: 'resized',
|
||||
} as const
|
||||
|
||||
export type CopilotVfsOutcomeKey = keyof typeof CopilotVfsOutcome
|
||||
export type CopilotVfsOutcomeValue = (typeof CopilotVfsOutcome)[CopilotVfsOutcomeKey]
|
||||
|
||||
export const CopilotVfsReadOutcome = {
|
||||
BinaryPlaceholder: 'binary_placeholder',
|
||||
DocumentParsed: 'document_parsed',
|
||||
DocumentTooLarge: 'document_too_large',
|
||||
ImagePrepared: 'image_prepared',
|
||||
ImageTooLarge: 'image_too_large',
|
||||
ParseFailed: 'parse_failed',
|
||||
ReadFailed: 'read_failed',
|
||||
TextRead: 'text_read',
|
||||
TextTooLarge: 'text_too_large',
|
||||
} as const
|
||||
|
||||
export type CopilotVfsReadOutcomeKey = keyof typeof CopilotVfsReadOutcome
|
||||
export type CopilotVfsReadOutcomeValue = (typeof CopilotVfsReadOutcome)[CopilotVfsReadOutcomeKey]
|
||||
|
||||
export const CopilotVfsReadPath = {
|
||||
Binary: 'binary',
|
||||
Image: 'image',
|
||||
ParseableDocument: 'parseable_document',
|
||||
Text: 'text',
|
||||
} as const
|
||||
|
||||
export type CopilotVfsReadPathKey = keyof typeof CopilotVfsReadPath
|
||||
export type CopilotVfsReadPathValue = (typeof CopilotVfsReadPath)[CopilotVfsReadPathKey]
|
||||
|
||||
export const LlmErrorStage = {
|
||||
BuildRequest: 'build_request',
|
||||
Decode: 'decode',
|
||||
HttpBuild: 'http_build',
|
||||
HttpStatus: 'http_status',
|
||||
Invoke: 'invoke',
|
||||
MarshalRequest: 'marshal_request',
|
||||
StreamClose: 'stream_close',
|
||||
} as const
|
||||
|
||||
export type LlmErrorStageKey = keyof typeof LlmErrorStage
|
||||
export type LlmErrorStageValue = (typeof LlmErrorStage)[LlmErrorStageKey]
|
||||
|
||||
export const RateLimitOutcome = {
|
||||
Allowed: 'allowed',
|
||||
IncrError: 'incr_error',
|
||||
Limited: 'limited',
|
||||
} as const
|
||||
|
||||
export type RateLimitOutcomeKey = keyof typeof RateLimitOutcome
|
||||
export type RateLimitOutcomeValue = (typeof RateLimitOutcome)[RateLimitOutcomeKey]
|
||||
|
||||
export const ToolAsyncWaiterResolution = {
|
||||
ContextCancelled: 'context_cancelled',
|
||||
Poll: 'poll',
|
||||
Pubsub: 'pubsub',
|
||||
StoredAfterClose: 'stored_after_close',
|
||||
StoredBeforeSubscribe: 'stored_before_subscribe',
|
||||
StoredPostSubscribe: 'stored_post_subscribe',
|
||||
SubscriptionClosed: 'subscription_closed',
|
||||
Unknown: 'unknown',
|
||||
} as const
|
||||
|
||||
export type ToolAsyncWaiterResolutionKey = keyof typeof ToolAsyncWaiterResolution
|
||||
export type ToolAsyncWaiterResolutionValue =
|
||||
(typeof ToolAsyncWaiterResolution)[ToolAsyncWaiterResolutionKey]
|
||||
|
||||
export const ToolErrorKind = {
|
||||
Dispatch: 'dispatch',
|
||||
NotFound: 'not_found',
|
||||
} as const
|
||||
|
||||
export type ToolErrorKindKey = keyof typeof ToolErrorKind
|
||||
export type ToolErrorKindValue = (typeof ToolErrorKind)[ToolErrorKindKey]
|
||||
|
||||
export const ToolExecutor = {
|
||||
Client: 'client',
|
||||
Go: 'go',
|
||||
Sim: 'sim',
|
||||
} as const
|
||||
|
||||
export type ToolExecutorKey = keyof typeof ToolExecutor
|
||||
export type ToolExecutorValue = (typeof ToolExecutor)[ToolExecutorKey]
|
||||
|
||||
export const ToolStoreStatus = {
|
||||
Cancelled: 'cancelled',
|
||||
Completed: 'completed',
|
||||
Failed: 'failed',
|
||||
Pending: 'pending',
|
||||
} as const
|
||||
|
||||
export type ToolStoreStatusKey = keyof typeof ToolStoreStatus
|
||||
export type ToolStoreStatusValue = (typeof ToolStoreStatus)[ToolStoreStatusKey]
|
||||
1066
apps/sim/lib/copilot/generated/trace-attributes-v1.ts
Normal file
1066
apps/sim/lib/copilot/generated/trace-attributes-v1.ts
Normal file
File diff suppressed because it is too large
Load Diff
50
apps/sim/lib/copilot/generated/trace-events-v1.ts
Normal file
50
apps/sim/lib/copilot/generated/trace-events-v1.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
// AUTO-GENERATED FILE. DO NOT EDIT.
|
||||
//
|
||||
// Source: copilot/copilot/contracts/trace-events-v1.schema.json
|
||||
// Regenerate with: bun run trace-events-contract:generate
|
||||
//
|
||||
// Canonical mothership OTel span event names. Call sites should
|
||||
// reference `TraceEvent.<Identifier>` (e.g.
|
||||
// `TraceEvent.RequestCancelled`) rather than raw string literals,
|
||||
// so the Go-side contract is the single source of truth and typos
|
||||
// become compile errors.
|
||||
|
||||
export const TraceEvent = {
|
||||
BedrockInvokeRetryWithoutImages: 'bedrock.invoke.retry_without_images',
|
||||
CopilotOutputFileError: 'copilot.output_file.error',
|
||||
CopilotSseFirstEvent: 'copilot.sse.first_event',
|
||||
CopilotSseIdleGapExceeded: 'copilot.sse.idle_gap_exceeded',
|
||||
CopilotSseTerminalEventReceived: 'copilot.sse.terminal_event_received',
|
||||
CopilotTableError: 'copilot.table.error',
|
||||
CopilotVfsParseFailed: 'copilot.vfs.parse_failed',
|
||||
CopilotVfsResizeAttempt: 'copilot.vfs.resize_attempt',
|
||||
CopilotVfsResizeAttemptFailed: 'copilot.vfs.resize_attempt_failed',
|
||||
LlmInvokeSent: 'llm.invoke.sent',
|
||||
LlmStreamFirstChunk: 'llm.stream.first_chunk',
|
||||
LlmStreamOpened: 'llm.stream.opened',
|
||||
PgNotifyFailed: 'pg_notify_failed',
|
||||
RedisSubscribed: 'redis.subscribed',
|
||||
RequestCancelled: 'request.cancelled',
|
||||
} as const
|
||||
|
||||
export type TraceEventKey = keyof typeof TraceEvent
|
||||
export type TraceEventValue = (typeof TraceEvent)[TraceEventKey]
|
||||
|
||||
/** Readonly sorted list of every canonical event name. */
|
||||
export const TraceEventValues: readonly TraceEventValue[] = [
|
||||
'bedrock.invoke.retry_without_images',
|
||||
'copilot.output_file.error',
|
||||
'copilot.sse.first_event',
|
||||
'copilot.sse.idle_gap_exceeded',
|
||||
'copilot.sse.terminal_event_received',
|
||||
'copilot.table.error',
|
||||
'copilot.vfs.parse_failed',
|
||||
'copilot.vfs.resize_attempt',
|
||||
'copilot.vfs.resize_attempt_failed',
|
||||
'llm.invoke.sent',
|
||||
'llm.stream.first_chunk',
|
||||
'llm.stream.opened',
|
||||
'pg_notify_failed',
|
||||
'redis.subscribed',
|
||||
'request.cancelled',
|
||||
] as const
|
||||
153
apps/sim/lib/copilot/generated/trace-spans-v1.ts
Normal file
153
apps/sim/lib/copilot/generated/trace-spans-v1.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
// AUTO-GENERATED FILE. DO NOT EDIT.
|
||||
//
|
||||
// Source: copilot/copilot/contracts/trace-spans-v1.schema.json
|
||||
// Regenerate with: bun run trace-spans-contract:generate
|
||||
//
|
||||
// Canonical mothership OTel span names. Call sites should reference
|
||||
// `TraceSpan.<Identifier>` (e.g. `TraceSpan.CopilotVfsReadFile`)
|
||||
// rather than raw string literals, so the Go-side contract is the
|
||||
// single source of truth and typos become compile errors.
|
||||
|
||||
export const TraceSpan = {
|
||||
AnthropicCountTokens: 'anthropic.count_tokens',
|
||||
AsyncToolStoreSet: 'async_tool_store.set',
|
||||
AuthRateLimitRecord: 'auth.rate_limit.record',
|
||||
AuthValidateKey: 'auth.validate_key',
|
||||
ChatContinueWithToolResults: 'chat.continue_with_tool_results',
|
||||
ChatExplicitAbortConsume: 'chat.explicit_abort.consume',
|
||||
ChatExplicitAbortFlushPausedBilling: 'chat.explicit_abort.flush_paused_billing',
|
||||
ChatExplicitAbortHandle: 'chat.explicit_abort.handle',
|
||||
ChatExplicitAbortMark: 'chat.explicit_abort.mark',
|
||||
ChatExplicitAbortPeek: 'chat.explicit_abort.peek',
|
||||
ChatGateAcquire: 'chat.gate.acquire',
|
||||
ChatPersistAfterDone: 'chat.persist_after_done',
|
||||
ChatSetup: 'chat.setup',
|
||||
ContextReduce: 'context.reduce',
|
||||
ContextSummarizeChunk: 'context.summarize_chunk',
|
||||
CopilotAnalyticsFlush: 'copilot.analytics.flush',
|
||||
CopilotAnalyticsSaveRequest: 'copilot.analytics.save_request',
|
||||
CopilotAnalyticsUpdateBilling: 'copilot.analytics.update_billing',
|
||||
CopilotAsyncRunsClaimCompleted: 'copilot.async_runs.claim_completed',
|
||||
CopilotAsyncRunsCreateRunCheckpoint: 'copilot.async_runs.create_run_checkpoint',
|
||||
CopilotAsyncRunsCreateRunSegment: 'copilot.async_runs.create_run_segment',
|
||||
CopilotAsyncRunsGetAsyncToolCall: 'copilot.async_runs.get_async_tool_call',
|
||||
CopilotAsyncRunsGetLatestForExecution: 'copilot.async_runs.get_latest_for_execution',
|
||||
CopilotAsyncRunsGetLatestForStream: 'copilot.async_runs.get_latest_for_stream',
|
||||
CopilotAsyncRunsGetMany: 'copilot.async_runs.get_many',
|
||||
CopilotAsyncRunsGetRunSegment: 'copilot.async_runs.get_run_segment',
|
||||
CopilotAsyncRunsListForRun: 'copilot.async_runs.list_for_run',
|
||||
CopilotAsyncRunsMarkAsyncToolStatus: 'copilot.async_runs.mark_async_tool_status',
|
||||
CopilotAsyncRunsReleaseClaim: 'copilot.async_runs.release_claim',
|
||||
CopilotAsyncRunsUpdateRunStatus: 'copilot.async_runs.update_run_status',
|
||||
CopilotAsyncRunsUpsertAsyncToolCall: 'copilot.async_runs.upsert_async_tool_call',
|
||||
CopilotAuthValidateApiKey: 'copilot.auth.validate_api_key',
|
||||
CopilotBillingUpdateCost: 'copilot.billing.update_cost',
|
||||
CopilotChatAbortActiveStream: 'copilot.chat.abort_active_stream',
|
||||
CopilotChatAbortStream: 'copilot.chat.abort_stream',
|
||||
CopilotChatAbortWaitSettle: 'copilot.chat.abort_wait_settle',
|
||||
CopilotChatAcquirePendingStreamLock: 'copilot.chat.acquire_pending_stream_lock',
|
||||
CopilotChatBuildExecutionContext: 'copilot.chat.build_execution_context',
|
||||
CopilotChatBuildPayload: 'copilot.chat.build_payload',
|
||||
CopilotChatBuildWorkspaceContext: 'copilot.chat.build_workspace_context',
|
||||
CopilotChatFinalizeAssistantTurn: 'copilot.chat.finalize_assistant_turn',
|
||||
CopilotChatPersistUserMessage: 'copilot.chat.persist_user_message',
|
||||
CopilotChatResolveAgentContexts: 'copilot.chat.resolve_agent_contexts',
|
||||
CopilotChatResolveBranch: 'copilot.chat.resolve_branch',
|
||||
CopilotChatResolveOrCreateChat: 'copilot.chat.resolve_or_create_chat',
|
||||
CopilotChatStopStream: 'copilot.chat.stop_stream',
|
||||
CopilotConfirmToolResult: 'copilot.confirm.tool_result',
|
||||
CopilotFinalizeStream: 'copilot.finalize_stream',
|
||||
CopilotRecoveryCheckReplayGap: 'copilot.recovery.check_replay_gap',
|
||||
CopilotResumeRequest: 'copilot.resume.request',
|
||||
CopilotSseReadLoop: 'copilot.sse.read_loop',
|
||||
CopilotSubagentExecute: 'copilot.subagent.execute',
|
||||
CopilotToolWaitForClientResult: 'copilot.tool.wait_for_client_result',
|
||||
CopilotToolsHandleResourceSideEffects: 'copilot.tools.handle_resource_side_effects',
|
||||
CopilotToolsWriteCsvToTable: 'copilot.tools.write_csv_to_table',
|
||||
CopilotToolsWriteOutputFile: 'copilot.tools.write_output_file',
|
||||
CopilotToolsWriteOutputTable: 'copilot.tools.write_output_table',
|
||||
CopilotVfsPrepareImage: 'copilot.vfs.prepare_image',
|
||||
CopilotVfsReadFile: 'copilot.vfs.read_file',
|
||||
GenAiAgentExecute: 'gen_ai.agent.execute',
|
||||
LlmStream: 'llm.stream',
|
||||
ProviderRouterCountTokens: 'provider.router.count_tokens',
|
||||
ProviderRouterRoute: 'provider.router.route',
|
||||
SimUpdateCost: 'sim.update_cost',
|
||||
SimValidateApiKey: 'sim.validate_api_key',
|
||||
ToolAsyncWaiterWait: 'tool.async_waiter.wait',
|
||||
ToolExecute: 'tool.execute',
|
||||
} as const
|
||||
|
||||
export type TraceSpanKey = keyof typeof TraceSpan
|
||||
export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey]
|
||||
|
||||
/** Readonly sorted list of every canonical span name. */
|
||||
export const TraceSpanValues: readonly TraceSpanValue[] = [
|
||||
'anthropic.count_tokens',
|
||||
'async_tool_store.set',
|
||||
'auth.rate_limit.record',
|
||||
'auth.validate_key',
|
||||
'chat.continue_with_tool_results',
|
||||
'chat.explicit_abort.consume',
|
||||
'chat.explicit_abort.flush_paused_billing',
|
||||
'chat.explicit_abort.handle',
|
||||
'chat.explicit_abort.mark',
|
||||
'chat.explicit_abort.peek',
|
||||
'chat.gate.acquire',
|
||||
'chat.persist_after_done',
|
||||
'chat.setup',
|
||||
'context.reduce',
|
||||
'context.summarize_chunk',
|
||||
'copilot.analytics.flush',
|
||||
'copilot.analytics.save_request',
|
||||
'copilot.analytics.update_billing',
|
||||
'copilot.async_runs.claim_completed',
|
||||
'copilot.async_runs.create_run_checkpoint',
|
||||
'copilot.async_runs.create_run_segment',
|
||||
'copilot.async_runs.get_async_tool_call',
|
||||
'copilot.async_runs.get_latest_for_execution',
|
||||
'copilot.async_runs.get_latest_for_stream',
|
||||
'copilot.async_runs.get_many',
|
||||
'copilot.async_runs.get_run_segment',
|
||||
'copilot.async_runs.list_for_run',
|
||||
'copilot.async_runs.mark_async_tool_status',
|
||||
'copilot.async_runs.release_claim',
|
||||
'copilot.async_runs.update_run_status',
|
||||
'copilot.async_runs.upsert_async_tool_call',
|
||||
'copilot.auth.validate_api_key',
|
||||
'copilot.billing.update_cost',
|
||||
'copilot.chat.abort_active_stream',
|
||||
'copilot.chat.abort_stream',
|
||||
'copilot.chat.abort_wait_settle',
|
||||
'copilot.chat.acquire_pending_stream_lock',
|
||||
'copilot.chat.build_execution_context',
|
||||
'copilot.chat.build_payload',
|
||||
'copilot.chat.build_workspace_context',
|
||||
'copilot.chat.finalize_assistant_turn',
|
||||
'copilot.chat.persist_user_message',
|
||||
'copilot.chat.resolve_agent_contexts',
|
||||
'copilot.chat.resolve_branch',
|
||||
'copilot.chat.resolve_or_create_chat',
|
||||
'copilot.chat.stop_stream',
|
||||
'copilot.confirm.tool_result',
|
||||
'copilot.finalize_stream',
|
||||
'copilot.recovery.check_replay_gap',
|
||||
'copilot.resume.request',
|
||||
'copilot.sse.read_loop',
|
||||
'copilot.subagent.execute',
|
||||
'copilot.tool.wait_for_client_result',
|
||||
'copilot.tools.handle_resource_side_effects',
|
||||
'copilot.tools.write_csv_to_table',
|
||||
'copilot.tools.write_output_file',
|
||||
'copilot.tools.write_output_table',
|
||||
'copilot.vfs.prepare_image',
|
||||
'copilot.vfs.read_file',
|
||||
'gen_ai.agent.execute',
|
||||
'llm.stream',
|
||||
'provider.router.count_tokens',
|
||||
'provider.router.route',
|
||||
'sim.update_cost',
|
||||
'sim.validate_api_key',
|
||||
'tool.async_waiter.wait',
|
||||
'tool.execute',
|
||||
] as const
|
||||
79
apps/sim/lib/copilot/request/go/fetch.test.ts
Normal file
79
apps/sim/lib/copilot/request/go/fetch.test.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { trace } from '@opentelemetry/api'
|
||||
import {
|
||||
BasicTracerProvider,
|
||||
InMemorySpanExporter,
|
||||
SimpleSpanProcessor,
|
||||
} from '@opentelemetry/sdk-trace-base'
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
|
||||
describe('fetchGo', () => {
|
||||
const exporter = new InMemorySpanExporter()
|
||||
const provider = new BasicTracerProvider({
|
||||
spanProcessors: [new SimpleSpanProcessor(exporter)],
|
||||
})
|
||||
|
||||
beforeEach(() => {
|
||||
exporter.reset()
|
||||
trace.setGlobalTracerProvider(provider)
|
||||
vi.restoreAllMocks()
|
||||
})
|
||||
|
||||
it('emits a client span with http.* attrs and injects traceparent', async () => {
|
||||
const fetchMock = vi.fn().mockImplementation(async (_url: string, init: RequestInit) => {
|
||||
const headers = init.headers as Record<string, string>
|
||||
expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
|
||||
return new Response('ok', {
|
||||
status: 200,
|
||||
headers: { 'content-length': '2' },
|
||||
})
|
||||
})
|
||||
vi.stubGlobal('fetch', fetchMock)
|
||||
|
||||
const res = await fetchGo('https://backend.example.com/api/copilot', {
|
||||
method: 'POST',
|
||||
body: 'payload',
|
||||
operation: 'stream',
|
||||
attributes: { 'copilot.leg': 'sim_to_go' },
|
||||
})
|
||||
expect(res.status).toBe(200)
|
||||
|
||||
const spans = exporter.getFinishedSpans()
|
||||
expect(spans).toHaveLength(1)
|
||||
const attrs = spans[0].attributes
|
||||
expect(spans[0].name).toBe('sim → go /api/copilot')
|
||||
expect(attrs['http.method']).toBe('POST')
|
||||
expect(attrs['http.url']).toBe('https://backend.example.com/api/copilot')
|
||||
expect(attrs['http.target']).toBe('/api/copilot')
|
||||
expect(attrs['http.status_code']).toBe(200)
|
||||
expect(attrs['copilot.operation']).toBe('stream')
|
||||
expect(attrs['copilot.leg']).toBe('sim_to_go')
|
||||
expect(typeof attrs['http.response.headers_ms']).toBe('number')
|
||||
})
|
||||
|
||||
it('marks span as error on non-2xx response', async () => {
|
||||
vi.stubGlobal('fetch', vi.fn().mockResolvedValue(new Response('nope', { status: 500 })))
|
||||
|
||||
const res = await fetchGo('https://backend.example.com/api/tools/resume', {
|
||||
method: 'POST',
|
||||
})
|
||||
expect(res.status).toBe(500)
|
||||
|
||||
const spans = exporter.getFinishedSpans()
|
||||
expect(spans).toHaveLength(1)
|
||||
expect(spans[0].status.code).toBe(2)
|
||||
})
|
||||
|
||||
it('records exceptions when fetch throws', async () => {
|
||||
vi.stubGlobal('fetch', vi.fn().mockRejectedValue(new Error('network boom')))
|
||||
|
||||
await expect(
|
||||
fetchGo('https://backend.example.com/api/traces', { method: 'POST' })
|
||||
).rejects.toThrow('network boom')
|
||||
|
||||
const spans = exporter.getFinishedSpans()
|
||||
expect(spans).toHaveLength(1)
|
||||
expect(spans[0].status.code).toBe(2)
|
||||
expect(spans[0].events.some((e) => e.name === 'exception')).toBe(true)
|
||||
})
|
||||
})
|
||||
112
apps/sim/lib/copilot/request/go/fetch.ts
Normal file
112
apps/sim/lib/copilot/request/go/fetch.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { type Context, context, SpanStatusCode, trace } from '@opentelemetry/api'
|
||||
import { CopilotLeg } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { traceHeaders } from '@/lib/copilot/request/go/propagation'
|
||||
import { isActionableErrorStatus, markSpanForError } from '@/lib/copilot/request/otel'
|
||||
|
||||
// Lazy tracer resolution: module-level `trace.getTracer()` can be evaluated
|
||||
// before `instrumentation-node.ts` installs the TracerProvider under
|
||||
// Next.js 16 + Turbopack dev, freezing a NoOp tracer and silently dropping
|
||||
// every outbound Sim → Go span. Resolving per-call avoids the race.
|
||||
const getTracer = () => trace.getTracer('sim-copilot-http', '1.0.0')
|
||||
|
||||
interface OutboundFetchOptions extends RequestInit {
  /** Parent OTel context for the outbound span; defaults to `context.active()`. */
  otelContext?: Context
  /** Override for the span name; defaults to `sim → go <pathname>`. */
  spanName?: string
  /** Logical operation name stamped as the copilot operation attribute. */
  operation?: string
  /** Extra span attributes merged on top of the generic HTTP attributes. */
  attributes?: Record<string, string | number | boolean>
}
|
||||
|
||||
/**
|
||||
* Perform an outbound Sim → Go fetch wrapped in an OTel child span so each
|
||||
* call shows up as a distinct segment in Jaeger, and propagates the W3C
|
||||
* traceparent so the Go-side span joins the same trace.
|
||||
*
|
||||
* The span captures generic attributes (method, status, duration, response
|
||||
* size, error code) so any future latency investigation — not just images or
|
||||
* Bedrock — has uniform metadata to work with.
|
||||
*/
|
||||
export async function fetchGo(url: string, options: OutboundFetchOptions = {}): Promise<Response> {
|
||||
const {
|
||||
otelContext,
|
||||
spanName,
|
||||
operation,
|
||||
attributes,
|
||||
headers: providedHeaders,
|
||||
...init
|
||||
} = options
|
||||
|
||||
const parsed = safeParseUrl(url)
|
||||
const pathname = parsed?.pathname ?? url
|
||||
const method = (init.method ?? 'GET').toUpperCase()
|
||||
const parentContext = otelContext ?? context.active()
|
||||
|
||||
const span = getTracer().startSpan(
|
||||
spanName ?? `sim → go ${pathname}`,
|
||||
{
|
||||
attributes: {
|
||||
[TraceAttr.HttpMethod]: method,
|
||||
[TraceAttr.HttpUrl]: url,
|
||||
[TraceAttr.HttpTarget]: pathname,
|
||||
[TraceAttr.NetPeerName]: parsed?.host ?? '',
|
||||
[TraceAttr.CopilotLeg]: CopilotLeg.SimToGo,
|
||||
...(operation ? { [TraceAttr.CopilotOperation]: operation } : {}),
|
||||
...(attributes ?? {}),
|
||||
},
|
||||
},
|
||||
parentContext
|
||||
)
|
||||
|
||||
const activeContext = trace.setSpan(parentContext, span)
|
||||
const propagatedHeaders = traceHeaders({}, activeContext)
|
||||
const mergedHeaders = {
|
||||
...(providedHeaders as Record<string, string> | undefined),
|
||||
...propagatedHeaders,
|
||||
}
|
||||
|
||||
const start = performance.now()
|
||||
try {
|
||||
const response = await context.with(activeContext, () =>
|
||||
fetch(url, {
|
||||
...init,
|
||||
method,
|
||||
headers: mergedHeaders,
|
||||
})
|
||||
)
|
||||
const elapsedMs = performance.now() - start
|
||||
const contentLength = Number(response.headers.get('content-length') ?? 0)
|
||||
span.setAttribute(TraceAttr.HttpStatusCode, response.status)
|
||||
span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(elapsedMs))
|
||||
if (contentLength > 0) {
|
||||
span.setAttribute(TraceAttr.HttpResponseContentLength, contentLength)
|
||||
}
|
||||
// Only mark ERROR for actionable status codes. 4xx that represent
|
||||
// normal auth/validation rejections (400/401/403/404/405/422/etc.)
|
||||
// stay UNSET so error dashboards don't drown in expected rejection
|
||||
// paths. See `isActionableErrorStatus` in Go's telemetry middleware
|
||||
// for the mirror rule (5xx + 402/409/429).
|
||||
if (isActionableErrorStatus(response.status)) {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: `HTTP ${response.status}`,
|
||||
})
|
||||
} else {
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
}
|
||||
return response
|
||||
} catch (error) {
|
||||
span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(performance.now() - start))
|
||||
markSpanForError(span, error)
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
|
||||
function safeParseUrl(url: string): URL | null {
|
||||
try {
|
||||
return new URL(url)
|
||||
} catch {
|
||||
return null
|
||||
}
|
||||
}
|
||||
57
apps/sim/lib/copilot/request/go/propagation.ts
Normal file
57
apps/sim/lib/copilot/request/go/propagation.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { type Context, context } from '@opentelemetry/api'
|
||||
import { W3CTraceContextPropagator } from '@opentelemetry/core'
|
||||
|
||||
const propagator = new W3CTraceContextPropagator()
|
||||
const headerSetter = {
|
||||
set(carrier: Record<string, string>, key: string, value: string) {
|
||||
carrier[key] = value
|
||||
},
|
||||
}
|
||||
|
||||
const headerGetter = {
|
||||
keys(carrier: Headers): string[] {
|
||||
const out: string[] = []
|
||||
carrier.forEach((_, key) => {
|
||||
out.push(key)
|
||||
})
|
||||
return out
|
||||
},
|
||||
get(carrier: Headers, key: string): string | undefined {
|
||||
return carrier.get(key) ?? undefined
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
* Injects W3C trace context (traceparent, tracestate) into outbound HTTP
|
||||
* headers so Go-side spans join the same OTel trace tree as the calling
|
||||
* Sim span.
|
||||
*
|
||||
* Usage: spread the result into your fetch headers:
|
||||
* fetch(url, { headers: { ...myHeaders, ...traceHeaders() } })
|
||||
*/
|
||||
export function traceHeaders(
|
||||
carrier?: Record<string, string>,
|
||||
otelContext?: Context
|
||||
): Record<string, string> {
|
||||
const headers: Record<string, string> = carrier ?? {}
|
||||
propagator.inject(otelContext ?? context.active(), headers, headerSetter)
|
||||
return headers
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts W3C trace context from incoming request headers (traceparent /
|
||||
* tracestate) and returns an OTel Context seeded with the upstream span.
|
||||
*
|
||||
* Use this at the top of inbound Sim route handlers that Go calls into
|
||||
* (e.g. /api/billing/update-cost, /api/copilot/api-keys/validate) so the
|
||||
* Sim-side span becomes a proper child of the Go-side client span in the
|
||||
* same trace — closing the round trip in Jaeger.
|
||||
*
|
||||
* When no traceparent is present (e.g. calls from a browser or a client
|
||||
* that hasn't been instrumented), this returns `context.active()`
|
||||
* unchanged, and any span started under it becomes a new root — the same
|
||||
* behavior as before this helper existed.
|
||||
*/
|
||||
export function contextFromRequestHeaders(headers: Headers): Context {
|
||||
return propagator.extract(context.active(), headers, headerGetter)
|
||||
}
|
||||
@@ -17,7 +17,7 @@ import {
|
||||
runStreamLoop,
|
||||
} from '@/lib/copilot/request/go/stream'
|
||||
import { createEvent } from '@/lib/copilot/request/session'
|
||||
import { TraceCollector } from '@/lib/copilot/request/trace'
|
||||
import { RequestTraceV1Outcome, TraceCollector } from '@/lib/copilot/request/trace'
|
||||
import type { ExecutionContext, StreamingContext } from '@/lib/copilot/request/types'
|
||||
|
||||
function createSseResponse(events: unknown[]): Response {
|
||||
@@ -281,4 +281,55 @@ describe('copilot go stream helpers', () => {
|
||||
context.errors.some((message) => message.includes('Failed to parse SSE event JSON'))
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
it('records a split canonical request id and go trace id from the stream envelope', async () => {
|
||||
vi.mocked(fetch).mockResolvedValueOnce(
|
||||
createSseResponse([
|
||||
{
|
||||
v: 1,
|
||||
type: MothershipStreamV1EventType.text,
|
||||
seq: 1,
|
||||
ts: '2026-01-01T00:00:00.000Z',
|
||||
stream: { streamId: 'stream-1', cursor: '1' },
|
||||
trace: {
|
||||
requestId: 'sim-request-1',
|
||||
goTraceId: 'go-trace-1',
|
||||
},
|
||||
payload: {
|
||||
channel: 'assistant',
|
||||
text: 'hello',
|
||||
},
|
||||
},
|
||||
createEvent({
|
||||
streamId: 'stream-1',
|
||||
cursor: '2',
|
||||
seq: 2,
|
||||
requestId: 'sim-request-1',
|
||||
type: MothershipStreamV1EventType.complete,
|
||||
payload: {
|
||||
status: MothershipStreamV1CompletionStatus.complete,
|
||||
},
|
||||
}),
|
||||
])
|
||||
)
|
||||
|
||||
const context = createStreamingContext()
|
||||
context.requestId = 'sim-request-1'
|
||||
const execContext: ExecutionContext = {
|
||||
userId: 'user-1',
|
||||
workflowId: 'workflow-1',
|
||||
}
|
||||
|
||||
await runStreamLoop('https://example.com/mothership/stream', {}, context, execContext, {
|
||||
timeout: 1000,
|
||||
})
|
||||
|
||||
expect(context.requestId).toBe('sim-request-1')
|
||||
expect(
|
||||
context.trace.build({
|
||||
outcome: RequestTraceV1Outcome.success,
|
||||
simRequestId: 'sim-request-1',
|
||||
}).goTraceId
|
||||
).toBe('go-trace-1')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,7 +1,15 @@
|
||||
import { type Context, SpanStatusCode } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { ORCHESTRATION_TIMEOUT_MS } from '@/lib/copilot/constants'
|
||||
import { MothershipStreamV1SpanLifecycleEvent } from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import {
|
||||
type MothershipStreamV1EventType,
|
||||
MothershipStreamV1SpanLifecycleEvent,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { CopilotSseCloseReason } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import {
|
||||
buildPreviewContentUpdate,
|
||||
createFilePreviewAdapterState,
|
||||
@@ -12,9 +20,11 @@ import {
|
||||
import { FatalSseEventError, processSSEStream } from '@/lib/copilot/request/go/parser'
|
||||
import {
|
||||
handleSubagentRouting,
|
||||
prePersistClientExecutableToolCall,
|
||||
sseHandlers,
|
||||
subAgentHandlers,
|
||||
} from '@/lib/copilot/request/handlers'
|
||||
import { getCopilotTracer } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
eventToStreamEvent,
|
||||
isSubagentSpanStreamEvent,
|
||||
@@ -92,6 +102,7 @@ export interface StreamLoopOptions extends OrchestratorOptions {
|
||||
* Called when the Go backend's trace ID (go_trace_id) is first received via SSE.
|
||||
*/
|
||||
onGoTraceId?: (goTraceId: string) => void
|
||||
otelContext?: Context
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -111,15 +122,31 @@ export async function runStreamLoop(
|
||||
const { timeout = ORCHESTRATION_TIMEOUT_MS, abortSignal } = options
|
||||
const filePreviewAdapterState = createFilePreviewAdapterState()
|
||||
|
||||
const fetchSpan = context.trace.startSpan(
|
||||
`HTTP Request → ${new URL(fetchUrl).pathname}`,
|
||||
'sim.http.fetch',
|
||||
{ url: fetchUrl }
|
||||
)
|
||||
const response = await fetch(fetchUrl, {
|
||||
const pathname = new URL(fetchUrl).pathname
|
||||
const requestBodyBytes = estimateBodyBytes(fetchOptions.body)
|
||||
const fetchSpan = context.trace.startSpan(`HTTP Request → ${pathname}`, 'sim.http.fetch', {
|
||||
url: fetchUrl,
|
||||
method: fetchOptions.method ?? 'GET',
|
||||
requestBodyBytes,
|
||||
})
|
||||
const fetchStart = performance.now()
|
||||
const response = await fetchGo(fetchUrl, {
|
||||
...fetchOptions,
|
||||
signal: abortSignal,
|
||||
otelContext: options.otelContext,
|
||||
spanName: `sim → go ${pathname}`,
|
||||
operation: 'stream',
|
||||
attributes: {
|
||||
[TraceAttr.CopilotStream]: true,
|
||||
...(requestBodyBytes ? { [TraceAttr.HttpRequestContentLength]: requestBodyBytes } : {}),
|
||||
},
|
||||
})
|
||||
const headersElapsedMs = Math.round(performance.now() - fetchStart)
|
||||
fetchSpan.attributes = {
|
||||
...(fetchSpan.attributes ?? {}),
|
||||
status: response.status,
|
||||
headersMs: headersElapsedMs,
|
||||
}
|
||||
|
||||
if (!response.ok) {
|
||||
context.trace.endSpan(fetchSpan, 'error')
|
||||
@@ -141,142 +168,237 @@ export async function runStreamLoop(
|
||||
}
|
||||
|
||||
context.trace.endSpan(fetchSpan)
|
||||
const reader = response.body.getReader()
|
||||
|
||||
const bodySpan = context.trace.startSpan(`SSE Body → ${pathname}`, 'sim.http.stream_body', {
|
||||
url: fetchUrl,
|
||||
method: fetchOptions.method ?? 'GET',
|
||||
})
|
||||
|
||||
// Aggregate counters populated inline by the reader wrapper + onEvent
|
||||
// dispatcher below and flushed to both the legacy TraceCollector span
|
||||
// and the OTel read-loop span when the loop terminates. Kept as plain
|
||||
// JS variables (not span attrs) so incrementing them is free — we
|
||||
// only pay OTel cost once at span End().
|
||||
//
|
||||
// Idle-gap tracking is split two ways so we can tell apart
|
||||
// upstream-silent from we-were-busy:
|
||||
//
|
||||
// - `longestInboundGapMs`: biggest time between consecutive
|
||||
// `reader.read()` calls returning bytes. Upper bound on
|
||||
// "Go silent". Actually also includes Node waiting for main
|
||||
// thread free, so see dispatchMs below.
|
||||
// - `longestDispatchMs`: biggest time any single event handler
|
||||
// took between "event received" and "returned control". Upper
|
||||
// bound on "Sim was CPU-bound on a handler". If this is high
|
||||
// AND inbound gap is high at the same time, it's Sim. If only
|
||||
// inbound gap is high, it's upstream.
|
||||
// - `totalDispatchMs`: sum of all handler times. Helps gauge
|
||||
// whether handlers in aggregate ate a meaningful fraction of
|
||||
// the read loop.
|
||||
const counters = {
|
||||
bytes: 0,
|
||||
chunks: 0,
|
||||
events: 0,
|
||||
eventsByType: {
|
||||
session: 0,
|
||||
text: 0,
|
||||
tool: 0,
|
||||
span: 0,
|
||||
resource: 0,
|
||||
run: 0,
|
||||
error: 0,
|
||||
complete: 0,
|
||||
} as Record<MothershipStreamV1EventType, number>,
|
||||
firstEventMs: undefined as number | undefined,
|
||||
lastChunkMs: performance.now(),
|
||||
longestInboundGapMs: 0,
|
||||
longestDispatchMs: 0,
|
||||
totalDispatchMs: 0,
|
||||
}
|
||||
const bodyStart = performance.now()
|
||||
let endedOn: string = CopilotSseCloseReason.Terminal
|
||||
|
||||
// Wrap the body's reader so we can track per-chunk bytes and the gap
|
||||
// between chunks. `processSSEStream` consumes this reader exactly as
|
||||
// it would the raw one — no API changes there.
|
||||
const IDLE_GAP_EVENT_THRESHOLD_MS = 10000
|
||||
const rawReader = response.body.getReader()
|
||||
const reader: ReadableStreamDefaultReader<Uint8Array> = {
|
||||
async read() {
|
||||
const result = await rawReader.read()
|
||||
if (!result.done && result.value) {
|
||||
const now = performance.now()
|
||||
const gap = now - counters.lastChunkMs
|
||||
if (gap > counters.longestInboundGapMs) counters.longestInboundGapMs = gap
|
||||
counters.lastChunkMs = now
|
||||
counters.chunks += 1
|
||||
counters.bytes += result.value.byteLength
|
||||
}
|
||||
return result
|
||||
},
|
||||
cancel: (reason) => rawReader.cancel(reason),
|
||||
releaseLock: () => rawReader.releaseLock(),
|
||||
get closed() {
|
||||
return rawReader.closed
|
||||
},
|
||||
}
|
||||
const decoder = new TextDecoder()
|
||||
|
||||
const timeoutId = setTimeout(() => {
|
||||
context.errors.push('Request timed out')
|
||||
context.streamComplete = true
|
||||
endedOn = CopilotSseCloseReason.Timeout
|
||||
reader.cancel().catch(() => {})
|
||||
}, timeout)
|
||||
|
||||
try {
|
||||
await processSSEStream(reader, decoder, abortSignal, async (raw) => {
|
||||
if (abortSignal?.aborted) {
|
||||
context.wasAborted = true
|
||||
return true
|
||||
}
|
||||
|
||||
const parsedEvent = parsePersistedStreamEventEnvelope(raw)
|
||||
if (!parsedEvent.ok) {
|
||||
const detail = [parsedEvent.message, ...(parsedEvent.errors ?? [])]
|
||||
.filter(Boolean)
|
||||
.join('; ')
|
||||
const failureMessage = `Received invalid stream event on shared path: ${detail}`
|
||||
context.errors.push(failureMessage)
|
||||
logger.error('Received invalid stream event on shared path', {
|
||||
reason: parsedEvent.reason,
|
||||
message: parsedEvent.message,
|
||||
errors: parsedEvent.errors,
|
||||
})
|
||||
throw new FatalSseEventError(failureMessage)
|
||||
}
|
||||
|
||||
const envelope = parsedEvent.event
|
||||
const streamEvent = eventToStreamEvent(envelope)
|
||||
if (envelope.trace?.requestId) {
|
||||
const prev = context.requestId
|
||||
context.requestId = envelope.trace.requestId
|
||||
context.trace.setGoTraceId(envelope.trace.requestId)
|
||||
if (envelope.trace.requestId !== prev) {
|
||||
options.onGoTraceId?.(envelope.trace.requestId)
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldSkipToolCallEvent(streamEvent) || shouldSkipToolResultEvent(streamEvent)) {
|
||||
return
|
||||
}
|
||||
|
||||
await processFilePreviewStreamEvent({
|
||||
streamId: envelope.stream.streamId,
|
||||
streamEvent,
|
||||
context,
|
||||
execContext,
|
||||
options,
|
||||
state: filePreviewAdapterState,
|
||||
})
|
||||
|
||||
// Track how long THIS handler invocation takes so we can tell
|
||||
// apart "Go was silent" from "we were CPU-bound on a handler".
|
||||
// `longestInboundGapMs` includes handler time (the next reader.read
|
||||
// doesn't run until the previous handler returns), so dispatch
|
||||
// time is the correction needed to isolate upstream silence.
|
||||
const dispatchStart = performance.now()
|
||||
try {
|
||||
await options.onEvent?.(streamEvent)
|
||||
} catch (error) {
|
||||
logger.warn('Failed to forward stream event', {
|
||||
type: streamEvent.type,
|
||||
error: toError(error).message,
|
||||
})
|
||||
}
|
||||
if (counters.events === 0) {
|
||||
counters.firstEventMs = Math.round(performance.now() - bodyStart)
|
||||
}
|
||||
counters.events += 1
|
||||
if (abortSignal?.aborted) {
|
||||
context.wasAborted = true
|
||||
return true
|
||||
}
|
||||
|
||||
// Yield a macrotask so Node.js flushes the HTTP response buffer to
|
||||
// the browser. Microtask yields (await Promise.resolve()) are not
|
||||
// enough — the I/O layer needs a full event loop tick to write.
|
||||
await new Promise<void>((resolve) => setImmediate(resolve))
|
||||
const parsedEvent = parsePersistedStreamEventEnvelope(raw)
|
||||
if (!parsedEvent.ok) {
|
||||
const detail = [parsedEvent.message, ...(parsedEvent.errors ?? [])]
|
||||
.filter(Boolean)
|
||||
.join('; ')
|
||||
const failureMessage = `Received invalid stream event on shared path: ${detail}`
|
||||
context.errors.push(failureMessage)
|
||||
logger.error('Received invalid stream event on shared path', {
|
||||
reason: parsedEvent.reason,
|
||||
message: parsedEvent.message,
|
||||
errors: parsedEvent.errors,
|
||||
})
|
||||
throw new FatalSseEventError(failureMessage)
|
||||
}
|
||||
|
||||
if (options.onBeforeDispatch?.(streamEvent, context)) {
|
||||
return context.streamComplete || undefined
|
||||
}
|
||||
const envelope = parsedEvent.event
|
||||
const streamEvent = eventToStreamEvent(envelope)
|
||||
if (envelope.trace?.requestId) {
|
||||
const goTraceId = envelope.trace.goTraceId || envelope.trace.requestId
|
||||
context.trace.setGoTraceId(goTraceId)
|
||||
options.onGoTraceId?.(goTraceId)
|
||||
}
|
||||
|
||||
if (isSubagentSpanStreamEvent(streamEvent)) {
|
||||
const spanData = parseSubagentSpanData(streamEvent.payload.data)
|
||||
const toolCallId = streamEvent.scope?.parentToolCallId || spanData?.toolCallId
|
||||
const subagentName = streamEvent.payload.agent
|
||||
const spanEvt = streamEvent.payload.event
|
||||
const isPendingPause = spanData?.pending === true
|
||||
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.start) {
|
||||
const lastParent = context.subAgentParentStack[context.subAgentParentStack.length - 1]
|
||||
const lastBlock = context.contentBlocks[context.contentBlocks.length - 1]
|
||||
if (toolCallId) {
|
||||
if (lastParent !== toolCallId) {
|
||||
context.subAgentParentStack.push(toolCallId)
|
||||
}
|
||||
context.subAgentParentToolCallId = toolCallId
|
||||
context.subAgentContent[toolCallId] ??= ''
|
||||
context.subAgentToolCalls[toolCallId] ??= []
|
||||
}
|
||||
if (
|
||||
subagentName &&
|
||||
!(
|
||||
lastParent === toolCallId &&
|
||||
lastBlock?.type === 'subagent' &&
|
||||
lastBlock.content === subagentName
|
||||
)
|
||||
) {
|
||||
context.contentBlocks.push({
|
||||
type: 'subagent',
|
||||
content: subagentName,
|
||||
timestamp: Date.now(),
|
||||
})
|
||||
}
|
||||
// Per-type counters for the copilot.sse.read_loop span. Bound set
|
||||
// (8 types) so this can never blow up into high cardinality.
|
||||
if (streamEvent.type in counters.eventsByType) {
|
||||
counters.eventsByType[streamEvent.type as MothershipStreamV1EventType] += 1
|
||||
}
|
||||
|
||||
if (shouldSkipToolCallEvent(streamEvent) || shouldSkipToolResultEvent(streamEvent)) {
|
||||
return
|
||||
}
|
||||
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.end) {
|
||||
if (isPendingPause) {
|
||||
|
||||
await processFilePreviewStreamEvent({
|
||||
streamId: envelope.stream.streamId,
|
||||
streamEvent,
|
||||
context,
|
||||
execContext,
|
||||
options,
|
||||
state: filePreviewAdapterState,
|
||||
})
|
||||
|
||||
await prePersistClientExecutableToolCall(streamEvent, context)
|
||||
|
||||
try {
|
||||
await options.onEvent?.(streamEvent)
|
||||
} catch (error) {
|
||||
logger.warn('Failed to forward stream event', {
|
||||
type: streamEvent.type,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
}
|
||||
|
||||
// Yield a macrotask so Node.js flushes the HTTP response buffer to
|
||||
// the browser. Microtask yields (await Promise.resolve()) are not
|
||||
// enough — the I/O layer needs a full event loop tick to write.
|
||||
await new Promise<void>((resolve) => setImmediate(resolve))
|
||||
|
||||
if (options.onBeforeDispatch?.(streamEvent, context)) {
|
||||
return context.streamComplete || undefined
|
||||
}
|
||||
|
||||
if (isSubagentSpanStreamEvent(streamEvent)) {
|
||||
const spanData = parseSubagentSpanData(streamEvent.payload.data)
|
||||
const toolCallId = streamEvent.scope?.parentToolCallId || spanData?.toolCallId
|
||||
const subagentName = streamEvent.payload.agent
|
||||
const spanEvt = streamEvent.payload.event
|
||||
const isPendingPause = spanData?.pending === true
|
||||
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.start) {
|
||||
const lastParent = context.subAgentParentStack[context.subAgentParentStack.length - 1]
|
||||
const lastBlock = context.contentBlocks[context.contentBlocks.length - 1]
|
||||
if (toolCallId) {
|
||||
if (lastParent !== toolCallId) {
|
||||
context.subAgentParentStack.push(toolCallId)
|
||||
}
|
||||
context.subAgentParentToolCallId = toolCallId
|
||||
context.subAgentContent[toolCallId] ??= ''
|
||||
context.subAgentToolCalls[toolCallId] ??= []
|
||||
}
|
||||
if (
|
||||
subagentName &&
|
||||
!(
|
||||
lastParent === toolCallId &&
|
||||
lastBlock?.type === 'subagent' &&
|
||||
lastBlock.content === subagentName
|
||||
)
|
||||
) {
|
||||
context.contentBlocks.push({
|
||||
type: 'subagent',
|
||||
content: subagentName,
|
||||
timestamp: Date.now(),
|
||||
})
|
||||
}
|
||||
return
|
||||
}
|
||||
if (context.subAgentParentStack.length > 0) {
|
||||
context.subAgentParentStack.pop()
|
||||
} else {
|
||||
logger.warn('subagent end without matching start')
|
||||
if (spanEvt === MothershipStreamV1SpanLifecycleEvent.end) {
|
||||
if (isPendingPause) {
|
||||
return
|
||||
}
|
||||
if (context.subAgentParentStack.length > 0) {
|
||||
context.subAgentParentStack.pop()
|
||||
} else {
|
||||
logger.warn('subagent end without matching start')
|
||||
}
|
||||
context.subAgentParentToolCallId =
|
||||
context.subAgentParentStack.length > 0
|
||||
? context.subAgentParentStack[context.subAgentParentStack.length - 1]
|
||||
: undefined
|
||||
return
|
||||
}
|
||||
context.subAgentParentToolCallId =
|
||||
context.subAgentParentStack.length > 0
|
||||
? context.subAgentParentStack[context.subAgentParentStack.length - 1]
|
||||
: undefined
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if (handleSubagentRouting(streamEvent, context)) {
|
||||
const handler = subAgentHandlers[streamEvent.type]
|
||||
if (handleSubagentRouting(streamEvent, context)) {
|
||||
const handler = subAgentHandlers[streamEvent.type]
|
||||
if (handler) {
|
||||
await handler(streamEvent, context, execContext, options)
|
||||
}
|
||||
return context.streamComplete || undefined
|
||||
}
|
||||
|
||||
const handler = sseHandlers[streamEvent.type]
|
||||
if (handler) {
|
||||
await handler(streamEvent, context, execContext, options)
|
||||
}
|
||||
return context.streamComplete || undefined
|
||||
} finally {
|
||||
const dispatchMs = performance.now() - dispatchStart
|
||||
counters.totalDispatchMs += dispatchMs
|
||||
if (dispatchMs > counters.longestDispatchMs) counters.longestDispatchMs = dispatchMs
|
||||
}
|
||||
|
||||
const handler = sseHandlers[streamEvent.type]
|
||||
if (handler) {
|
||||
await handler(streamEvent, context, execContext, options)
|
||||
}
|
||||
return context.streamComplete || undefined
|
||||
})
|
||||
|
||||
if (!context.streamComplete && !abortSignal?.aborted && !context.wasAborted) {
|
||||
@@ -288,18 +410,209 @@ export async function runStreamLoop(
|
||||
requestId: context.requestId,
|
||||
messageId: context.messageId,
|
||||
})
|
||||
endedOn = CopilotSseCloseReason.ClosedNoTerminal
|
||||
throw new CopilotBackendError(message, { status: 503 })
|
||||
}
|
||||
} catch (error) {
|
||||
if (error instanceof FatalSseEventError && !context.errors.includes(error.message)) {
|
||||
context.errors.push(error.message)
|
||||
}
|
||||
if (endedOn === CopilotSseCloseReason.Terminal) {
|
||||
endedOn =
|
||||
error instanceof CopilotBackendError
|
||||
? CopilotSseCloseReason.BackendError
|
||||
: error instanceof BillingLimitError
|
||||
? CopilotSseCloseReason.BillingLimit
|
||||
: CopilotSseCloseReason.Error
|
||||
}
|
||||
throw error
|
||||
} finally {
|
||||
if (abortSignal?.aborted) {
|
||||
context.wasAborted = true
|
||||
await reader.cancel().catch(() => {})
|
||||
if (endedOn === CopilotSseCloseReason.Terminal) {
|
||||
endedOn = CopilotSseCloseReason.Aborted
|
||||
}
|
||||
}
|
||||
clearTimeout(timeoutId)
|
||||
|
||||
// Legacy TraceCollector span (consumed by the in-memory trace
|
||||
// collector, kept for backwards compatibility with existing
|
||||
// tooling). The real OTel span is stamped below.
|
||||
const bodyDurationMs = Math.round(performance.now() - bodyStart)
|
||||
bodySpan.attributes = {
|
||||
...(bodySpan.attributes ?? {}),
|
||||
eventsReceived: counters.events,
|
||||
firstEventMs: counters.firstEventMs,
|
||||
endedOn,
|
||||
durationMs: bodyDurationMs,
|
||||
}
|
||||
context.trace.endSpan(
|
||||
bodySpan,
|
||||
endedOn === CopilotSseCloseReason.Terminal
|
||||
? 'ok'
|
||||
: endedOn === CopilotSseCloseReason.Aborted
|
||||
? 'cancelled'
|
||||
: 'error'
|
||||
)
|
||||
|
||||
// Real OTel span for Tempo/Grafana. Stamped aggregate-only so
|
||||
// there is no per-chunk OTel cost — one span per read loop with
|
||||
// integer counters, plus a bounded set of events.
|
||||
//
|
||||
// `expectedTerminal` = "the caller considered this leg the FINAL
|
||||
// leg and genuinely expected a terminal event on the wire." We
|
||||
// derive it from `context.streamComplete` MINUS the tool-pause
|
||||
// case: when the server emits a `run.checkpoint_pause`, its
|
||||
// handler also sets `streamComplete=true` to stop the read loop
|
||||
// cleanly, but no `complete` SSE event is ever sent in that
|
||||
// case — that's the tool-pause protocol, not a missing terminal.
|
||||
// `awaitingAsyncContinuation` is set by the same handler, so
|
||||
// its presence distinguishes "tool pause, no terminal expected"
|
||||
// from "caller thought stream was done but server never said so"
|
||||
// (= the real disappeared-response bug class).
|
||||
const expectedTerminal = context.streamComplete && !context.awaitingAsyncContinuation
|
||||
stampSseReadLoopSpan(bodyStart, counters, endedOn, fetchUrl, pathname, {
|
||||
idleGapEventThresholdMs: IDLE_GAP_EVENT_THRESHOLD_MS,
|
||||
expectedTerminal,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
function estimateBodyBytes(body: BodyInit | null | undefined): number {
|
||||
if (!body) {
|
||||
return 0
|
||||
}
|
||||
if (typeof body === 'string') {
|
||||
return body.length
|
||||
}
|
||||
if (body instanceof ArrayBuffer) {
|
||||
return body.byteLength
|
||||
}
|
||||
if (ArrayBuffer.isView(body)) {
|
||||
return body.byteLength
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
/**
 * Aggregate counters collected inline during one SSE read loop and
 * flushed to the `copilot.sse.read_loop` span exactly once at loop end.
 */
type SseReadLoopCounters = {
  // Total raw bytes received from the response body reader.
  bytes: number
  // Number of chunks returned by reader.read() with data.
  chunks: number
  // Total SSE events dispatched to handlers.
  events: number
  // Per-event-type counts; bounded key set, so no cardinality blowup.
  eventsByType: Record<MothershipStreamV1EventType, number>
  // Milliseconds from body start to the first event; undefined if none arrived.
  firstEventMs: number | undefined
  // Longest gap between consecutive reads returning bytes (upstream-silence bound).
  longestInboundGapMs: number
  // Longest single event-handler dispatch time (Sim-CPU-bound bound).
  longestDispatchMs: number
  // Sum of all handler dispatch times across the loop.
  totalDispatchMs: number
}
|
||||
|
||||
/**
|
||||
* Ship a one-shot `copilot.sse.read_loop` OTel span with the aggregate
|
||||
* counters collected during the read loop. Uses `startTime` so the
|
||||
* span's duration reflects the actual loop wall clock even though we
|
||||
* only talk to OTel once at the end.
|
||||
*
|
||||
* Deliberately synchronous, no per-chunk span calls: total OTel cost
|
||||
* per read loop is fixed (~10 attrs + up to 3 events), independent of
|
||||
* chunk count.
|
||||
*/
|
||||
function stampSseReadLoopSpan(
|
||||
startPerfMs: number,
|
||||
counters: SseReadLoopCounters,
|
||||
closeReason: string,
|
||||
fetchUrl: string,
|
||||
pathname: string,
|
||||
opts: { idleGapEventThresholdMs: number; expectedTerminal: boolean }
|
||||
): void {
|
||||
// Translate performance.now() values into wall-clock Date values so
|
||||
// the span's timestamps land in real time (OTel accepts both, but we
|
||||
// need to pair startTime with a matching "now" for .end()).
|
||||
const nowPerf = performance.now()
|
||||
const nowWall = Date.now()
|
||||
const startWall = nowWall - (nowPerf - startPerfMs)
|
||||
|
||||
const terminalEventSeen = counters.eventsByType.complete > 0
|
||||
// `terminal_event_missing` is the single-attribute dashboard signal
|
||||
// for the "disappeared response" bug class: the caller considered
|
||||
// this leg to be the final one (`context.streamComplete === true`)
|
||||
// but no `complete` event arrived on the wire. Tool-pause legs have
|
||||
// expectedTerminal=false and never trip this, so dashboards can
|
||||
// filter on `{ .copilot.sse.terminal_event_missing = true }` without
|
||||
// false positives.
|
||||
const terminalEventMissing = opts.expectedTerminal && !terminalEventSeen
|
||||
|
||||
const tracer = getCopilotTracer()
|
||||
const span = tracer.startSpan(TraceSpan.CopilotSseReadLoop, {
|
||||
startTime: startWall,
|
||||
attributes: {
|
||||
[TraceAttr.HttpUrl]: fetchUrl,
|
||||
[TraceAttr.HttpPath]: pathname,
|
||||
[TraceAttr.CopilotSseBytesReceived]: counters.bytes,
|
||||
[TraceAttr.CopilotSseChunksReceived]: counters.chunks,
|
||||
[TraceAttr.CopilotSseEventsReceived]: counters.events,
|
||||
[TraceAttr.CopilotSseEventsSession]: counters.eventsByType.session,
|
||||
[TraceAttr.CopilotSseEventsText]: counters.eventsByType.text,
|
||||
[TraceAttr.CopilotSseEventsTool]: counters.eventsByType.tool,
|
||||
[TraceAttr.CopilotSseEventsSpan]: counters.eventsByType.span,
|
||||
[TraceAttr.CopilotSseEventsResource]: counters.eventsByType.resource,
|
||||
[TraceAttr.CopilotSseEventsRun]: counters.eventsByType.run,
|
||||
[TraceAttr.CopilotSseEventsError]: counters.eventsByType.error,
|
||||
[TraceAttr.CopilotSseEventsComplete]: counters.eventsByType.complete,
|
||||
[TraceAttr.CopilotSseLongestInboundGapMs]: Math.round(counters.longestInboundGapMs),
|
||||
[TraceAttr.CopilotSseLongestDispatchMs]: Math.round(counters.longestDispatchMs),
|
||||
[TraceAttr.CopilotSseTotalDispatchMs]: Math.round(counters.totalDispatchMs),
|
||||
[TraceAttr.CopilotSseCloseReason]: closeReason,
|
||||
[TraceAttr.CopilotSseExpectedTerminal]: opts.expectedTerminal,
|
||||
[TraceAttr.CopilotSseTerminalEventSeen]: terminalEventSeen,
|
||||
[TraceAttr.CopilotSseTerminalEventMissing]: terminalEventMissing,
|
||||
},
|
||||
})
|
||||
|
||||
if (counters.firstEventMs !== undefined) {
|
||||
span.setAttribute(TraceAttr.CopilotSseFirstEventMs, counters.firstEventMs)
|
||||
// Anchor the event to the moment the first SSE event was actually
|
||||
// received (startWall + firstEventMs), not `now`, so a trace
|
||||
// waterfall shows the diamond at the TTFT point — not at span end.
|
||||
span.addEvent(
|
||||
TraceEvent.CopilotSseFirstEvent,
|
||||
{ [TraceAttr.CopilotSseFirstEventMs]: counters.firstEventMs },
|
||||
startWall + counters.firstEventMs
|
||||
)
|
||||
}
|
||||
// Fire the idle-gap event when the INBOUND gap (time between TCP
|
||||
// reads returning bytes) exceeds the threshold. This is the
|
||||
// "upstream was silent or Sim was CPU-bound" signal; dispatch time
|
||||
// on its own doesn't warrant an event because it's within our
|
||||
// control and visible on a dedicated attribute.
|
||||
if (counters.longestInboundGapMs >= opts.idleGapEventThresholdMs) {
|
||||
span.addEvent(TraceEvent.CopilotSseIdleGapExceeded, {
|
||||
[TraceAttr.CopilotSseLongestInboundGapMs]: Math.round(counters.longestInboundGapMs),
|
||||
[TraceAttr.CopilotSseLongestDispatchMs]: Math.round(counters.longestDispatchMs),
|
||||
})
|
||||
}
|
||||
if (terminalEventSeen) {
|
||||
span.addEvent(TraceEvent.CopilotSseTerminalEventReceived)
|
||||
}
|
||||
|
||||
// Span status: only mark ERROR for real failures. User aborts and
|
||||
// clean terminals stay UNSET so dashboards filtering `status=error`
|
||||
// don't light up for normal cancellations. Tool-pause legs (caller
|
||||
// didn't set streamComplete) are NOT errors even though they have
|
||||
// no complete event.
|
||||
if (terminalEventMissing) {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: 'SSE read loop finished without terminal event (caller expected one)',
|
||||
})
|
||||
} else if (
|
||||
closeReason !== CopilotSseCloseReason.Terminal &&
|
||||
closeReason !== CopilotSseCloseReason.Aborted
|
||||
) {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: `SSE read loop ended with reason: ${closeReason}`,
|
||||
})
|
||||
}
|
||||
|
||||
span.end(nowWall)
|
||||
}
|
||||
|
||||
@@ -8,9 +8,10 @@ import { handleRunEvent } from './run'
|
||||
import { handleSessionEvent } from './session'
|
||||
import { handleSpanEvent } from './span'
|
||||
import { handleTextEvent } from './text'
|
||||
import { handleToolEvent } from './tool'
|
||||
import { handleToolEvent, prePersistClientExecutableToolCall } from './tool'
|
||||
import type { StreamHandler } from './types'
|
||||
|
||||
export { prePersistClientExecutableToolCall }
|
||||
export type { StreamHandler, ToolScope } from './types'
|
||||
|
||||
const logger = createLogger('CopilotHandlerRouting')
|
||||
|
||||
@@ -1,3 +1,62 @@
|
||||
import {
|
||||
MothershipStreamV1SpanLifecycleEvent,
|
||||
MothershipStreamV1SpanPayloadKind,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import type { StreamHandler } from './types'
|
||||
|
||||
export const handleSpanEvent: StreamHandler = () => {}
|
||||
/**
|
||||
* Mirror Go-emitted span lifecycle events onto the Sim-side TraceCollector.
|
||||
*
|
||||
* Go publishes `span` events for subagent lifecycles and structured-result
|
||||
* payloads. For subagents, the start/end pair is also used for UI routing
|
||||
* elsewhere; here we additionally record a named span on the trace collector
|
||||
* so the final RequestTraceV1 report shows the full nested structure without
|
||||
* requiring the reader to inspect the raw envelope stream.
|
||||
*/
|
||||
export const handleSpanEvent: StreamHandler = (event, context) => {
|
||||
if (event.type !== 'span') {
|
||||
return
|
||||
}
|
||||
|
||||
const payload = event.payload as {
|
||||
kind?: string
|
||||
event?: string
|
||||
agent?: string
|
||||
data?: unknown
|
||||
}
|
||||
const kind = payload?.kind ?? ''
|
||||
const evt = payload?.event ?? ''
|
||||
|
||||
if (kind === MothershipStreamV1SpanPayloadKind.subagent) {
|
||||
const scopeAgent =
|
||||
typeof payload.agent === 'string' && payload.agent ? payload.agent : 'subagent'
|
||||
if (evt === MothershipStreamV1SpanLifecycleEvent.start) {
|
||||
const span = context.trace.startSpan(`subagent:${scopeAgent}`, 'go.subagent', {
|
||||
agent: scopeAgent,
|
||||
parentToolCallId: event.scope?.parentToolCallId,
|
||||
})
|
||||
context.subAgentTraceSpans ??= new Map()
|
||||
context.subAgentTraceSpans.set(`${scopeAgent}:${event.scope?.parentToolCallId || ''}`, span)
|
||||
} else if (evt === MothershipStreamV1SpanLifecycleEvent.end) {
|
||||
const key = `${scopeAgent}:${event.scope?.parentToolCallId || ''}`
|
||||
const span = context.subAgentTraceSpans?.get(key)
|
||||
if (span) {
|
||||
context.trace.endSpan(span, 'ok')
|
||||
context.subAgentTraceSpans?.delete(key)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (
|
||||
kind === MothershipStreamV1SpanPayloadKind.structured_result ||
|
||||
kind === MothershipStreamV1SpanPayloadKind.subagent_result
|
||||
) {
|
||||
const span = context.trace.startSpan(`${kind}:${payload.agent ?? 'main'}`, `go.${kind}`, {
|
||||
agent: payload.agent,
|
||||
hasData: payload.data !== undefined,
|
||||
})
|
||||
context.trace.endSpan(span, 'ok')
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,9 @@ import {
|
||||
MothershipStreamV1ToolOutcome,
|
||||
type MothershipStreamV1ToolResultPayload,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
isToolArgsDeltaStreamEvent,
|
||||
isToolCallStreamEvent,
|
||||
@@ -52,6 +55,52 @@ function applyToolDisplay(
|
||||
if (displayTitle) toolCall.displayTitle = displayTitle
|
||||
}
|
||||
|
||||
/**
|
||||
* Upsert the durable `async_tool_calls` row before the authoritative tool-call
|
||||
* SSE frame is forwarded to the client, so `/api/copilot/confirm` can never
|
||||
* race ahead of the row that identifies the call. This is the sole
|
||||
* persistence point for client-executable tools; gating mirrors the
|
||||
* client-wait branch in `dispatchToolExecution`.
|
||||
*/
|
||||
export async function prePersistClientExecutableToolCall(
|
||||
event: StreamEvent,
|
||||
context: StreamingContext
|
||||
): Promise<void> {
|
||||
if (event.type !== 'tool') return
|
||||
if (!isToolCallStreamEvent(event)) return
|
||||
|
||||
const data = event.payload
|
||||
const isGenerating = data.status === TOOL_CALL_STATUS.generating
|
||||
const isPartial = data.partial === true || isGenerating
|
||||
if (isPartial) return
|
||||
|
||||
const ui = getToolCallUI(data)
|
||||
if (!ui.clientExecutable) return
|
||||
|
||||
const catalogEntry = getToolEntry(data.toolName)
|
||||
const isInternal = ui.internal === true || catalogEntry?.internal === true
|
||||
if (isInternal) return
|
||||
|
||||
const delegateWorkflowRunToClient = isWorkflowToolName(data.toolName)
|
||||
if (isSimExecuted(data.toolName) && !delegateWorkflowRunToClient) return
|
||||
|
||||
if (!context.runId) return
|
||||
|
||||
await upsertAsyncToolCall({
|
||||
runId: context.runId,
|
||||
toolCallId: data.toolCallId,
|
||||
toolName: data.toolName,
|
||||
args: data.arguments,
|
||||
status: MothershipStreamV1AsyncToolRecordStatus.running,
|
||||
}).catch((err) => {
|
||||
logger.warn('Failed to pre-persist async tool row before forwarding call frame', {
|
||||
toolCallId: data.toolCallId,
|
||||
toolName: data.toolName,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Unified tool event handler for both main and subagent scopes.
|
||||
*
|
||||
@@ -363,35 +412,35 @@ async function dispatchToolExecution(
|
||||
}
|
||||
} else {
|
||||
toolCall.status = 'executing'
|
||||
const pendingPromise = (async () => {
|
||||
await upsertAsyncToolCall({
|
||||
runId: context.runId,
|
||||
toolCallId,
|
||||
toolName,
|
||||
args,
|
||||
status: MothershipStreamV1AsyncToolRecordStatus.running,
|
||||
}).catch((err) => {
|
||||
logger.warn(`Failed to persist async tool row for client-executable ${scopeLabel}tool`, {
|
||||
const pendingPromise = withCopilotSpan(
|
||||
TraceSpan.CopilotToolWaitForClientResult,
|
||||
{
|
||||
[TraceAttr.ToolName]: toolName,
|
||||
[TraceAttr.ToolCallId]: toolCallId,
|
||||
[TraceAttr.ToolTimeoutMs]: options.timeout || STREAM_TIMEOUT_MS,
|
||||
...(context.runId ? { [TraceAttr.RunId]: context.runId } : {}),
|
||||
},
|
||||
async (span) => {
|
||||
const completion = await waitForToolCompletion(
|
||||
toolCallId,
|
||||
toolName,
|
||||
error: toError(err).message,
|
||||
})
|
||||
})
|
||||
const completion = await waitForToolCompletion(
|
||||
toolCallId,
|
||||
options.timeout || STREAM_TIMEOUT_MS,
|
||||
options.abortSignal
|
||||
)
|
||||
handleClientCompletion(toolCall, toolCallId, completion)
|
||||
await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options)
|
||||
return (
|
||||
completion ?? {
|
||||
status: MothershipStreamV1ToolOutcome.error,
|
||||
message: 'Tool completion missing',
|
||||
data: { error: 'Tool completion missing' },
|
||||
options.timeout || STREAM_TIMEOUT_MS,
|
||||
options.abortSignal
|
||||
)
|
||||
span.setAttribute(TraceAttr.ToolCompletionReceived, completion !== undefined)
|
||||
if (completion) {
|
||||
span.setAttribute(TraceAttr.ToolOutcome, completion.status)
|
||||
}
|
||||
)
|
||||
})().catch((err) => {
|
||||
handleClientCompletion(toolCall, toolCallId, completion)
|
||||
await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options)
|
||||
return (
|
||||
completion ?? {
|
||||
status: MothershipStreamV1ToolOutcome.error,
|
||||
message: 'Tool completion missing',
|
||||
data: { error: 'Tool completion missing' },
|
||||
}
|
||||
)
|
||||
}
|
||||
).catch((err) => {
|
||||
logger.error(`Client-executable ${scopeLabel}tool wait failed`, {
|
||||
toolCallId,
|
||||
toolName,
|
||||
|
||||
@@ -38,7 +38,7 @@ export function createRequestId(): string {
|
||||
return generateId()
|
||||
}
|
||||
|
||||
export function createShortRequestId(): string {
|
||||
function createShortRequestId(): string {
|
||||
return generateRequestId()
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { SpanStatusCode, trace } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { updateRunStatus } from '@/lib/copilot/async-runs/repository'
|
||||
@@ -5,30 +6,70 @@ import {
|
||||
MothershipStreamV1CompletionStatus,
|
||||
MothershipStreamV1EventType,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import {
|
||||
type RequestTraceV1Outcome,
|
||||
RequestTraceV1Outcome as RequestTraceV1OutcomeConst,
|
||||
} from '@/lib/copilot/generated/request-trace-v1'
|
||||
import { CopilotFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import type { StreamWriter } from '@/lib/copilot/request/session'
|
||||
import type { OrchestratorResult } from '@/lib/copilot/request/types'
|
||||
|
||||
const logger = createLogger('CopilotStreamFinalize')
|
||||
const getTracer = () => trace.getTracer('sim-copilot-finalize', '1.0.0')
|
||||
|
||||
/**
|
||||
* Single finalization path for stream results.
|
||||
* Handles abort / error / success and publishes the terminal event.
|
||||
* Replaces duplicated blocks in the old chat-streaming.ts.
|
||||
*/
|
||||
// Single finalization path. `outcome` is the caller's resolved verdict
|
||||
// so we don't have to re-derive cancel vs error from raw signals.
|
||||
export async function finalizeStream(
|
||||
result: OrchestratorResult,
|
||||
publisher: StreamWriter,
|
||||
runId: string,
|
||||
aborted: boolean,
|
||||
outcome: RequestTraceV1Outcome,
|
||||
requestId: string
|
||||
): Promise<void> {
|
||||
if (aborted) {
|
||||
return handleAborted(result, publisher, runId, requestId)
|
||||
const spanOutcome =
|
||||
outcome === RequestTraceV1OutcomeConst.cancelled
|
||||
? CopilotFinalizeOutcome.Aborted
|
||||
: outcome === RequestTraceV1OutcomeConst.success
|
||||
? CopilotFinalizeOutcome.Success
|
||||
: CopilotFinalizeOutcome.Error
|
||||
const span = getTracer().startSpan(TraceSpan.CopilotFinalizeStream, {
|
||||
attributes: {
|
||||
[TraceAttr.CopilotFinalizeOutcome]: spanOutcome,
|
||||
[TraceAttr.RunId]: runId,
|
||||
[TraceAttr.RequestId]: requestId,
|
||||
[TraceAttr.CopilotResultToolCalls]: result.toolCalls?.length ?? 0,
|
||||
[TraceAttr.CopilotResultContentBlocks]: result.contentBlocks?.length ?? 0,
|
||||
[TraceAttr.CopilotResultContentLength]: result.content?.length ?? 0,
|
||||
[TraceAttr.CopilotPublisherSawComplete]: publisher.sawComplete,
|
||||
[TraceAttr.CopilotPublisherClientDisconnected]: publisher.clientDisconnected,
|
||||
},
|
||||
})
|
||||
try {
|
||||
if (outcome === RequestTraceV1OutcomeConst.cancelled) {
|
||||
await handleAborted(result, publisher, runId, requestId)
|
||||
} else if (outcome === RequestTraceV1OutcomeConst.error) {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: result.error || 'orchestration failed',
|
||||
})
|
||||
await handleError(result, publisher, runId, requestId)
|
||||
} else {
|
||||
await handleSuccess(publisher, runId, requestId)
|
||||
}
|
||||
// Successful + cancelled paths fall through as status-unset → set
|
||||
// OK so dashboards don't show "incomplete" for normal terminals.
|
||||
if (outcome !== RequestTraceV1OutcomeConst.error) {
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
}
|
||||
} catch (error) {
|
||||
span.recordException(error instanceof Error ? error : new Error(String(error)))
|
||||
span.setStatus({ code: SpanStatusCode.ERROR, message: 'finalize threw' })
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
if (!result.success) {
|
||||
return handleError(result, publisher, runId, requestId)
|
||||
}
|
||||
return handleSuccess(publisher, runId, requestId)
|
||||
}
|
||||
|
||||
async function handleAborted(
|
||||
|
||||
@@ -2,6 +2,9 @@
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { propagation, trace } from '@opentelemetry/api'
|
||||
import { W3CTraceContextPropagator } from '@opentelemetry/core'
|
||||
import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
|
||||
import type { OrchestratorResult } from '@/lib/copilot/request/types'
|
||||
@@ -29,6 +32,8 @@ function createLifecycleResult(overrides?: Partial<OrchestratorResult>): Orchest
|
||||
|
||||
describe('runHeadlessCopilotLifecycle', () => {
|
||||
beforeEach(() => {
|
||||
trace.setGlobalTracerProvider(new BasicTracerProvider())
|
||||
propagation.setGlobalPropagator(new W3CTraceContextPropagator())
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
vi.fn().mockResolvedValue(
|
||||
@@ -155,6 +160,40 @@ describe('runHeadlessCopilotLifecycle', () => {
|
||||
expect(body.simRequestId).toBe('workflow-request-id')
|
||||
})
|
||||
|
||||
it('passes an OTel context to the lifecycle and trace report', async () => {
|
||||
let lifecycleTraceparent = ''
|
||||
runCopilotLifecycle.mockImplementationOnce(async (_payload, options) => {
|
||||
const { traceHeaders } = await import('@/lib/copilot/request/go/propagation')
|
||||
lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? ''
|
||||
return createLifecycleResult()
|
||||
})
|
||||
|
||||
await runHeadlessCopilotLifecycle(
|
||||
{
|
||||
message: 'hello',
|
||||
messageId: 'req-otel',
|
||||
},
|
||||
{
|
||||
userId: 'user-1',
|
||||
chatId: 'chat-1',
|
||||
workflowId: 'workflow-1',
|
||||
goRoute: '/api/mothership/execute',
|
||||
interactive: false,
|
||||
}
|
||||
)
|
||||
|
||||
expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
|
||||
const [, init] = vi.mocked(fetch).mock.calls[0] as [string, RequestInit]
|
||||
const headers = init.headers as Record<string, string>
|
||||
// The outbound trace report now runs inside its own OTel child span, so
|
||||
// traceparent has the same trace-id as the lifecycle but a different
|
||||
// span-id. Both must stay on the same trace.
|
||||
const lifecycleTraceId = lifecycleTraceparent.split('-')[1]
|
||||
expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
|
||||
expect(headers.traceparent.split('-')[1]).toBe(lifecycleTraceId)
|
||||
expect(headers.traceparent.split('-')[2]).not.toBe(lifecycleTraceparent.split('-')[2])
|
||||
})
|
||||
|
||||
it('reports an error trace when the lifecycle throws', async () => {
|
||||
runCopilotLifecycle.mockRejectedValueOnce(new Error('kaboom'))
|
||||
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { generateId } from '@sim/utils/id'
|
||||
import type { RequestTraceV1Outcome as RequestTraceOutcome } from '@/lib/copilot/generated/request-trace-v1'
|
||||
import {
|
||||
RequestTraceV1Outcome,
|
||||
RequestTraceV1SpanStatus,
|
||||
} from '@/lib/copilot/generated/request-trace-v1'
|
||||
import { CopilotTransport } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run'
|
||||
import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run'
|
||||
import { withCopilotOtelContext } from '@/lib/copilot/request/otel'
|
||||
import { reportTrace, TraceCollector } from '@/lib/copilot/request/trace'
|
||||
import type { OrchestratorResult } from '@/lib/copilot/request/types'
|
||||
|
||||
@@ -34,51 +35,72 @@ export async function runHeadlessCopilotLifecycle(
|
||||
let result: OrchestratorResult | undefined
|
||||
let outcome: RequestTraceOutcome = RequestTraceV1Outcome.error
|
||||
|
||||
try {
|
||||
result = await runCopilotLifecycle(requestPayload, {
|
||||
...options,
|
||||
trace,
|
||||
simRequestId,
|
||||
})
|
||||
outcome = options.abortSignal?.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: result.success
|
||||
? RequestTraceV1Outcome.success
|
||||
: RequestTraceV1Outcome.error
|
||||
return result
|
||||
} catch (error) {
|
||||
outcome = options.abortSignal?.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: RequestTraceV1Outcome.error
|
||||
throw error
|
||||
} finally {
|
||||
trace.endSpan(
|
||||
requestSpan,
|
||||
outcome === RequestTraceV1Outcome.success
|
||||
? RequestTraceV1SpanStatus.ok
|
||||
: outcome === RequestTraceV1Outcome.cancelled
|
||||
? RequestTraceV1SpanStatus.cancelled
|
||||
: RequestTraceV1SpanStatus.error
|
||||
)
|
||||
|
||||
try {
|
||||
await reportTrace(
|
||||
trace.build({
|
||||
outcome,
|
||||
return withCopilotOtelContext(
|
||||
{
|
||||
requestId: simRequestId,
|
||||
route: options.goRoute,
|
||||
chatId: options.chatId,
|
||||
workflowId: options.workflowId,
|
||||
executionId: options.executionId,
|
||||
runId: options.runId,
|
||||
transport: CopilotTransport.Headless,
|
||||
},
|
||||
async (otelContext) => {
|
||||
try {
|
||||
result = await runCopilotLifecycle(requestPayload, {
|
||||
...options,
|
||||
trace,
|
||||
simRequestId,
|
||||
chatId: result?.chatId ?? options.chatId,
|
||||
runId: options.runId,
|
||||
executionId: options.executionId,
|
||||
usage: result?.usage,
|
||||
cost: result?.cost,
|
||||
otelContext,
|
||||
})
|
||||
)
|
||||
} catch (error) {
|
||||
logger.warn('Failed to report headless trace', {
|
||||
simRequestId,
|
||||
chatId: result?.chatId ?? options.chatId,
|
||||
error: toError(error).message,
|
||||
})
|
||||
outcome = options.abortSignal?.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: result.success
|
||||
? RequestTraceV1Outcome.success
|
||||
: RequestTraceV1Outcome.error
|
||||
return result
|
||||
} catch (error) {
|
||||
outcome = options.abortSignal?.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: RequestTraceV1Outcome.error
|
||||
throw error
|
||||
} finally {
|
||||
trace.endSpan(
|
||||
requestSpan,
|
||||
outcome === RequestTraceV1Outcome.success
|
||||
? RequestTraceV1SpanStatus.ok
|
||||
: outcome === RequestTraceV1Outcome.cancelled
|
||||
? RequestTraceV1SpanStatus.cancelled
|
||||
: RequestTraceV1SpanStatus.error
|
||||
)
|
||||
|
||||
try {
|
||||
// Best-effort extraction of the prompt from the untyped
|
||||
// headless payload. Keeps parity with the streaming path
|
||||
// where `message` is destructured directly.
|
||||
const userMessage =
|
||||
typeof requestPayload.message === 'string' ? requestPayload.message : undefined
|
||||
await reportTrace(
|
||||
trace.build({
|
||||
outcome,
|
||||
simRequestId,
|
||||
chatId: result?.chatId ?? options.chatId,
|
||||
runId: options.runId,
|
||||
executionId: options.executionId,
|
||||
userMessage,
|
||||
usage: result?.usage,
|
||||
cost: result?.cost,
|
||||
}),
|
||||
otelContext
|
||||
)
|
||||
} catch (error) {
|
||||
logger.warn('Failed to report headless trace', {
|
||||
simRequestId,
|
||||
chatId: result?.chatId ?? options.chatId,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { Context } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { generateId } from '@sim/utils/id'
|
||||
@@ -50,6 +51,7 @@ export interface CopilotLifecycleOptions extends OrchestratorOptions {
|
||||
goRoute?: string
|
||||
trace?: TraceCollector
|
||||
simRequestId?: string
|
||||
otelContext?: Context
|
||||
onGoTraceId?: (goTraceId: string) => void
|
||||
executionContext?: ExecutionContext
|
||||
}
|
||||
@@ -112,6 +114,7 @@ export async function runCopilotLifecycle(
|
||||
|
||||
const context = createStreamingContext({
|
||||
chatId,
|
||||
requestId: lifecycleOptions.simRequestId,
|
||||
executionId: resolvedExecutionId,
|
||||
runId: resolvedRunId,
|
||||
messageId: payloadMsgId,
|
||||
@@ -123,6 +126,15 @@ export async function runCopilotLifecycle(
|
||||
|
||||
const result: OrchestratorResult = {
|
||||
success: context.errors.length === 0 && !context.wasAborted,
|
||||
// `cancelled` is an explicit discriminator so callers can tell
|
||||
// "user hit Stop" (don't clear the chat row; /chat/stop owns it)
|
||||
// from "backend errored" (do clear the row so the chat isn't
|
||||
// stuck with a non-null `conversationId`). An error that also
|
||||
// happens to fire the abort signal still counts as an error
|
||||
// path, but practically that doesn't happen in the success
|
||||
// branch here — if there are errors we never reach a
|
||||
// wasAborted-without-errors state.
|
||||
cancelled: context.wasAborted && context.errors.length === 0,
|
||||
content: context.accumulatedContent,
|
||||
contentBlocks: context.contentBlocks,
|
||||
toolCalls: buildToolCallSummaries(context),
|
||||
@@ -137,9 +149,23 @@ export async function runCopilotLifecycle(
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error('Copilot orchestration failed')
|
||||
logger.error('Copilot orchestration failed', { error: err.message })
|
||||
await lifecycleOptions.onError?.(err)
|
||||
// If the abort signal fired, this throw is a consequence of the
|
||||
// cancel (publisher.publish fails once the client disconnects, a
|
||||
// downstream Go read throws on ctx cancel, etc.) — NOT a real
|
||||
// backend error. Don't invoke `onError`, because on the cancel
|
||||
// path `/api/copilot/chat/stop` is the single DB writer and
|
||||
// `onError` would race with it via `finalizeAssistantTurn`,
|
||||
// clearing `conversationId` before stop's UPDATE can match (see
|
||||
// `buildOnComplete` in chat/post.ts for the full rationale).
|
||||
// Return `cancelled: true` so upstream classification stays
|
||||
// consistent with the success-path cancel result.
|
||||
const wasCancelled = lifecycleOptions.abortSignal?.aborted ?? false
|
||||
if (!wasCancelled) {
|
||||
await lifecycleOptions.onError?.(err)
|
||||
}
|
||||
return {
|
||||
success: false,
|
||||
cancelled: wasCancelled,
|
||||
content: '',
|
||||
contentBlocks: [],
|
||||
toolCalls: [],
|
||||
@@ -225,7 +251,6 @@ async function runCheckpointLoop(
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
'X-Client-Version': SIM_AGENT_VERSION,
|
||||
...(options.simRequestId ? { 'X-Sim-Request-ID': options.simRequestId } : {}),
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
},
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
import { propagation, trace } from '@opentelemetry/api'
|
||||
import { W3CTraceContextPropagator } from '@opentelemetry/core'
|
||||
import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
|
||||
import { MothershipStreamV1EventType } from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
|
||||
const {
|
||||
@@ -115,6 +118,19 @@ async function drainStream(stream: ReadableStream) {
|
||||
describe('createSSEStream terminal error handling', () => {
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks()
|
||||
trace.setGlobalTracerProvider(new BasicTracerProvider())
|
||||
propagation.setGlobalPropagator(new W3CTraceContextPropagator())
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
vi.fn().mockResolvedValue(
|
||||
new Response(JSON.stringify({ title: 'Test title' }), {
|
||||
status: 200,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
})
|
||||
)
|
||||
)
|
||||
resetBuffer.mockResolvedValue(undefined)
|
||||
clearFilePreviewSessions.mockResolvedValue(undefined)
|
||||
scheduleBufferCleanup.mockResolvedValue(undefined)
|
||||
@@ -131,6 +147,10 @@ describe('createSSEStream terminal error handling', () => {
|
||||
updateRunStatus.mockResolvedValue(null)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
vi.unstubAllGlobals()
|
||||
})
|
||||
|
||||
it('writes a terminal error event before close when orchestration returns success=false', async () => {
|
||||
runCopilotLifecycle.mockResolvedValue({
|
||||
success: false,
|
||||
@@ -190,4 +210,39 @@ describe('createSSEStream terminal error handling', () => {
|
||||
)
|
||||
expect(scheduleBufferCleanup).toHaveBeenCalledWith('stream-1')
|
||||
})
|
||||
|
||||
it('passes an OTel context into the streaming lifecycle', async () => {
|
||||
let lifecycleTraceparent = ''
|
||||
runCopilotLifecycle.mockImplementation(async (_payload, options) => {
|
||||
const { traceHeaders } = await import('@/lib/copilot/request/go/propagation')
|
||||
lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? ''
|
||||
return {
|
||||
success: true,
|
||||
content: 'OK',
|
||||
contentBlocks: [],
|
||||
toolCalls: [],
|
||||
}
|
||||
})
|
||||
|
||||
const stream = createSSEStream({
|
||||
requestPayload: { message: 'hello' },
|
||||
userId: 'user-1',
|
||||
streamId: 'stream-1',
|
||||
executionId: 'exec-1',
|
||||
runId: 'run-1',
|
||||
currentChat: null,
|
||||
isNewChat: false,
|
||||
message: 'hello',
|
||||
titleModel: 'gpt-5.4',
|
||||
requestId: 'req-otel',
|
||||
orchestrateOptions: {
|
||||
goRoute: '/api/mothership',
|
||||
workflowId: 'workflow-1',
|
||||
},
|
||||
})
|
||||
|
||||
await drainStream(stream)
|
||||
|
||||
expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { type Context, context as otelContextApi } from '@opentelemetry/api'
|
||||
import { db } from '@sim/db'
|
||||
import { copilotChats } from '@sim/db/schema'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { eq } from 'drizzle-orm'
|
||||
import { createRunSegment } from '@/lib/copilot/async-runs/repository'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
@@ -9,13 +9,25 @@ import {
|
||||
MothershipStreamV1EventType,
|
||||
MothershipStreamV1SessionKind,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
|
||||
import {
|
||||
RequestTraceV1Outcome,
|
||||
RequestTraceV1SpanStatus,
|
||||
} from '@/lib/copilot/generated/request-trace-v1'
|
||||
import {
|
||||
CopilotRequestCancelReason,
|
||||
type CopilotRequestCancelReasonValue,
|
||||
CopilotTransport,
|
||||
} from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
|
||||
import { finalizeStream } from '@/lib/copilot/request/lifecycle/finalize'
|
||||
import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run'
|
||||
import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run'
|
||||
import { type CopilotLifecycleOutcome, startCopilotOtelRoot } from '@/lib/copilot/request/otel'
|
||||
import {
|
||||
cleanupAbortMarker,
|
||||
clearFilePreviewSessions,
|
||||
isExplicitStopReason,
|
||||
registerActiveStream,
|
||||
releasePendingChatStream,
|
||||
resetBuffer,
|
||||
@@ -53,6 +65,11 @@ export interface StreamingOrchestrationParams {
|
||||
requestId: string
|
||||
workspaceId?: string
|
||||
orchestrateOptions: Omit<CopilotLifecycleOptions, 'onEvent'>
|
||||
/**
|
||||
* Pre-started root; child spans bind to it and `finish()` fires on
|
||||
* termination. Omit to let the stream start its own root (headless).
|
||||
*/
|
||||
otelRoot?: ReturnType<typeof startCopilotOtelRoot>
|
||||
}
|
||||
|
||||
export function createSSEStream(params: StreamingOrchestrationParams): ReadableStream {
|
||||
@@ -71,163 +88,334 @@ export function createSSEStream(params: StreamingOrchestrationParams): ReadableS
|
||||
requestId,
|
||||
workspaceId,
|
||||
orchestrateOptions,
|
||||
otelRoot,
|
||||
} = params
|
||||
|
||||
// Reuse caller's root if provided; otherwise start our own.
|
||||
const activeOtelRoot =
|
||||
otelRoot ??
|
||||
startCopilotOtelRoot({
|
||||
requestId,
|
||||
route: orchestrateOptions.goRoute,
|
||||
chatId,
|
||||
workflowId: orchestrateOptions.workflowId,
|
||||
executionId,
|
||||
runId,
|
||||
streamId,
|
||||
transport: CopilotTransport.Stream,
|
||||
})
|
||||
|
||||
const abortController = new AbortController()
|
||||
registerActiveStream(streamId, abortController)
|
||||
|
||||
const publisher = new StreamWriter({ streamId, chatId, requestId })
|
||||
|
||||
// Classify cancel: signal.reason (explicit-stop set) wins, then
|
||||
// clientDisconnected, else Unknown (latent contract bug — log it).
|
||||
const recordCancelled = (errorMessage?: string): CopilotRequestCancelReasonValue => {
|
||||
const rawReason = abortController.signal.reason
|
||||
let cancelReason: CopilotRequestCancelReasonValue
|
||||
if (isExplicitStopReason(rawReason)) {
|
||||
cancelReason = CopilotRequestCancelReason.ExplicitStop
|
||||
} else if (publisher.clientDisconnected) {
|
||||
cancelReason = CopilotRequestCancelReason.ClientDisconnect
|
||||
} else {
|
||||
cancelReason = CopilotRequestCancelReason.Unknown
|
||||
const serializedReason =
|
||||
rawReason === undefined
|
||||
? 'undefined'
|
||||
: rawReason instanceof Error
|
||||
? `${rawReason.name}: ${rawReason.message}`
|
||||
: typeof rawReason === 'string'
|
||||
? rawReason
|
||||
: (() => {
|
||||
try {
|
||||
return JSON.stringify(rawReason)
|
||||
} catch {
|
||||
return String(rawReason)
|
||||
}
|
||||
})()
|
||||
// Contract violation: add the new reason to AbortReason /
|
||||
// isExplicitStopReason or extend the classifier.
|
||||
logger.error(`[${requestId}] Stream cancelled with unknown abort reason`, {
|
||||
streamId,
|
||||
chatId,
|
||||
reason: serializedReason,
|
||||
})
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.CopilotAbortUnknownReason, serializedReason)
|
||||
}
|
||||
activeOtelRoot.span.setAttribute(TraceAttr.CopilotRequestCancelReason, cancelReason)
|
||||
activeOtelRoot.span.addEvent(TraceEvent.RequestCancelled, {
|
||||
[TraceAttr.CopilotRequestCancelReason]: cancelReason,
|
||||
...(errorMessage ? { [TraceAttr.ErrorMessage]: errorMessage } : {}),
|
||||
})
|
||||
return cancelReason
|
||||
}
|
||||
|
||||
const collector = new TraceCollector()
|
||||
|
||||
return new ReadableStream({
|
||||
async start(controller) {
|
||||
publisher.attach(controller)
|
||||
|
||||
const requestSpan = collector.startSpan('Mothership Request', 'request', {
|
||||
streamId,
|
||||
chatId,
|
||||
runId,
|
||||
})
|
||||
let outcome: 'success' | 'error' | 'cancelled' = 'error'
|
||||
let lifecycleResult:
|
||||
| {
|
||||
usage?: { prompt: number; completion: number }
|
||||
cost?: { input: number; output: number; total: number }
|
||||
}
|
||||
| undefined
|
||||
|
||||
await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)])
|
||||
|
||||
if (chatId) {
|
||||
createRunSegment({
|
||||
id: runId,
|
||||
executionId,
|
||||
chatId,
|
||||
userId,
|
||||
workflowId: (requestPayload.workflowId as string | undefined) || null,
|
||||
workspaceId,
|
||||
streamId,
|
||||
model: (requestPayload.model as string | undefined) || null,
|
||||
provider: (requestPayload.provider as string | undefined) || null,
|
||||
requestContext: { requestId },
|
||||
}).catch((error) => {
|
||||
logger.warn(`[${requestId}] Failed to create copilot run segment`, {
|
||||
error: toError(error).message,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
const abortPoller = startAbortPoller(streamId, abortController, { requestId })
|
||||
publisher.startKeepalive()
|
||||
|
||||
if (chatId) {
|
||||
publisher.publish({
|
||||
type: MothershipStreamV1EventType.session,
|
||||
payload: {
|
||||
kind: MothershipStreamV1SessionKind.chat,
|
||||
chatId,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fireTitleGeneration({
|
||||
chatId,
|
||||
currentChat,
|
||||
isNewChat,
|
||||
message,
|
||||
titleModel,
|
||||
titleProvider,
|
||||
workspaceId,
|
||||
requestId,
|
||||
publisher,
|
||||
})
|
||||
|
||||
try {
|
||||
const result = await runCopilotLifecycle(requestPayload, {
|
||||
...orchestrateOptions,
|
||||
executionId,
|
||||
runId,
|
||||
trace: collector,
|
||||
simRequestId: requestId,
|
||||
abortSignal: abortController.signal,
|
||||
onEvent: async (event) => {
|
||||
await publisher.publish(event)
|
||||
},
|
||||
})
|
||||
|
||||
lifecycleResult = result
|
||||
outcome = abortController.signal.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: result.success
|
||||
? RequestTraceV1Outcome.success
|
||||
: RequestTraceV1Outcome.error
|
||||
await finalizeStream(result, publisher, runId, abortController.signal.aborted, requestId)
|
||||
} catch (error) {
|
||||
outcome = abortController.signal.aborted
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: RequestTraceV1Outcome.error
|
||||
if (publisher.clientDisconnected) {
|
||||
logger.info(`[${requestId}] Stream errored after client disconnect`, {
|
||||
error: error instanceof Error ? error.message : 'Stream error',
|
||||
})
|
||||
}
|
||||
logger.error(`[${requestId}] Unexpected orchestration error:`, error)
|
||||
|
||||
const syntheticResult = {
|
||||
success: false as const,
|
||||
content: '',
|
||||
contentBlocks: [],
|
||||
toolCalls: [],
|
||||
error: 'An unexpected error occurred while processing the response.',
|
||||
}
|
||||
await finalizeStream(
|
||||
syntheticResult,
|
||||
publisher,
|
||||
runId,
|
||||
abortController.signal.aborted,
|
||||
requestId
|
||||
)
|
||||
} finally {
|
||||
collector.endSpan(
|
||||
requestSpan,
|
||||
outcome === RequestTraceV1Outcome.success
|
||||
? 'ok'
|
||||
: outcome === RequestTraceV1Outcome.cancelled
|
||||
? 'cancelled'
|
||||
: 'error'
|
||||
)
|
||||
|
||||
clearInterval(abortPoller)
|
||||
// Re-enter the root OTel context — ALS doesn't survive the
|
||||
// Next handler → ReadableStream.start boundary.
|
||||
await otelContextApi.with(activeOtelRoot.context, async () => {
|
||||
const otelContext = activeOtelRoot.context
|
||||
let rootOutcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error
|
||||
let rootError: unknown
|
||||
// `cancelReason` must be declared OUTSIDE the outer `try` so
|
||||
// it remains in scope for the outer `finally` that calls
|
||||
// `activeOtelRoot.finish(rootOutcome, rootError, cancelReason)`.
|
||||
// `let` bindings declared inside a `try` block are NOT visible
|
||||
// in the paired `finally`; referencing one there raises a
|
||||
// TDZ ReferenceError, skipping `finish()`, leaving the root
|
||||
// span never-ended, and making Tempo see every child as an
|
||||
// orphan under a phantom parent. (Regression landed 2026-04-21.)
|
||||
let cancelReason: CopilotRequestCancelReasonValue | undefined
|
||||
try {
|
||||
await publisher.close()
|
||||
} catch (error) {
|
||||
logger.warn(`[${requestId}] Failed to flush stream persistence during close`, {
|
||||
error: toError(error).message,
|
||||
const requestSpan = collector.startSpan('Mothership Request', 'request', {
|
||||
streamId,
|
||||
chatId,
|
||||
runId,
|
||||
})
|
||||
}
|
||||
unregisterActiveStream(streamId)
|
||||
if (chatId) {
|
||||
await releasePendingChatStream(chatId, streamId)
|
||||
}
|
||||
await scheduleBufferCleanup(streamId)
|
||||
await scheduleFilePreviewSessionCleanup(streamId)
|
||||
await cleanupAbortMarker(streamId)
|
||||
let outcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error
|
||||
let lifecycleResult:
|
||||
| {
|
||||
usage?: { prompt: number; completion: number }
|
||||
cost?: { input: number; output: number; total: number }
|
||||
}
|
||||
| undefined
|
||||
|
||||
const trace = collector.build({
|
||||
outcome: outcome as 'success' | 'error' | 'cancelled',
|
||||
simRequestId: requestId,
|
||||
streamId,
|
||||
chatId,
|
||||
runId,
|
||||
executionId,
|
||||
usage: lifecycleResult?.usage,
|
||||
cost: lifecycleResult?.cost,
|
||||
})
|
||||
reportTrace(trace).catch(() => {})
|
||||
}
|
||||
await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)])
|
||||
|
||||
if (chatId) {
|
||||
createRunSegment({
|
||||
id: runId,
|
||||
executionId,
|
||||
chatId,
|
||||
userId,
|
||||
workflowId: (requestPayload.workflowId as string | undefined) || null,
|
||||
workspaceId,
|
||||
streamId,
|
||||
model: (requestPayload.model as string | undefined) || null,
|
||||
provider: (requestPayload.provider as string | undefined) || null,
|
||||
requestContext: { requestId },
|
||||
}).catch((error) => {
|
||||
logger.warn(`[${requestId}] Failed to create copilot run segment`, {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
const abortPoller = startAbortPoller(streamId, abortController, {
|
||||
requestId,
|
||||
})
|
||||
publisher.startKeepalive()
|
||||
|
||||
if (chatId) {
|
||||
publisher.publish({
|
||||
type: MothershipStreamV1EventType.session,
|
||||
payload: {
|
||||
kind: MothershipStreamV1SessionKind.chat,
|
||||
chatId,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fireTitleGeneration({
|
||||
chatId,
|
||||
currentChat,
|
||||
isNewChat,
|
||||
message,
|
||||
titleModel,
|
||||
titleProvider,
|
||||
workspaceId,
|
||||
requestId,
|
||||
publisher,
|
||||
otelContext,
|
||||
})
|
||||
|
||||
try {
|
||||
const result = await runCopilotLifecycle(requestPayload, {
|
||||
...orchestrateOptions,
|
||||
executionId,
|
||||
runId,
|
||||
trace: collector,
|
||||
simRequestId: requestId,
|
||||
otelContext,
|
||||
abortSignal: abortController.signal,
|
||||
onEvent: async (event) => {
|
||||
await publisher.publish(event)
|
||||
},
|
||||
})
|
||||
|
||||
lifecycleResult = result
|
||||
// Outcome classification (priority order):
|
||||
// 1. `result.success` → success. The orchestrator
|
||||
// reporting "finished cleanly" wins over any later
|
||||
// signal change. Matters for the narrow race where
|
||||
// the user clicks Stop a beat after the stream
|
||||
// completed.
|
||||
// 2. `signal.aborted` (from `abortActiveStream` or the
|
||||
// Redis-marker poller) OR `clientDisconnected` with
|
||||
// a non-success result → cancelled. `recordCancelled`
|
||||
// further refines into explicit_stop / client_disconnect
|
||||
// / unknown via `signal.reason`.
|
||||
// 3. Otherwise → error.
|
||||
outcome = result.success
|
||||
? RequestTraceV1Outcome.success
|
||||
: abortController.signal.aborted || publisher.clientDisconnected
|
||||
? RequestTraceV1Outcome.cancelled
|
||||
: RequestTraceV1Outcome.error
|
||||
if (outcome === RequestTraceV1Outcome.cancelled) {
|
||||
cancelReason = recordCancelled()
|
||||
}
|
||||
// Pass the resolved outcome — not `signal.aborted` — so
|
||||
// `finalizeStream` classifies the same way we did above.
|
||||
// A client-disconnect-without-controller-abort still needs
|
||||
// to hit `handleAborted` (not `handleError`) so the chat
|
||||
// row gets `cancelled` terminal state instead of `error`.
|
||||
await finalizeStream(result, publisher, runId, outcome, requestId)
|
||||
} catch (error) {
|
||||
// Error-path classification: if the abort signal fired or
|
||||
// the client disconnected, treat the thrown error as a
|
||||
// cancel (same rationale as the try-path above).
|
||||
const wasCancelled = abortController.signal.aborted || publisher.clientDisconnected
|
||||
outcome = wasCancelled ? RequestTraceV1Outcome.cancelled : RequestTraceV1Outcome.error
|
||||
if (outcome === RequestTraceV1Outcome.cancelled) {
|
||||
cancelReason = recordCancelled(error instanceof Error ? error.message : String(error))
|
||||
}
|
||||
if (publisher.clientDisconnected) {
|
||||
logger.info(`[${requestId}] Stream errored after client disconnect`, {
|
||||
error: error instanceof Error ? error.message : 'Stream error',
|
||||
})
|
||||
}
|
||||
// Demote to warn when the throw came from a user-initiated
|
||||
// cancel — it isn't an "unexpected" failure then, and the
|
||||
// error-level log pollutes alerting on normal Stop presses.
|
||||
const logFn = outcome === RequestTraceV1Outcome.cancelled ? logger.warn : logger.error
|
||||
logFn.call(logger, `[${requestId}] Orchestration ended with ${outcome}:`, error)
|
||||
|
||||
const syntheticResult = {
|
||||
success: false as const,
|
||||
content: '',
|
||||
contentBlocks: [],
|
||||
toolCalls: [],
|
||||
error: 'An unexpected error occurred while processing the response.',
|
||||
}
|
||||
await finalizeStream(syntheticResult, publisher, runId, outcome, requestId)
|
||||
} finally {
|
||||
collector.endSpan(
|
||||
requestSpan,
|
||||
outcome === RequestTraceV1Outcome.success
|
||||
? RequestTraceV1SpanStatus.ok
|
||||
: outcome === RequestTraceV1Outcome.cancelled
|
||||
? RequestTraceV1SpanStatus.cancelled
|
||||
: RequestTraceV1SpanStatus.error
|
||||
)
|
||||
|
||||
clearInterval(abortPoller)
|
||||
try {
|
||||
await publisher.close()
|
||||
} catch (error) {
|
||||
logger.warn(`[${requestId}] Failed to flush stream persistence during close`, {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
}
|
||||
unregisterActiveStream(streamId)
|
||||
if (chatId) {
|
||||
await releasePendingChatStream(chatId, streamId)
|
||||
}
|
||||
await scheduleBufferCleanup(streamId)
|
||||
await scheduleFilePreviewSessionCleanup(streamId)
|
||||
await cleanupAbortMarker(streamId)
|
||||
|
||||
const trace = collector.build({
|
||||
outcome,
|
||||
simRequestId: requestId,
|
||||
streamId,
|
||||
chatId,
|
||||
runId,
|
||||
executionId,
|
||||
// Pass the raw user prompt through so the Go-side trace
|
||||
// ingest can stamp it onto the `request_traces.message`
|
||||
// column at insert time. Avoids relying on the late
|
||||
// `UpdateAnalytics` UPDATE (which silently misses many
|
||||
// rows).
|
||||
userMessage: message,
|
||||
usage: lifecycleResult?.usage,
|
||||
cost: lifecycleResult?.cost,
|
||||
})
|
||||
reportTrace(trace, otelContext).catch((err) => {
|
||||
logger.warn(`[${requestId}] Failed to report trace`, {
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
})
|
||||
})
|
||||
rootOutcome = outcome
|
||||
if (lifecycleResult?.usage) {
|
||||
activeOtelRoot.span.setAttributes({
|
||||
[TraceAttr.GenAiUsageInputTokens]: lifecycleResult.usage.prompt ?? 0,
|
||||
[TraceAttr.GenAiUsageOutputTokens]: lifecycleResult.usage.completion ?? 0,
|
||||
})
|
||||
}
|
||||
if (lifecycleResult?.cost) {
|
||||
activeOtelRoot.span.setAttributes({
|
||||
[TraceAttr.BillingCostInputUsd]: lifecycleResult.cost.input ?? 0,
|
||||
[TraceAttr.BillingCostOutputUsd]: lifecycleResult.cost.output ?? 0,
|
||||
[TraceAttr.BillingCostTotalUsd]: lifecycleResult.cost.total ?? 0,
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
rootOutcome = RequestTraceV1Outcome.error
|
||||
rootError = error
|
||||
throw error
|
||||
} finally {
|
||||
// `finish` is idempotent, so it's safe whether the POST
|
||||
// handler started the root (and may also call finish on an
|
||||
// error path before the stream ran) or we did. The cancel
|
||||
// reason (if any) determines whether `cancelled` is an
|
||||
// expected outcome (explicit_stop → status OK) or a real
|
||||
// error (client_disconnect / unknown → status ERROR).
|
||||
//
|
||||
// Belt-and-suspenders: if `finish()` itself throws (e.g. an
|
||||
// argument in the TDZ, a bad attribute, a regression in
|
||||
// status-setting), fall back to `span.end()` directly. A
|
||||
// root that never ends leaves every child orphaned in Tempo
|
||||
// under a phantom parent; force-ending it keeps the trace
|
||||
// shape intact even when the pretty-finalize path is
|
||||
// broken. The error is logged so Loki greps surface the
|
||||
// regression instead of it silently costing us trace
|
||||
// fidelity for hours.
|
||||
try {
|
||||
activeOtelRoot.finish(rootOutcome, rootError, cancelReason)
|
||||
} catch (finishError) {
|
||||
logger.error(`[${requestId}] activeOtelRoot.finish threw; force-ending root span`, {
|
||||
error: finishError instanceof Error ? finishError.message : String(finishError),
|
||||
})
|
||||
try {
|
||||
activeOtelRoot.span.end()
|
||||
} catch {
|
||||
// Already ended or an OTel internal failure — nothing
|
||||
// more we can do. The export pipe has already had its
|
||||
// chance; swallow to avoid masking the original error
|
||||
// path.
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
},
|
||||
cancel() {
|
||||
// The browser's SSE reader closed. Flip `clientDisconnected` so
|
||||
// in-flight `publisher.publish` calls silently no-op (prevents
|
||||
// enqueueing on a closed controller).
|
||||
//
|
||||
// Browser disconnect is NOT an abort — firing the controller
|
||||
// here retroactively reclassifies in-flight successful streams
|
||||
// as aborted and skips assistant persistence. Let the
|
||||
// orchestrator drain naturally; publish no-ops post-disconnect.
|
||||
// Explicit Stop still fires the controller via /chat/abort.
|
||||
publisher.markDisconnected()
|
||||
},
|
||||
})
|
||||
@@ -247,6 +435,7 @@ function fireTitleGeneration(params: {
|
||||
workspaceId?: string
|
||||
requestId: string
|
||||
publisher: StreamWriter
|
||||
otelContext?: Context
|
||||
}): void {
|
||||
const {
|
||||
chatId,
|
||||
@@ -258,10 +447,16 @@ function fireTitleGeneration(params: {
|
||||
workspaceId,
|
||||
requestId,
|
||||
publisher,
|
||||
otelContext,
|
||||
} = params
|
||||
if (!chatId || currentChat?.title || !isNewChat) return
|
||||
|
||||
requestChatTitle({ message, model: titleModel, provider: titleProvider })
|
||||
requestChatTitle({
|
||||
message,
|
||||
model: titleModel,
|
||||
provider: titleProvider,
|
||||
otelContext,
|
||||
})
|
||||
.then(async (title) => {
|
||||
if (!title) return
|
||||
await db.update(copilotChats).set({ title }).where(eq(copilotChats.id, chatId))
|
||||
@@ -270,7 +465,11 @@ function fireTitleGeneration(params: {
|
||||
payload: { kind: MothershipStreamV1SessionKind.title, title },
|
||||
})
|
||||
if (workspaceId) {
|
||||
taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'renamed' })
|
||||
taskPubSub?.publishStatusChanged({
|
||||
workspaceId,
|
||||
chatId,
|
||||
type: 'renamed',
|
||||
})
|
||||
}
|
||||
})
|
||||
.catch((error) => {
|
||||
@@ -286,20 +485,35 @@ export async function requestChatTitle(params: {
|
||||
message: string
|
||||
model: string
|
||||
provider?: string
|
||||
otelContext?: Context
|
||||
}): Promise<string | null> {
|
||||
const { message, model, provider } = params
|
||||
const { message, model, provider, otelContext } = params
|
||||
if (!message || !model) return null
|
||||
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
if (env.COPILOT_API_KEY) {
|
||||
headers['x-api-key'] = env.COPILOT_API_KEY
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(`${SIM_AGENT_API_URL}/api/generate-chat-title`, {
|
||||
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
|
||||
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/generate-chat-title`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({ message, model, ...(provider ? { provider } : {}) }),
|
||||
body: JSON.stringify({
|
||||
message,
|
||||
model,
|
||||
...(provider ? { provider } : {}),
|
||||
}),
|
||||
otelContext,
|
||||
spanName: 'sim → go /api/generate-chat-title',
|
||||
operation: 'generate_chat_title',
|
||||
attributes: {
|
||||
[TraceAttr.GenAiRequestModel]: model,
|
||||
...(provider ? { [TraceAttr.GenAiSystem]: provider } : {}),
|
||||
},
|
||||
})
|
||||
|
||||
const payload = await response.json().catch(() => ({}))
|
||||
|
||||
588
apps/sim/lib/copilot/request/otel.ts
Normal file
588
apps/sim/lib/copilot/request/otel.ts
Normal file
@@ -0,0 +1,588 @@
|
||||
import { randomBytes } from 'crypto'
|
||||
import {
|
||||
type Context,
|
||||
context,
|
||||
ROOT_CONTEXT,
|
||||
type Span,
|
||||
type SpanContext,
|
||||
SpanKind,
|
||||
SpanStatusCode,
|
||||
TraceFlags,
|
||||
trace,
|
||||
} from '@opentelemetry/api'
|
||||
import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1'
|
||||
import {
|
||||
CopilotBranchKind,
|
||||
CopilotRequestCancelReason,
|
||||
type CopilotRequestCancelReasonValue,
|
||||
CopilotSurface,
|
||||
CopilotTransport,
|
||||
} from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation'
|
||||
import { isExplicitStopReason } from '@/lib/copilot/request/session/abort-reason'
|
||||
|
||||
// OTel GenAI content-capture env var (spec:
|
||||
// https://opentelemetry.io/docs/specs/semconv/gen-ai/). Mirrored on
|
||||
// the Go side so a single var controls both halves.
|
||||
const GENAI_CAPTURE_ENV = 'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT'
|
||||
|
||||
// OTLP backends commonly reject attrs over 64 KiB; cap proactively.
|
||||
const GENAI_MESSAGE_ATTR_MAX_BYTES = 60 * 1024
|
||||
|
||||
function isGenAIMessageCaptureEnabled(): boolean {
|
||||
const raw = (process.env[GENAI_CAPTURE_ENV] || '').toLowerCase().trim()
|
||||
return raw === 'true' || raw === '1' || raw === 'yes'
|
||||
}
|
||||
|
||||
// True iff `err` represents the user explicitly clicking Stop — the
|
||||
// only cancellation we treat as expected (non-error).
|
||||
//
|
||||
// Policy across the codebase: an explicit user stop leaves span
|
||||
// status UNSET; every other cancellation (client tab close,
|
||||
// network drop, internal timeout, uncategorized abort) escalates
|
||||
// to `status=error` so it shows up on error dashboards. This is
|
||||
// the Sim mirror of `requestctx.IsExplicitUserStop` on the Go
|
||||
// side; keep the two semantically aligned.
|
||||
//
|
||||
// Detection modes:
|
||||
//
|
||||
// - Plain-string reject value: `controller.abort('user_stop:...')`
|
||||
// rejects fetch() with the reason STRING directly. Matches
|
||||
// `isExplicitStopReason()` exactly (UserStop / RedisPoller).
|
||||
// - DOMException / Error object: `controller.abort()` with no arg
|
||||
// (or older runtimes) rejects with an AbortError whose `.cause`
|
||||
// or `.message` may carry the reason. We inspect both.
|
||||
//
|
||||
// Anything that doesn't resolve to an explicit-stop reason (plain
|
||||
// AbortError with no identifiable cause, timeout-flavored aborts,
|
||||
// arbitrary Error instances) returns false and gets `status=error`.
|
||||
export function isExplicitUserStopError(err: unknown): boolean {
|
||||
if (err == null) return false
|
||||
if (typeof err === 'string') return isExplicitStopReason(err)
|
||||
if (typeof err === 'object') {
|
||||
const e = err as { cause?: unknown; message?: unknown }
|
||||
if (isExplicitStopReason(e.cause)) return true
|
||||
if (typeof e.message === 'string' && isExplicitStopReason(e.message)) return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* True iff an HTTP response status code represents a real server-side
|
||||
* problem (5xx) or a user-visible condition we want to alert on
|
||||
* (402 Payment Required, 409 Conflict, 429 Too Many Requests).
|
||||
*
|
||||
* Everything else — in particular the 4xx flood from bot probes and
|
||||
* expected auth/validation rejections — stays UNSET on the span so
|
||||
* dashboards don't treat normal rejections as errors.
|
||||
*
|
||||
* Mirrored on the Go side in
|
||||
* `copilot/internal/http/middleware/telemetry.go`. Keep the two in
|
||||
* sync if you change the actionable set.
|
||||
*/
|
||||
export function isActionableErrorStatus(code: number): boolean {
|
||||
if (code >= 500) return true
|
||||
return code === 402 || code === 409 || code === 429
|
||||
}
|
||||
|
||||
// Record exception + set ERROR unless the error is an explicit user
|
||||
// stop (see `isExplicitUserStopError`). Every other cancellation —
|
||||
// client disconnect, internal timeout, uncategorized AbortError —
|
||||
// becomes a real error that the dashboards will surface.
|
||||
export function markSpanForError(span: Span, error: unknown): void {
|
||||
const asError = error instanceof Error ? error : new Error(String(error))
|
||||
span.recordException(asError)
|
||||
if (!isExplicitUserStopError(error)) {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// OTel GenAI message shape (kept minimal). Mirror changes on the Go side.
|
||||
/**
 * One content part of a GenAI message: plain text, a tool invocation,
 * or a tool result. Kept minimal — mirror changes on the Go side.
 */
interface GenAIAgentPart {
  /** Discriminator for the part variant. */
  type: 'text' | 'tool_call' | 'tool_call_response'
  /** Text content — used by 'text' parts. */
  content?: string
  /** Tool-call identifier — used by tool_call / tool_call_response parts. */
  id?: string
  /** Tool name — used by 'tool_call' parts. */
  name?: string
  /** Tool-call arguments — used by 'tool_call' parts. */
  arguments?: Record<string, unknown>
  /** Serialized tool output — presumably for 'tool_call_response' parts; not populated in this file. */
  response?: string
}

/** A single chat message attributed to one role, composed of ordered parts. */
interface GenAIAgentMessage {
  role: 'system' | 'user' | 'assistant' | 'tool'
  parts: GenAIAgentPart[]
}
|
||||
|
||||
function marshalAgentMessages(messages: GenAIAgentMessage[]): string | undefined {
|
||||
if (messages.length === 0) return undefined
|
||||
const json = JSON.stringify(messages)
|
||||
if (json.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return json
|
||||
// Simple tail-preserving truncation: drop from the front until we
|
||||
// fit. Matches the Go side's behavior. The last message is
|
||||
// usually the most diagnostic for span-level outcome.
|
||||
let remaining = messages.slice()
|
||||
while (remaining.length > 1) {
|
||||
remaining = remaining.slice(1)
|
||||
const candidate = JSON.stringify(remaining)
|
||||
if (candidate.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return candidate
|
||||
}
|
||||
// Single message still over cap — truncate the text part in place
|
||||
// with a marker so the partial content is still readable.
|
||||
const only = remaining[0]
|
||||
for (const part of only.parts) {
|
||||
if (part.type === 'text' && part.content) {
|
||||
const headroom = GENAI_MESSAGE_ATTR_MAX_BYTES - 1024
|
||||
if (part.content.length > headroom) {
|
||||
part.content = `${part.content.slice(0, headroom)}\n\n[truncated: capture cap ${GENAI_MESSAGE_ATTR_MAX_BYTES} bytes]`
|
||||
}
|
||||
}
|
||||
}
|
||||
const final = JSON.stringify([only])
|
||||
return final.length <= GENAI_MESSAGE_ATTR_MAX_BYTES ? final : undefined
|
||||
}
|
||||
|
||||
/** Input side of a GenAI exchange, captured onto the agent span when content capture is enabled. */
interface CopilotAgentInputMessages {
  /** The user's prompt text, if present. */
  userMessage?: string
  /** The system prompt, if present. */
  systemPrompt?: string
}

/** Output side of a GenAI exchange, captured onto the agent span when content capture is enabled. */
interface CopilotAgentOutputMessages {
  /** The assistant's final text, if any. */
  assistantText?: string
  /** Tool invocations the assistant issued, in order. */
  toolCalls?: Array<{
    id: string
    name: string
    arguments?: Record<string, unknown>
  }>
}
|
||||
|
||||
function setAgentInputMessages(span: Span, input: CopilotAgentInputMessages): void {
|
||||
if (!isGenAIMessageCaptureEnabled()) return
|
||||
const messages: GenAIAgentMessage[] = []
|
||||
if (input.systemPrompt) {
|
||||
messages.push({
|
||||
role: 'system',
|
||||
parts: [{ type: 'text', content: input.systemPrompt }],
|
||||
})
|
||||
}
|
||||
if (input.userMessage) {
|
||||
messages.push({
|
||||
role: 'user',
|
||||
parts: [{ type: 'text', content: input.userMessage }],
|
||||
})
|
||||
}
|
||||
const serialized = marshalAgentMessages(messages)
|
||||
if (serialized) {
|
||||
span.setAttribute(TraceAttr.GenAiInputMessages, serialized)
|
||||
}
|
||||
}
|
||||
|
||||
function setAgentOutputMessages(span: Span, output: CopilotAgentOutputMessages): void {
|
||||
if (!isGenAIMessageCaptureEnabled()) return
|
||||
const parts: GenAIAgentPart[] = []
|
||||
if (output.assistantText) {
|
||||
parts.push({ type: 'text', content: output.assistantText })
|
||||
}
|
||||
for (const tc of output.toolCalls ?? []) {
|
||||
parts.push({
|
||||
type: 'tool_call',
|
||||
id: tc.id,
|
||||
name: tc.name,
|
||||
...(tc.arguments ? { arguments: tc.arguments } : {}),
|
||||
})
|
||||
}
|
||||
if (parts.length === 0) return
|
||||
const serialized = marshalAgentMessages([{ role: 'assistant', parts }])
|
||||
if (serialized) {
|
||||
span.setAttribute(TraceAttr.GenAiOutputMessages, serialized)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Terminal outcome of one copilot lifecycle run — the value union of
 * the generated RequestTraceV1Outcome object (derived via indexed
 * access so it stays in sync with the generated constants).
 */
export type CopilotLifecycleOutcome =
  (typeof RequestTraceV1Outcome)[keyof typeof RequestTraceV1Outcome]
|
||||
|
||||
// Lazy tracer — Next 16/Turbopack can evaluate modules before NodeSDK
|
||||
// installs the real TracerProvider; resolving per call avoids a
|
||||
// cached NoOpTracer silently disabling OTel.
|
||||
export function getCopilotTracer() {
|
||||
return trace.getTracer('sim-ai-platform', '1.0.0')
|
||||
}
|
||||
|
||||
function getTracer() {
|
||||
return getCopilotTracer()
|
||||
}
|
||||
|
||||
// Wrap an inbound handler that Go called into so its span parents
|
||||
// under the Go-side trace (via `traceparent`).
|
||||
export async function withIncomingGoSpan<T>(
|
||||
headers: Headers,
|
||||
spanName: string,
|
||||
attributes: Record<string, string | number | boolean> | undefined,
|
||||
fn: (span: Span) => Promise<T>
|
||||
): Promise<T> {
|
||||
const parentContext = contextFromRequestHeaders(headers)
|
||||
const tracer = getTracer()
|
||||
return tracer.startActiveSpan(
|
||||
spanName,
|
||||
{ kind: SpanKind.SERVER, attributes },
|
||||
parentContext,
|
||||
async (span) => {
|
||||
try {
|
||||
const result = await fn(span)
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
return result
|
||||
} catch (error) {
|
||||
markSpanForError(span, error)
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
// Wrap a copilot-lifecycle op in an OTel span. Pass `parentContext`
|
||||
// explicitly when AsyncLocalStorage-tracked context can be dropped
|
||||
// across multiple awaits (otherwise the child falls back to a framework
|
||||
// span that the sampler drops).
|
||||
export async function withCopilotSpan<T>(
|
||||
spanName: string,
|
||||
attributes: Record<string, string | number | boolean> | undefined,
|
||||
fn: (span: Span) => Promise<T>,
|
||||
parentContext?: Context
|
||||
): Promise<T> {
|
||||
const tracer = getTracer()
|
||||
const runBody = async (span: Span) => {
|
||||
try {
|
||||
const result = await fn(span)
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
return result
|
||||
} catch (error) {
|
||||
markSpanForError(span, error)
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
if (parentContext) {
|
||||
return tracer.startActiveSpan(spanName, { attributes }, parentContext, runBody)
|
||||
}
|
||||
return tracer.startActiveSpan(spanName, { attributes }, runBody)
|
||||
}
|
||||
|
||||
// External OTel `tool.execute` span for Sim-side tool work (the Go
|
||||
// side's `tool.execute` is just the enqueue, stays ~0ms).
|
||||
export async function withCopilotToolSpan<T>(
|
||||
input: {
|
||||
toolName: string
|
||||
toolCallId: string
|
||||
runId?: string
|
||||
chatId?: string
|
||||
argsBytes?: number
|
||||
argsPreview?: string
|
||||
},
|
||||
fn: (span: Span) => Promise<T>
|
||||
): Promise<T> {
|
||||
const tracer = getTracer()
|
||||
return tracer.startActiveSpan(
|
||||
`tool.execute ${input.toolName}`,
|
||||
{
|
||||
attributes: {
|
||||
[TraceAttr.ToolName]: input.toolName,
|
||||
[TraceAttr.ToolCallId]: input.toolCallId,
|
||||
[TraceAttr.ToolExecutor]: 'sim',
|
||||
...(input.runId ? { [TraceAttr.RunId]: input.runId } : {}),
|
||||
...(input.chatId ? { [TraceAttr.ChatId]: input.chatId } : {}),
|
||||
...(typeof input.argsBytes === 'number'
|
||||
? { [TraceAttr.ToolArgsBytes]: input.argsBytes }
|
||||
: {}),
|
||||
// argsPreview can leak pasted credentials in tool args; gate
|
||||
// behind the GenAI content-capture env var.
|
||||
...(input.argsPreview && isGenAIMessageCaptureEnabled()
|
||||
? { [TraceAttr.ToolArgsPreview]: input.argsPreview }
|
||||
: {}),
|
||||
},
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const result = await fn(span)
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
return result
|
||||
} catch (error) {
|
||||
markSpanForError(span, error)
|
||||
throw error
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
function isValidSpanContext(spanContext: SpanContext): boolean {
|
||||
return (
|
||||
/^[0-9a-f]{32}$/.test(spanContext.traceId) &&
|
||||
spanContext.traceId !== '00000000000000000000000000000000' &&
|
||||
/^[0-9a-f]{16}$/.test(spanContext.spanId) &&
|
||||
spanContext.spanId !== '0000000000000000'
|
||||
)
|
||||
}
|
||||
|
||||
function createFallbackSpanContext(): SpanContext {
|
||||
return {
|
||||
traceId: randomBytes(16).toString('hex'),
|
||||
spanId: randomBytes(8).toString('hex'),
|
||||
traceFlags: TraceFlags.SAMPLED,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Identifying metadata for one copilot request's root OTel span.
 * Optional fields are stamped as span attributes only when present
 * (see buildAgentSpanAttributes).
 */
interface CopilotOtelScope {
  // Leave unset on the chat POST — startCopilotOtelRoot will derive
  // from the root span's OTel trace ID (same value Grafana uses).
  // Set explicitly on paths that need a non-trace-derived ID (headless,
  // resume taking an ID from persisted state).
  requestId?: string
  /** Copilot route being served; stamped as TraceAttr.CopilotRoute ('' when absent). */
  route?: string
  /** Chat the request belongs to, if any. */
  chatId?: string
  /** Workflow being operated on, if any. */
  workflowId?: string
  /** Execution identifier, if any. */
  executionId?: string
  /** Run identifier, if any. */
  runId?: string
  /** SSE stream identifier, if any. */
  streamId?: string
  /** Delivery mode — matches CopilotTransport's values. */
  transport: 'headless' | 'stream'
  /** Raw user message; collapsed/truncated into a preview attribute only when GenAI content capture is enabled. */
  userMessagePreview?: string
}
|
||||
|
||||
// Dashboard-column width; long enough for triage disambiguation.
|
||||
const USER_MESSAGE_PREVIEW_MAX_CHARS = 500
|
||||
function buildAgentSpanAttributes(
|
||||
scope: CopilotOtelScope & { requestId: string }
|
||||
): Record<string, string | number | boolean> {
|
||||
// Gated behind the same env var as full GenAI message capture — a
|
||||
// 500-char preview is still user prompt content.
|
||||
const preview = isGenAIMessageCaptureEnabled()
|
||||
? truncateUserMessagePreview(scope.userMessagePreview)
|
||||
: undefined
|
||||
return {
|
||||
[TraceAttr.GenAiAgentName]: 'mothership',
|
||||
[TraceAttr.GenAiAgentId]:
|
||||
scope.transport === CopilotTransport.Stream ? 'mothership-stream' : 'mothership-headless',
|
||||
[TraceAttr.GenAiOperationName]:
|
||||
scope.transport === CopilotTransport.Stream ? 'chat' : 'invoke_agent',
|
||||
[TraceAttr.RequestId]: scope.requestId,
|
||||
[TraceAttr.SimRequestId]: scope.requestId,
|
||||
[TraceAttr.CopilotRoute]: scope.route ?? '',
|
||||
[TraceAttr.CopilotTransport]: scope.transport,
|
||||
...(scope.chatId ? { [TraceAttr.ChatId]: scope.chatId } : {}),
|
||||
...(scope.workflowId ? { [TraceAttr.WorkflowId]: scope.workflowId } : {}),
|
||||
...(scope.executionId ? { [TraceAttr.CopilotExecutionId]: scope.executionId } : {}),
|
||||
...(scope.runId ? { [TraceAttr.RunId]: scope.runId } : {}),
|
||||
...(scope.streamId ? { [TraceAttr.StreamId]: scope.streamId } : {}),
|
||||
...(preview ? { [TraceAttr.CopilotUserMessagePreview]: preview } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
function truncateUserMessagePreview(raw: unknown): string | undefined {
|
||||
if (typeof raw !== 'string') return undefined
|
||||
const collapsed = raw.replace(/\s+/g, ' ').trim()
|
||||
if (!collapsed) return undefined
|
||||
if (collapsed.length <= USER_MESSAGE_PREVIEW_MAX_CHARS) return collapsed
|
||||
return `${collapsed.slice(0, USER_MESSAGE_PREVIEW_MAX_CHARS - 1)}…`
|
||||
}
|
||||
|
||||
// Request-shape metadata known only after branch resolution. Stamped
|
||||
// on the root span for dashboard filtering.
|
||||
interface CopilotOtelRequestShape {
|
||||
branchKind?: 'workflow' | 'workspace'
|
||||
mode?: string
|
||||
model?: string
|
||||
provider?: string
|
||||
createNewChat?: boolean
|
||||
prefetch?: boolean
|
||||
fileAttachmentsCount?: number
|
||||
resourceAttachmentsCount?: number
|
||||
contextsCount?: number
|
||||
commandsCount?: number
|
||||
pendingStreamWaitMs?: number
|
||||
interruptedPriorStream?: boolean
|
||||
}
|
||||
|
||||
interface CopilotOtelRoot {
|
||||
span: Span
|
||||
context: Context
|
||||
/**
|
||||
* Finalize the root span. `cancelReason`, when provided, decides
|
||||
* whether a `cancelled` outcome leaves span status UNSET (for
|
||||
* explicit user stops — our single non-error cancel class) or
|
||||
* escalates to ERROR (client disconnect, unknown, etc.). Omit it
|
||||
* for non-cancellation outcomes.
|
||||
*/
|
||||
finish: (
|
||||
outcome?: CopilotLifecycleOutcome,
|
||||
error?: unknown,
|
||||
cancelReason?: CopilotRequestCancelReasonValue
|
||||
) => void
|
||||
setInputMessages: (input: CopilotAgentInputMessages) => void
|
||||
setOutputMessages: (output: CopilotAgentOutputMessages) => void
|
||||
setRequestShape: (shape: CopilotOtelRequestShape) => void
|
||||
}
|
||||
|
||||
export function startCopilotOtelRoot(
|
||||
scope: CopilotOtelScope
|
||||
): CopilotOtelRoot & { requestId: string } {
|
||||
// TRUE root — don't inherit from Next's HTTP handler span (the
|
||||
// sampler drops those; we'd orphan the whole mothership tree).
|
||||
const parentContext = ROOT_CONTEXT
|
||||
// Start with a placeholder `requestId`, then overwrite using the
|
||||
// span's actual trace ID so the UI copy-button value pastes
|
||||
// directly into Grafana.
|
||||
const span = getTracer().startSpan(
|
||||
TraceSpan.GenAiAgentExecute,
|
||||
{ attributes: buildAgentSpanAttributes({ ...scope, requestId: '' }) },
|
||||
parentContext
|
||||
)
|
||||
const carrierSpan = isValidSpanContext(span.spanContext())
|
||||
? span
|
||||
: trace.wrapSpanContext(createFallbackSpanContext())
|
||||
const spanContext = carrierSpan.spanContext()
|
||||
const requestId =
|
||||
scope.requestId ??
|
||||
(spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '')
|
||||
span.setAttribute(TraceAttr.RequestId, requestId)
|
||||
span.setAttribute(TraceAttr.SimRequestId, requestId)
|
||||
const rootContext = trace.setSpan(parentContext, carrierSpan)
|
||||
|
||||
let finished = false
|
||||
const finish: CopilotOtelRoot['finish'] = (outcome, error, cancelReason) => {
|
||||
if (finished) return
|
||||
finished = true
|
||||
const resolvedOutcome = outcome ?? RequestTraceV1Outcome.success
|
||||
span.setAttribute(TraceAttr.CopilotRequestOutcome, resolvedOutcome)
|
||||
// Policy: `explicit_stop` is the ONLY cancellation we treat as
|
||||
// expected (status unset → dashboards see it as OK). Everything
|
||||
// else — client_disconnect, unknown reason, bug-case cancels —
|
||||
// escalates to ERROR so it shows up on error panels.
|
||||
const isExplicitStop = cancelReason === CopilotRequestCancelReason.ExplicitStop
|
||||
if (error) {
|
||||
markSpanForError(span, error)
|
||||
if (isExplicitStop || isExplicitUserStopError(error)) {
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
}
|
||||
} else if (resolvedOutcome === RequestTraceV1Outcome.success) {
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
} else if (resolvedOutcome === RequestTraceV1Outcome.cancelled) {
|
||||
if (isExplicitStop) {
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
} else {
|
||||
span.setStatus({
|
||||
code: SpanStatusCode.ERROR,
|
||||
message: `cancelled: ${cancelReason ?? 'unknown'}`,
|
||||
})
|
||||
}
|
||||
}
|
||||
span.end()
|
||||
}
|
||||
|
||||
return {
|
||||
span,
|
||||
context: rootContext,
|
||||
requestId,
|
||||
finish,
|
||||
setInputMessages: (input) => setAgentInputMessages(span, input),
|
||||
setOutputMessages: (output) => setAgentOutputMessages(span, output),
|
||||
setRequestShape: (shape) => applyRequestShape(span, shape),
|
||||
}
|
||||
}
|
||||
|
||||
// Pending-stream-lock wait above this = inferred send-to-interrupt.
|
||||
const INTERRUPT_WAIT_MS_THRESHOLD = 50
|
||||
|
||||
function applyRequestShape(span: Span, shape: CopilotOtelRequestShape): void {
|
||||
if (shape.branchKind) {
|
||||
span.setAttribute(TraceAttr.CopilotBranchKind, shape.branchKind)
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotSurface,
|
||||
shape.branchKind === CopilotBranchKind.Workflow
|
||||
? CopilotSurface.Copilot
|
||||
: CopilotSurface.Mothership
|
||||
)
|
||||
}
|
||||
if (shape.mode) span.setAttribute(TraceAttr.CopilotMode, shape.mode)
|
||||
if (shape.model) span.setAttribute(TraceAttr.GenAiRequestModel, shape.model)
|
||||
if (shape.provider) span.setAttribute(TraceAttr.GenAiSystem, shape.provider)
|
||||
if (typeof shape.createNewChat === 'boolean') {
|
||||
span.setAttribute(TraceAttr.CopilotChatIsNew, shape.createNewChat)
|
||||
}
|
||||
if (typeof shape.prefetch === 'boolean') {
|
||||
span.setAttribute(TraceAttr.CopilotPrefetch, shape.prefetch)
|
||||
}
|
||||
if (typeof shape.fileAttachmentsCount === 'number') {
|
||||
span.setAttribute(TraceAttr.CopilotFileAttachmentsCount, shape.fileAttachmentsCount)
|
||||
}
|
||||
if (typeof shape.resourceAttachmentsCount === 'number') {
|
||||
span.setAttribute(TraceAttr.CopilotResourceAttachmentsCount, shape.resourceAttachmentsCount)
|
||||
}
|
||||
if (typeof shape.contextsCount === 'number') {
|
||||
span.setAttribute(TraceAttr.CopilotContextsCount, shape.contextsCount)
|
||||
}
|
||||
if (typeof shape.commandsCount === 'number') {
|
||||
span.setAttribute(TraceAttr.CopilotCommandsCount, shape.commandsCount)
|
||||
}
|
||||
if (typeof shape.pendingStreamWaitMs === 'number') {
|
||||
span.setAttribute(TraceAttr.CopilotPendingStreamWaitMs, shape.pendingStreamWaitMs)
|
||||
const interrupted =
|
||||
typeof shape.interruptedPriorStream === 'boolean'
|
||||
? shape.interruptedPriorStream
|
||||
: shape.pendingStreamWaitMs > INTERRUPT_WAIT_MS_THRESHOLD
|
||||
span.setAttribute(TraceAttr.CopilotInterruptedPriorStream, interrupted)
|
||||
} else if (typeof shape.interruptedPriorStream === 'boolean') {
|
||||
span.setAttribute(TraceAttr.CopilotInterruptedPriorStream, shape.interruptedPriorStream)
|
||||
}
|
||||
}
|
||||
|
||||
export async function withCopilotOtelContext<T>(
|
||||
scope: CopilotOtelScope,
|
||||
fn: (otelContext: Context) => Promise<T>
|
||||
): Promise<T> {
|
||||
const parentContext = context.active()
|
||||
// Same trace-id-derives-requestId dance as startCopilotOtelRoot — see
|
||||
// that function for the rationale. Stamp a placeholder, read the real
|
||||
// trace ID off the span, then overwrite.
|
||||
const span = getTracer().startSpan(
|
||||
TraceSpan.GenAiAgentExecute,
|
||||
{ attributes: buildAgentSpanAttributes({ ...scope, requestId: scope.requestId ?? '' }) },
|
||||
parentContext
|
||||
)
|
||||
const carrierSpan = isValidSpanContext(span.spanContext())
|
||||
? span
|
||||
: trace.wrapSpanContext(createFallbackSpanContext())
|
||||
const spanContext = carrierSpan.spanContext()
|
||||
const resolvedRequestId =
|
||||
scope.requestId ??
|
||||
(spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '')
|
||||
if (resolvedRequestId) {
|
||||
span.setAttribute(TraceAttr.RequestId, resolvedRequestId)
|
||||
span.setAttribute(TraceAttr.SimRequestId, resolvedRequestId)
|
||||
}
|
||||
const otelContext = trace.setSpan(parentContext, carrierSpan)
|
||||
let terminalStatusSet = false
|
||||
|
||||
try {
|
||||
const result = await context.with(otelContext, () => fn(otelContext))
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
terminalStatusSet = true
|
||||
return result
|
||||
} catch (error) {
|
||||
markSpanForError(span, error)
|
||||
terminalStatusSet = true
|
||||
throw error
|
||||
} finally {
|
||||
if (!terminalStatusSet) {
|
||||
// Extremely defensive: should be unreachable, but avoids leaking
|
||||
// an unset span status if some future refactor breaks both arms.
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
}
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
42
apps/sim/lib/copilot/request/session/abort-reason.ts
Normal file
42
apps/sim/lib/copilot/request/session/abort-reason.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Abort-reason vocabulary for Sim-originated cancellations.
|
||||
*
|
||||
* This is deliberately a zero-dependency module (no OTel, no logger,
|
||||
* no DB) so it can be imported from both the telemetry layer
|
||||
* (`request/otel.ts`) and the abort layer (`request/session/abort.ts`)
|
||||
* without creating a circular dependency. The longer prose lives in
|
||||
* `abort.ts`; anything here is the raw classification vocabulary
|
||||
* consumed by span-status / finalizer code.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Reason strings passed to `AbortController.abort(reason)` for every
|
||||
* Sim-originated cancel path.
|
||||
*/
|
||||
export const AbortReason = {
|
||||
/** Same-process stop: browser→Sim→abortActiveStream. */
|
||||
UserStop: 'user_stop:abortActiveStream',
|
||||
/**
|
||||
* Cross-process stop: the Sim node that holds the SSE didn't
|
||||
* receive the Stop HTTP call, but it polled the Redis abort marker
|
||||
* that the node that DID receive it wrote, and aborts on the poll.
|
||||
*/
|
||||
RedisPoller: 'redis_abort_marker:poller',
|
||||
/** Internal timeout on the outbound explicit-abort fetch to Go. */
|
||||
ExplicitAbortFetchTimeout: 'timeout:go_explicit_abort_fetch',
|
||||
} as const
|
||||
|
||||
export type AbortReasonValue = (typeof AbortReason)[keyof typeof AbortReason]
|
||||
|
||||
/**
|
||||
* True iff `reason` indicates the user explicitly triggered the abort
|
||||
* (as opposed to an implicit client disconnect or server timeout).
|
||||
* Treated as a small closed vocabulary — any string not in
|
||||
* `AbortReason` is presumed non-explicit. This is the canonical
|
||||
* "should I treat this cancellation as expected?" predicate: span
|
||||
* status-setters consult it to suppress ERROR only for user-initiated
|
||||
* stops, mirroring `requestctx.IsExplicitUserStop` on the Go side.
|
||||
*/
|
||||
export function isExplicitStopReason(reason: unknown): boolean {
|
||||
return reason === AbortReason.UserStop || reason === AbortReason.RedisPoller
|
||||
}
|
||||
@@ -1,7 +1,12 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { sleep } from '@sim/utils/helpers'
|
||||
import { AbortBackend } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import { acquireLock, getRedisClient, releaseLock } from '@/lib/core/config/redis'
|
||||
import { AbortReason } from './abort-reason'
|
||||
import { clearAbortMarker, hasAbortMarker, writeAbortMarker } from './buffer'
|
||||
|
||||
const logger = createLogger('SessionAbort')
|
||||
@@ -122,74 +127,136 @@ export async function acquirePendingChatStream(
|
||||
streamId: string,
|
||||
timeoutMs = 5_000
|
||||
): Promise<boolean> {
|
||||
const redis = getRedisClient()
|
||||
if (redis) {
|
||||
const deadline = Date.now() + timeoutMs
|
||||
for (;;) {
|
||||
try {
|
||||
const acquired = await acquireLock(
|
||||
getChatStreamLockKey(chatId),
|
||||
streamId,
|
||||
CHAT_STREAM_LOCK_TTL_SECONDS
|
||||
)
|
||||
if (acquired) {
|
||||
// Span records wall time spent waiting for the per-chat stream lock.
|
||||
// Typical case: sub-10ms uncontested acquire. Worst case: up to
|
||||
// `timeoutMs` spent polling while a prior stream finishes. Previously
|
||||
// this time looked like "unexplained gap before llm.stream".
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotChatAcquirePendingStreamLock,
|
||||
{
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.LockTimeoutMs]: timeoutMs,
|
||||
},
|
||||
async (span) => {
|
||||
const redis = getRedisClient()
|
||||
span.setAttribute(TraceAttr.LockBackend, redis ? AbortBackend.Redis : AbortBackend.InProcess)
|
||||
if (redis) {
|
||||
const deadline = Date.now() + timeoutMs
|
||||
for (;;) {
|
||||
try {
|
||||
const acquired = await acquireLock(
|
||||
getChatStreamLockKey(chatId),
|
||||
streamId,
|
||||
CHAT_STREAM_LOCK_TTL_SECONDS
|
||||
)
|
||||
if (acquired) {
|
||||
registerPendingChatStream(chatId, streamId)
|
||||
span.setAttribute(TraceAttr.LockAcquired, true)
|
||||
return true
|
||||
}
|
||||
if (!pendingChatStreams.has(chatId)) {
|
||||
const ownerStreamId = await redis.get(getChatStreamLockKey(chatId))
|
||||
if (ownerStreamId) {
|
||||
const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId)
|
||||
if (settled) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Failed to acquire chat stream lock', {
|
||||
chatId,
|
||||
streamId,
|
||||
error: toError(error).message,
|
||||
})
|
||||
}
|
||||
|
||||
if (Date.now() >= deadline) {
|
||||
span.setAttribute(TraceAttr.LockAcquired, false)
|
||||
span.setAttribute(TraceAttr.LockTimedOut, true)
|
||||
return false
|
||||
}
|
||||
await sleep(200)
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
const existing = pendingChatStreams.get(chatId)
|
||||
if (!existing) {
|
||||
registerPendingChatStream(chatId, streamId)
|
||||
span.setAttribute(TraceAttr.LockAcquired, true)
|
||||
return true
|
||||
}
|
||||
if (!pendingChatStreams.has(chatId)) {
|
||||
const ownerStreamId = await redis.get(getChatStreamLockKey(chatId))
|
||||
if (ownerStreamId) {
|
||||
const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId)
|
||||
if (settled) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
const settled = await Promise.race([
|
||||
existing.promise.then(() => true),
|
||||
new Promise<boolean>((resolve) => setTimeout(() => resolve(false), timeoutMs)),
|
||||
])
|
||||
if (!settled) {
|
||||
span.setAttribute(TraceAttr.LockAcquired, false)
|
||||
span.setAttribute(TraceAttr.LockTimedOut, true)
|
||||
return false
|
||||
}
|
||||
} catch (error) {
|
||||
logger.warn('Failed to acquire chat stream lock', {
|
||||
chatId,
|
||||
streamId,
|
||||
error: toError(error).message,
|
||||
})
|
||||
}
|
||||
|
||||
if (Date.now() >= deadline) {
|
||||
return false
|
||||
}
|
||||
await sleep(200)
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
const existing = pendingChatStreams.get(chatId)
|
||||
if (!existing) {
|
||||
registerPendingChatStream(chatId, streamId)
|
||||
return true
|
||||
}
|
||||
|
||||
const settled = await Promise.race([
|
||||
existing.promise.then(() => true),
|
||||
new Promise<boolean>((resolve) => setTimeout(() => resolve(false), timeoutMs)),
|
||||
])
|
||||
if (!settled) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns `true` if it aborted an in-process controller,
|
||||
* `false` if it only wrote the marker (no local controller found).
|
||||
*
|
||||
* Spanned because the two operations inside can stall independently
|
||||
* — Redis latency on `writeAbortMarker` was previously invisible, and
|
||||
* the "no local controller" branch (happens when the stream handler
|
||||
* is on a different Sim box than the one receiving /chat/abort) is
|
||||
* a subtle but important outcome to distinguish from "aborted a live
|
||||
* controller" in dashboards.
|
||||
*/
|
||||
export async function abortActiveStream(streamId: string): Promise<boolean> {
|
||||
await writeAbortMarker(streamId)
|
||||
const controller = activeStreams.get(streamId)
|
||||
if (!controller) return false
|
||||
controller.abort('user_stop:abortActiveStream')
|
||||
activeStreams.delete(streamId)
|
||||
return true
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotChatAbortActiveStream,
|
||||
{ [TraceAttr.StreamId]: streamId },
|
||||
async (span) => {
|
||||
await writeAbortMarker(streamId)
|
||||
span.setAttribute(TraceAttr.CopilotAbortMarkerWritten, true)
|
||||
const controller = activeStreams.get(streamId)
|
||||
if (!controller) {
|
||||
span.setAttribute(TraceAttr.CopilotAbortControllerFired, false)
|
||||
return false
|
||||
}
|
||||
controller.abort(AbortReason.UserStop)
|
||||
activeStreams.delete(streamId)
|
||||
span.setAttribute(TraceAttr.CopilotAbortControllerFired, true)
|
||||
return true
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export type { AbortReasonValue } from './abort-reason'
|
||||
/**
|
||||
* `AbortReason` vocabulary and the `isExplicitStopReason` classifier
|
||||
* live in a sibling zero-dependency module so the telemetry layer
|
||||
* (`request/otel.ts`) can import them without creating a circular
|
||||
* import back through `session/abort.ts`'s OTel-wrapped helpers.
|
||||
*
|
||||
* Context on why the distinction matters: when the user clicks Stop,
|
||||
* we fire `abortController.abort(AbortReason.UserStop)` from
|
||||
* `abortActiveStream()`. That causes Sim's SSE writer to close,
|
||||
* which in turn makes the BROWSER's SSE reader see the stream end
|
||||
* — which fires the browser-side fetch AbortController and
|
||||
* propagates back to Sim as `publisher.markDisconnected()`. So on
|
||||
* an explicit Stop you observe BOTH "explicit reason" AND
|
||||
* "client disconnected" — the discriminator is the reason string,
|
||||
* not the client flag.
|
||||
*
|
||||
* For any NEW abort path, add its reason in `./abort-reason.ts` and
|
||||
* update `isExplicitStopReason` if it should be classified as a user
|
||||
* stop.
|
||||
*/
|
||||
export { AbortReason, isExplicitStopReason } from './abort-reason'
|
||||
|
||||
const pollingStreams = new Set<string>()
|
||||
|
||||
export function startAbortPoller(
|
||||
@@ -208,7 +275,7 @@ export function startAbortPoller(
|
||||
try {
|
||||
const shouldAbort = await hasAbortMarker(streamId)
|
||||
if (shouldAbort && !abortController.signal.aborted) {
|
||||
abortController.abort('redis_abort_marker:poller')
|
||||
abortController.abort(AbortReason.RedisPoller)
|
||||
await clearAbortMarker(streamId)
|
||||
}
|
||||
} catch (error) {
|
||||
|
||||
@@ -27,6 +27,10 @@ describe('stream session contract parser', () => {
|
||||
it('accepts contract text events', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
trace: {
|
||||
...BASE_ENVELOPE.trace,
|
||||
goTraceId: 'go-trace-1',
|
||||
},
|
||||
type: 'text' as const,
|
||||
payload: {
|
||||
channel: 'assistant' as const,
|
||||
@@ -97,7 +101,11 @@ describe('stream session contract parser', () => {
|
||||
const event = {
|
||||
...BASE_ENVELOPE,
|
||||
type: 'span' as const,
|
||||
payload: { kind: 'subagent' as const, event: 'start' as const, agent: 'file' },
|
||||
payload: {
|
||||
kind: 'subagent' as const,
|
||||
event: 'start' as const,
|
||||
agent: 'file',
|
||||
},
|
||||
}
|
||||
|
||||
expect(isContractStreamEventEnvelope(event)).toBe(true)
|
||||
|
||||
@@ -171,7 +171,12 @@ function isStreamRef(value: unknown): value is MothershipStreamV1StreamRef {
|
||||
}
|
||||
|
||||
function isTrace(value: unknown): value is MothershipStreamV1Trace {
|
||||
return isRecord(value) && typeof value.requestId === 'string' && isOptionalString(value.spanId)
|
||||
return (
|
||||
isRecord(value) &&
|
||||
typeof value.requestId === 'string' &&
|
||||
isOptionalString(value.goTraceId) &&
|
||||
isOptionalString(value.spanId)
|
||||
)
|
||||
}
|
||||
|
||||
function isStreamScope(value: unknown): value is MothershipStreamV1StreamScope {
|
||||
@@ -317,9 +322,12 @@ function isContractEnvelope(value: unknown): value is MothershipStreamV1EventEnv
|
||||
// Synthetic file-preview envelope validators
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function isSyntheticEnvelopeBase(
|
||||
value: unknown
|
||||
): value is Omit<SyntheticFilePreviewEventEnvelope, 'payload'> & { payload?: unknown } {
|
||||
function isSyntheticEnvelopeBase(value: unknown): value is Omit<
|
||||
SyntheticFilePreviewEventEnvelope,
|
||||
'payload'
|
||||
> & {
|
||||
payload?: unknown
|
||||
} {
|
||||
return (
|
||||
isRecord(value) &&
|
||||
value.v === 1 &&
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import type { Context } from '@opentelemetry/api'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { fetchGo } from '@/lib/copilot/request/go/fetch'
|
||||
import { AbortReason } from '@/lib/copilot/request/session/abort'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
|
||||
export const DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS = 3000
|
||||
@@ -8,19 +12,31 @@ export async function requestExplicitStreamAbort(params: {
|
||||
userId: string
|
||||
chatId?: string
|
||||
timeoutMs?: number
|
||||
otelContext?: Context
|
||||
}): Promise<void> {
|
||||
const { streamId, userId, chatId, timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS } = params
|
||||
const {
|
||||
streamId,
|
||||
userId,
|
||||
chatId,
|
||||
timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS,
|
||||
otelContext,
|
||||
} = params
|
||||
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
if (env.COPILOT_API_KEY) {
|
||||
headers['x-api-key'] = env.COPILOT_API_KEY
|
||||
}
|
||||
|
||||
const controller = new AbortController()
|
||||
const timeout = setTimeout(() => controller.abort('timeout:go_explicit_abort_fetch'), timeoutMs)
|
||||
const timeout = setTimeout(
|
||||
() => controller.abort(AbortReason.ExplicitAbortFetchTimeout),
|
||||
timeoutMs
|
||||
)
|
||||
|
||||
try {
|
||||
const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
|
||||
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
signal: controller.signal,
|
||||
@@ -29,6 +45,13 @@ export async function requestExplicitStreamAbort(params: {
|
||||
userId,
|
||||
...(chatId ? { chatId } : {}),
|
||||
}),
|
||||
otelContext,
|
||||
spanName: 'sim → go /api/streams/explicit-abort',
|
||||
operation: 'explicit_abort',
|
||||
attributes: {
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
...(chatId ? { [TraceAttr.ChatId]: chatId } : {}),
|
||||
},
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
export {
|
||||
AbortReason,
|
||||
type AbortReasonValue,
|
||||
abortActiveStream,
|
||||
acquirePendingChatStream,
|
||||
cleanupAbortMarker,
|
||||
getPendingChatStreamId,
|
||||
isExplicitStopReason,
|
||||
registerActiveStream,
|
||||
releasePendingChatStream,
|
||||
startAbortPoller,
|
||||
|
||||
38
apps/sim/lib/copilot/request/session/recovery.test.ts
Normal file
38
apps/sim/lib/copilot/request/session/recovery.test.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
const { getLatestSeq, getOldestSeq, readEvents } = vi.hoisted(() => ({
|
||||
getLatestSeq: vi.fn(),
|
||||
getOldestSeq: vi.fn(),
|
||||
readEvents: vi.fn(),
|
||||
}))
|
||||
|
||||
vi.mock('./buffer', () => ({
|
||||
getLatestSeq,
|
||||
getOldestSeq,
|
||||
readEvents,
|
||||
}))
|
||||
|
||||
import { checkForReplayGap } from './recovery'
|
||||
|
||||
describe('checkForReplayGap', () => {
|
||||
it('uses the latest buffered request id when run metadata is missing it', async () => {
|
||||
getOldestSeq.mockResolvedValue(10)
|
||||
getLatestSeq.mockResolvedValue(12)
|
||||
readEvents.mockResolvedValue([
|
||||
{
|
||||
trace: { requestId: 'req-live-123' },
|
||||
},
|
||||
])
|
||||
|
||||
const result = await checkForReplayGap('stream-1', '1')
|
||||
|
||||
expect(readEvents).toHaveBeenCalledWith('stream-1', '11')
|
||||
expect(result?.gapDetected).toBe(true)
|
||||
expect(result?.envelopes[0].trace.requestId).toBe('req-live-123')
|
||||
expect(result?.envelopes[1].trace.requestId).toBe('req-live-123')
|
||||
})
|
||||
})
|
||||
@@ -3,7 +3,11 @@ import {
|
||||
MothershipStreamV1CompletionStatus,
|
||||
MothershipStreamV1EventType,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { getLatestSeq, getOldestSeq } from './buffer'
|
||||
import { CopilotRecoveryOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import { getLatestSeq, getOldestSeq, readEvents } from './buffer'
|
||||
import { createEvent } from './event'
|
||||
|
||||
const logger = createLogger('SessionRecovery')
|
||||
@@ -15,62 +19,105 @@ export interface ReplayGapResult {
|
||||
|
||||
export async function checkForReplayGap(
|
||||
streamId: string,
|
||||
afterCursor: string
|
||||
afterCursor: string,
|
||||
requestId?: string
|
||||
): Promise<ReplayGapResult | null> {
|
||||
const requestedAfterSeq = Number(afterCursor || '0')
|
||||
if (requestedAfterSeq <= 0) {
|
||||
// Fast path: no cursor → nothing to check. Skip the span to avoid
|
||||
// emitting zero-work spans on every stream connect.
|
||||
return null
|
||||
}
|
||||
|
||||
const oldestSeq = await getOldestSeq(streamId)
|
||||
const latestSeq = await getLatestSeq(streamId)
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotRecoveryCheckReplayGap,
|
||||
{
|
||||
[TraceAttr.StreamId]: streamId,
|
||||
[TraceAttr.CopilotRecoveryRequestedAfterSeq]: requestedAfterSeq,
|
||||
...(requestId ? { [TraceAttr.RequestId]: requestId } : {}),
|
||||
},
|
||||
async (span) => {
|
||||
const oldestSeq = await getOldestSeq(streamId)
|
||||
const latestSeq = await getLatestSeq(streamId)
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotRecoveryOldestSeq]: oldestSeq ?? -1,
|
||||
[TraceAttr.CopilotRecoveryLatestSeq]: latestSeq ?? -1,
|
||||
})
|
||||
|
||||
if (
|
||||
latestSeq !== null &&
|
||||
latestSeq > 0 &&
|
||||
oldestSeq !== null &&
|
||||
requestedAfterSeq < oldestSeq - 1
|
||||
) {
|
||||
logger.warn('Replay gap detected: requested cursor is below oldest available event', {
|
||||
streamId,
|
||||
requestedAfterSeq,
|
||||
oldestAvailableSeq: oldestSeq,
|
||||
latestSeq,
|
||||
})
|
||||
|
||||
const gapEnvelope = createEvent({
|
||||
streamId,
|
||||
cursor: String(latestSeq + 1),
|
||||
seq: latestSeq + 1,
|
||||
requestId: '',
|
||||
type: MothershipStreamV1EventType.error,
|
||||
payload: {
|
||||
message: 'Replay history is no longer available. Some events may have been lost.',
|
||||
code: 'replay_gap',
|
||||
data: {
|
||||
oldestAvailableSeq: oldestSeq,
|
||||
if (
|
||||
latestSeq !== null &&
|
||||
latestSeq > 0 &&
|
||||
oldestSeq !== null &&
|
||||
requestedAfterSeq < oldestSeq - 1
|
||||
) {
|
||||
const resolvedRequestId = await resolveReplayGapRequestId(streamId, latestSeq, requestId)
|
||||
logger.warn('Replay gap detected: requested cursor is below oldest available event', {
|
||||
streamId,
|
||||
requestedAfterSeq,
|
||||
},
|
||||
},
|
||||
})
|
||||
oldestAvailableSeq: oldestSeq,
|
||||
latestSeq,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.GapDetected)
|
||||
|
||||
const terminalEnvelope = createEvent({
|
||||
streamId,
|
||||
cursor: String(latestSeq + 2),
|
||||
seq: latestSeq + 2,
|
||||
requestId: '',
|
||||
type: MothershipStreamV1EventType.complete,
|
||||
payload: {
|
||||
status: MothershipStreamV1CompletionStatus.error,
|
||||
reason: 'replay_gap',
|
||||
},
|
||||
})
|
||||
const gapEnvelope = createEvent({
|
||||
streamId,
|
||||
cursor: String(latestSeq + 1),
|
||||
seq: latestSeq + 1,
|
||||
requestId: resolvedRequestId,
|
||||
type: MothershipStreamV1EventType.error,
|
||||
payload: {
|
||||
message: 'Replay history is no longer available. Some events may have been lost.',
|
||||
code: 'replay_gap',
|
||||
data: {
|
||||
oldestAvailableSeq: oldestSeq,
|
||||
requestedAfterSeq,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
return {
|
||||
gapDetected: true,
|
||||
envelopes: [gapEnvelope, terminalEnvelope],
|
||||
const terminalEnvelope = createEvent({
|
||||
streamId,
|
||||
cursor: String(latestSeq + 2),
|
||||
seq: latestSeq + 2,
|
||||
requestId: resolvedRequestId,
|
||||
type: MothershipStreamV1EventType.complete,
|
||||
payload: {
|
||||
status: MothershipStreamV1CompletionStatus.error,
|
||||
reason: 'replay_gap',
|
||||
},
|
||||
})
|
||||
|
||||
return {
|
||||
gapDetected: true,
|
||||
envelopes: [gapEnvelope, terminalEnvelope],
|
||||
}
|
||||
}
|
||||
|
||||
span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.InRange)
|
||||
return null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function resolveReplayGapRequestId(
|
||||
streamId: string,
|
||||
latestSeq: number,
|
||||
requestId?: string
|
||||
): Promise<string> {
|
||||
if (typeof requestId === 'string' && requestId.length > 0) {
|
||||
return requestId
|
||||
}
|
||||
|
||||
return null
|
||||
try {
|
||||
const latestEvents = await readEvents(streamId, String(Math.max(latestSeq - 1, 0)))
|
||||
const latestRequestId = latestEvents[0]?.trace?.requestId
|
||||
return typeof latestRequestId === 'string' ? latestRequestId : ''
|
||||
} catch (error) {
|
||||
logger.warn('Failed to resolve request ID for replay gap', {
|
||||
streamId,
|
||||
latestSeq,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
})
|
||||
return ''
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,14 +2,17 @@ import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { generateId } from '@sim/utils/id'
|
||||
import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import { SIM_AGENT_API_URL, SIM_AGENT_VERSION } from '@/lib/copilot/constants'
|
||||
import {
|
||||
MothershipStreamV1EventType,
|
||||
MothershipStreamV1SpanPayloadKind,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { createStreamingContext } from '@/lib/copilot/request/context/request-context'
|
||||
import { buildToolCallSummaries } from '@/lib/copilot/request/context/result'
|
||||
import { runStreamLoop } from '@/lib/copilot/request/go/stream'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import type {
|
||||
ExecutionContext,
|
||||
OrchestratorOptions,
|
||||
@@ -30,6 +33,7 @@ export interface SubagentOrchestratorOptions extends Omit<OrchestratorOptions, '
|
||||
workflowId?: string
|
||||
workspaceId?: string
|
||||
userPermission?: string
|
||||
simRequestId?: string
|
||||
onComplete?: (result: SubagentOrchestratorResult) => void | Promise<void>
|
||||
}
|
||||
|
||||
@@ -51,6 +55,47 @@ export async function orchestrateSubagentStream(
|
||||
agentId: string,
|
||||
requestPayload: Record<string, unknown>,
|
||||
options: SubagentOrchestratorOptions
|
||||
): Promise<SubagentOrchestratorResult> {
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotSubagentExecute,
|
||||
{
|
||||
[TraceAttr.SubagentId]: agentId,
|
||||
// Sim-side entrypoint = MCP / headless subagent call. No parent
|
||||
// agent (the caller is an external client); treat as depth 2 and
|
||||
// mark as NOT nested so it aggregates with Go-side direct-child
|
||||
// subagent spans on dashboards. Grandchildren are stamped
|
||||
// depth=3 + nested=true in
|
||||
// `agents/nested.go:executeNestedAgent`.
|
||||
[TraceAttr.SubagentDepth]: 2,
|
||||
[TraceAttr.SubagentNested]: false,
|
||||
[TraceAttr.SubagentParentAgentId]: 'mcp',
|
||||
[TraceAttr.UserId]: options.userId,
|
||||
...(options.simRequestId ? { [TraceAttr.SimRequestId]: options.simRequestId } : {}),
|
||||
...(options.workflowId ? { [TraceAttr.WorkflowId]: options.workflowId } : {}),
|
||||
...(options.workspaceId ? { [TraceAttr.WorkspaceId]: options.workspaceId } : {}),
|
||||
},
|
||||
async (otelSpan) => {
|
||||
const result = await orchestrateSubagentStreamInner(agentId, requestPayload, options)
|
||||
otelSpan.setAttributes({
|
||||
[TraceAttr.SubagentOutcomeSuccess]: result.success,
|
||||
[TraceAttr.SubagentOutcomeToolCallCount]: result.toolCalls.length,
|
||||
[TraceAttr.SubagentOutcomeContentBytes]: result.content?.length ?? 0,
|
||||
...(result.structuredResult?.type
|
||||
? { [TraceAttr.SubagentOutcomeStructuredType]: result.structuredResult.type }
|
||||
: {}),
|
||||
...(result.error
|
||||
? { [TraceAttr.SubagentOutcomeError]: String(result.error).slice(0, 500) }
|
||||
: {}),
|
||||
})
|
||||
return result
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function orchestrateSubagentStreamInner(
|
||||
agentId: string,
|
||||
requestPayload: Record<string, unknown>,
|
||||
options: SubagentOrchestratorOptions
|
||||
): Promise<SubagentOrchestratorResult> {
|
||||
const { userId, workflowId, workspaceId, userPermission } = options
|
||||
const chatId =
|
||||
@@ -87,6 +132,7 @@ export async function orchestrateSubagentStream(
|
||||
const msgId = requestPayload?.messageId
|
||||
const context = createStreamingContext({
|
||||
chatId,
|
||||
requestId: options.simRequestId,
|
||||
messageId: typeof msgId === 'string' ? msgId : generateId(),
|
||||
})
|
||||
|
||||
@@ -100,6 +146,7 @@ export async function orchestrateSubagentStream(
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
'X-Client-Version': SIM_AGENT_VERSION,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
...requestPayload,
|
||||
@@ -149,7 +196,10 @@ export async function orchestrateSubagentStream(
|
||||
return result
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error('Subagent orchestration failed')
|
||||
logger.error('Subagent orchestration failed', { error: err.message, agentId })
|
||||
logger.error('Subagent orchestration failed', {
|
||||
error: err.message,
|
||||
agentId,
|
||||
})
|
||||
await options.onError?.(err)
|
||||
return {
|
||||
success: false,
|
||||
|
||||
@@ -18,7 +18,9 @@ import {
|
||||
MothershipStreamV1ToolPhase,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { CreateWorkflow } from '@/lib/copilot/generated/tool-catalog-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm'
|
||||
import { withCopilotToolSpan } from '@/lib/copilot/request/otel'
|
||||
import { markToolResultSeen } from '@/lib/copilot/request/sse-utils'
|
||||
import {
|
||||
getToolCallStateOutput,
|
||||
@@ -54,6 +56,81 @@ function hasOutputValue(result: { output?: unknown } | undefined): result is { o
|
||||
return result !== undefined && Object.hasOwn(result, 'output')
|
||||
}
|
||||
|
||||
interface ToolResultSpanSummary {
|
||||
resultSuccess: boolean
|
||||
outputBytes: number
|
||||
outputKind: string
|
||||
errorMessage?: string
|
||||
imageCount?: number
|
||||
imageBytes?: number
|
||||
attachmentMediaType?: string
|
||||
}
|
||||
|
||||
function summarizeToolResultForSpan(result: {
|
||||
success: boolean
|
||||
output?: unknown
|
||||
error?: string
|
||||
}): ToolResultSpanSummary {
|
||||
const summary: ToolResultSpanSummary = {
|
||||
resultSuccess: Boolean(result.success),
|
||||
outputBytes: 0,
|
||||
outputKind: 'none',
|
||||
}
|
||||
if (!result.success && result.error) {
|
||||
summary.errorMessage = String(result.error).slice(0, 500)
|
||||
}
|
||||
if (!hasOutputValue(result)) {
|
||||
return summary
|
||||
}
|
||||
const output = (result as { output: unknown }).output
|
||||
if (typeof output === 'string') {
|
||||
summary.outputKind = 'string'
|
||||
summary.outputBytes = output.length
|
||||
} else if (output && typeof output === 'object') {
|
||||
summary.outputKind = Array.isArray(output) ? 'array' : 'object'
|
||||
try {
|
||||
summary.outputBytes = JSON.stringify(output).length
|
||||
} catch {
|
||||
summary.outputBytes = 0
|
||||
}
|
||||
const attachment = extractAttachmentShape(output)
|
||||
if (attachment) {
|
||||
summary.imageCount = attachment.imageCount
|
||||
summary.imageBytes = attachment.imageBytes
|
||||
if (attachment.mediaType) {
|
||||
summary.attachmentMediaType = attachment.mediaType
|
||||
}
|
||||
}
|
||||
} else if (output !== undefined && output !== null) {
|
||||
summary.outputKind = typeof output
|
||||
summary.outputBytes = String(output).length
|
||||
}
|
||||
return summary
|
||||
}
|
||||
|
||||
function extractAttachmentShape(
|
||||
output: unknown
|
||||
): { imageCount: number; imageBytes: number; mediaType?: string } | null {
|
||||
if (!isRecord(output)) return null
|
||||
const candidate = (output as Record<string, unknown>).attachment
|
||||
if (!isRecord(candidate)) return null
|
||||
const source = (candidate as Record<string, unknown>).source
|
||||
if (!isRecord(source)) return null
|
||||
const type =
|
||||
typeof (candidate as Record<string, unknown>).type === 'string'
|
||||
? ((candidate as Record<string, unknown>).type as string)
|
||||
: ''
|
||||
if (type !== 'image') return null
|
||||
const mediaType =
|
||||
typeof source.media_type === 'string' ? (source.media_type as string) : undefined
|
||||
const data = typeof source.data === 'string' ? (source.data as string) : ''
|
||||
return {
|
||||
imageCount: 1,
|
||||
imageBytes: data.length,
|
||||
mediaType,
|
||||
}
|
||||
}
|
||||
|
||||
function buildCompletionSignal(input: {
|
||||
status: AsyncCompletionSignal['status']
|
||||
message?: string
|
||||
@@ -164,6 +241,44 @@ export async function executeToolAndReport(
|
||||
message: 'Tool call not found',
|
||||
})
|
||||
|
||||
const argsPayload = toolCall.params
|
||||
? (() => {
|
||||
try {
|
||||
return JSON.stringify(toolCall.params)
|
||||
} catch {
|
||||
return undefined
|
||||
}
|
||||
})()
|
||||
: undefined
|
||||
return withCopilotToolSpan(
|
||||
{
|
||||
toolName: toolCall.name,
|
||||
toolCallId: toolCall.id,
|
||||
runId: context.runId,
|
||||
chatId: execContext.chatId,
|
||||
argsBytes: argsPayload?.length,
|
||||
argsPreview: argsPayload?.slice(0, 200),
|
||||
},
|
||||
async (otelSpan) => {
|
||||
const completion = await executeToolAndReportInner(toolCall, context, execContext, options)
|
||||
otelSpan.setAttribute(TraceAttr.ToolOutcome, completion.status)
|
||||
if (completion.message) {
|
||||
otelSpan.setAttribute(
|
||||
TraceAttr.ToolOutcomeMessage,
|
||||
String(completion.message).slice(0, 500)
|
||||
)
|
||||
}
|
||||
return completion
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async function executeToolAndReportInner(
|
||||
toolCall: ToolCallState,
|
||||
context: StreamingContext,
|
||||
execContext: ExecutionContext,
|
||||
options?: OrchestratorOptions
|
||||
): Promise<AsyncToolCompletion> {
|
||||
if (toolCall.status === 'executing') {
|
||||
return buildCompletionSignal({
|
||||
status: MothershipStreamV1AsyncToolRecordStatus.running,
|
||||
@@ -377,6 +492,11 @@ export async function executeToolAndReport(
|
||||
endToolSpan('cancelled', { cancelReason: 'abort_during_post_processing_csv' })
|
||||
return cancelledCompletion('Request aborted during tool post-processing')
|
||||
}
|
||||
toolSpan.attributes = {
|
||||
...toolSpan.attributes,
|
||||
...summarizeToolResultForSpan(result),
|
||||
}
|
||||
|
||||
setTerminalToolCallState(toolCall, {
|
||||
status: result.success
|
||||
? MothershipStreamV1ToolOutcome.success
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { FunctionExecute, UserTable } from '@/lib/copilot/generated/tool-catalog-v1'
|
||||
import { CopilotOutputFileOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types'
|
||||
import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
|
||||
|
||||
@@ -162,55 +167,80 @@ export async function maybeWriteOutputToFile(
|
||||
const explicitFormat =
|
||||
(params?.outputFormat as string | undefined) ?? (args?.outputFormat as string | undefined)
|
||||
|
||||
try {
|
||||
const fileName = normalizeOutputWorkspaceFileName(outputPath)
|
||||
const format = resolveOutputFormat(fileName, explicitFormat)
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const content = serializeOutputForFile(result.output, format)
|
||||
const contentType = FORMAT_TO_CONTENT_TYPE[format]
|
||||
// Only span the actual write path (where we upload to storage). Fast
|
||||
// no-op returns above don't need a span — they'd just pad the trace
|
||||
// with empty work.
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotToolsWriteOutputFile,
|
||||
{
|
||||
[TraceAttr.ToolName]: toolName,
|
||||
[TraceAttr.WorkspaceId]: context.workspaceId,
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const fileName = normalizeOutputWorkspaceFileName(outputPath)
|
||||
const format = resolveOutputFormat(fileName, explicitFormat)
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotOutputFileName]: fileName,
|
||||
[TraceAttr.CopilotOutputFileFormat]: format,
|
||||
})
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const content = serializeOutputForFile(result.output, format)
|
||||
const contentType = FORMAT_TO_CONTENT_TYPE[format]
|
||||
|
||||
const buffer = Buffer.from(content, 'utf-8')
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const uploaded = await uploadWorkspaceFile(
|
||||
context.workspaceId,
|
||||
context.userId,
|
||||
buffer,
|
||||
fileName,
|
||||
contentType
|
||||
)
|
||||
const buffer = Buffer.from(content, 'utf-8')
|
||||
span.setAttribute(TraceAttr.CopilotOutputFileBytes, buffer.length)
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const uploaded = await uploadWorkspaceFile(
|
||||
context.workspaceId!,
|
||||
context.userId!,
|
||||
buffer,
|
||||
fileName,
|
||||
contentType
|
||||
)
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotOutputFileId]: uploaded.id,
|
||||
[TraceAttr.CopilotOutputFileOutcome]: CopilotOutputFileOutcome.Uploaded,
|
||||
})
|
||||
|
||||
logger.info('Tool output written to file', {
|
||||
toolName,
|
||||
fileName,
|
||||
size: buffer.length,
|
||||
fileId: uploaded.id,
|
||||
})
|
||||
logger.info('Tool output written to file', {
|
||||
toolName,
|
||||
fileName,
|
||||
size: buffer.length,
|
||||
fileId: uploaded.id,
|
||||
})
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Output written to files/${fileName} (${buffer.length} bytes)`,
|
||||
fileId: uploaded.id,
|
||||
fileName,
|
||||
size: buffer.length,
|
||||
downloadUrl: uploaded.url,
|
||||
},
|
||||
resources: [{ type: 'file', id: uploaded.id, title: fileName }],
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Output written to files/${fileName} (${buffer.length} bytes)`,
|
||||
fileId: uploaded.id,
|
||||
fileName,
|
||||
size: buffer.length,
|
||||
downloadUrl: uploaded.url,
|
||||
},
|
||||
resources: [{ type: 'file', id: uploaded.id, title: fileName }],
|
||||
}
|
||||
} catch (err) {
|
||||
const message = toError(err).message
|
||||
logger.warn('Failed to write tool output to file', {
|
||||
toolName,
|
||||
outputPath,
|
||||
error: message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotOutputFileOutcome, CopilotOutputFileOutcome.Failed)
|
||||
span.addEvent(TraceEvent.CopilotOutputFileError, {
|
||||
[TraceAttr.ErrorMessage]: message.slice(0, 500),
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to write output file: ${message}`,
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
const message = toError(err).message
|
||||
logger.warn('Failed to write tool output to file', {
|
||||
toolName,
|
||||
outputPath,
|
||||
error: message,
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to write output file: ${message}`,
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4,6 +4,9 @@ import {
|
||||
MothershipStreamV1EventType,
|
||||
MothershipStreamV1ResourceOp,
|
||||
} from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import type { StreamEvent, ToolCallResult } from '@/lib/copilot/request/types'
|
||||
import {
|
||||
extractDeletedResourcesFromToolResult,
|
||||
@@ -30,63 +33,102 @@ export async function handleResourceSideEffects(
|
||||
onEvent: ((event: StreamEvent) => void | Promise<void>) | undefined,
|
||||
isAborted: () => boolean
|
||||
): Promise<void> {
|
||||
let isDeleteOp = false
|
||||
|
||||
if (hasDeleteCapability(toolName)) {
|
||||
const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output)
|
||||
if (deleted.length > 0) {
|
||||
isDeleteOp = true
|
||||
removeChatResources(chatId, deleted).catch((err) => {
|
||||
logger.warn('Failed to remove chat resources after deletion', {
|
||||
chatId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
})
|
||||
|
||||
for (const resource of deleted) {
|
||||
if (isAborted()) break
|
||||
await onEvent?.({
|
||||
type: MothershipStreamV1EventType.resource,
|
||||
payload: {
|
||||
op: MothershipStreamV1ResourceOp.remove,
|
||||
resource: { type: resource.type, id: resource.id, title: resource.title },
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
// Cheap early exit so we don't emit a span for tools that can never
|
||||
// produce resources (most of them). The span only shows up for tools
|
||||
// that might actually do resource work.
|
||||
if (
|
||||
!hasDeleteCapability(toolName) &&
|
||||
!isResourceToolName(toolName) &&
|
||||
!(result.resources && result.resources.length > 0)
|
||||
) {
|
||||
return
|
||||
}
|
||||
|
||||
if (!isDeleteOp && !isAborted()) {
|
||||
const resources =
|
||||
result.resources && result.resources.length > 0
|
||||
? result.resources
|
||||
: isResourceToolName(toolName)
|
||||
? extractResourcesFromToolResult(toolName, params, result.output)
|
||||
: []
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotToolsHandleResourceSideEffects,
|
||||
{
|
||||
[TraceAttr.ToolName]: toolName,
|
||||
[TraceAttr.ChatId]: chatId,
|
||||
},
|
||||
async (span) => {
|
||||
let isDeleteOp = false
|
||||
let removedCount = 0
|
||||
let upsertedCount = 0
|
||||
|
||||
if (resources.length > 0) {
|
||||
logger.info('[file-stream-server] Emitting resource upsert events', {
|
||||
toolName,
|
||||
chatId,
|
||||
resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })),
|
||||
})
|
||||
persistChatResources(chatId, resources).catch((err) => {
|
||||
logger.warn('Failed to persist chat resources', {
|
||||
chatId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
})
|
||||
if (hasDeleteCapability(toolName)) {
|
||||
const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output)
|
||||
if (deleted.length > 0) {
|
||||
isDeleteOp = true
|
||||
removedCount = deleted.length
|
||||
// Detached from the span lifecycle — the span ends before the
|
||||
// DB call completes. That is intentional; we want the span to
|
||||
// reflect the synchronous decision + event emission, not the
|
||||
// best-effort persistence.
|
||||
removeChatResources(chatId, deleted).catch((err) => {
|
||||
logger.warn('Failed to remove chat resources after deletion', {
|
||||
chatId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
})
|
||||
|
||||
for (const resource of resources) {
|
||||
if (isAborted()) break
|
||||
await onEvent?.({
|
||||
type: MothershipStreamV1EventType.resource,
|
||||
payload: {
|
||||
op: MothershipStreamV1ResourceOp.upsert,
|
||||
resource: { type: resource.type, id: resource.id, title: resource.title },
|
||||
},
|
||||
})
|
||||
for (const resource of deleted) {
|
||||
if (isAborted()) break
|
||||
await onEvent?.({
|
||||
type: MothershipStreamV1EventType.resource,
|
||||
payload: {
|
||||
op: MothershipStreamV1ResourceOp.remove,
|
||||
resource: { type: resource.type, id: resource.id, title: resource.title },
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isDeleteOp && !isAborted()) {
|
||||
const resources =
|
||||
result.resources && result.resources.length > 0
|
||||
? result.resources
|
||||
: isResourceToolName(toolName)
|
||||
? extractResourcesFromToolResult(toolName, params, result.output)
|
||||
: []
|
||||
|
||||
if (resources.length > 0) {
|
||||
upsertedCount = resources.length
|
||||
logger.info('[file-stream-server] Emitting resource upsert events', {
|
||||
toolName,
|
||||
chatId,
|
||||
resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })),
|
||||
})
|
||||
persistChatResources(chatId, resources).catch((err) => {
|
||||
logger.warn('Failed to persist chat resources', {
|
||||
chatId,
|
||||
error: toError(err).message,
|
||||
})
|
||||
})
|
||||
|
||||
for (const resource of resources) {
|
||||
if (isAborted()) break
|
||||
await onEvent?.({
|
||||
type: MothershipStreamV1EventType.resource,
|
||||
payload: {
|
||||
op: MothershipStreamV1ResourceOp.upsert,
|
||||
resource: { type: resource.type, id: resource.id, title: resource.title },
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotResourcesOp]: isDeleteOp
|
||||
? 'delete'
|
||||
: upsertedCount > 0
|
||||
? 'upsert'
|
||||
: 'none',
|
||||
[TraceAttr.CopilotResourcesRemovedCount]: removedCount,
|
||||
[TraceAttr.CopilotResourcesUpsertedCount]: upsertedCount,
|
||||
[TraceAttr.CopilotResourcesAborted]: isAborted(),
|
||||
})
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -5,6 +5,11 @@ import { toError } from '@sim/utils/errors'
|
||||
import { parse as csvParse } from 'csv-parse/sync'
|
||||
import { eq } from 'drizzle-orm'
|
||||
import { FunctionExecute, Read as ReadTool } from '@/lib/copilot/generated/tool-catalog-v1'
|
||||
import { CopilotTableOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { withCopilotSpan } from '@/lib/copilot/request/otel'
|
||||
import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types'
|
||||
import { getTableById } from '@/lib/table/service'
|
||||
|
||||
@@ -26,105 +31,126 @@ export async function maybeWriteOutputToTable(
|
||||
const outputTable = params?.outputTable as string | undefined
|
||||
if (!outputTable) return result
|
||||
|
||||
try {
|
||||
const table = await getTableById(outputTable)
|
||||
if (!table) {
|
||||
return {
|
||||
success: false,
|
||||
error: `Table "${outputTable}" not found`,
|
||||
}
|
||||
}
|
||||
|
||||
const rawOutput = result.output
|
||||
let rows: Array<Record<string, unknown>>
|
||||
|
||||
if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) {
|
||||
const inner = (rawOutput as Record<string, unknown>).result
|
||||
if (Array.isArray(inner)) {
|
||||
rows = inner
|
||||
} else {
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires the code to return an array of objects',
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotToolsWriteOutputTable,
|
||||
{
|
||||
[TraceAttr.ToolName]: toolName,
|
||||
[TraceAttr.CopilotTableId]: outputTable,
|
||||
[TraceAttr.WorkspaceId]: context.workspaceId,
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const table = await getTableById(outputTable)
|
||||
if (!table) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound)
|
||||
return {
|
||||
success: false,
|
||||
error: `Table "${outputTable}" not found`,
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (Array.isArray(rawOutput)) {
|
||||
rows = rawOutput
|
||||
} else {
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires the code to return an array of objects',
|
||||
}
|
||||
}
|
||||
|
||||
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
|
||||
return {
|
||||
success: false,
|
||||
error: `outputTable row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
|
||||
}
|
||||
}
|
||||
const rawOutput = result.output
|
||||
let rows: Array<Record<string, unknown>>
|
||||
|
||||
if (rows.length === 0) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires at least one row — code returned an empty array',
|
||||
}
|
||||
}
|
||||
if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) {
|
||||
const inner = (rawOutput as Record<string, unknown>).result
|
||||
if (Array.isArray(inner)) {
|
||||
rows = inner
|
||||
} else {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape)
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires the code to return an array of objects',
|
||||
}
|
||||
}
|
||||
} else if (Array.isArray(rawOutput)) {
|
||||
rows = rawOutput
|
||||
} else {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape)
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires the code to return an array of objects',
|
||||
}
|
||||
}
|
||||
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await db.transaction(async (tx) => {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
|
||||
span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length)
|
||||
|
||||
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded)
|
||||
return {
|
||||
success: false,
|
||||
error: `outputTable row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
|
||||
}
|
||||
}
|
||||
|
||||
if (rows.length === 0) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows)
|
||||
return {
|
||||
success: false,
|
||||
error: 'outputTable requires at least one row — code returned an empty array',
|
||||
}
|
||||
}
|
||||
|
||||
const now = new Date()
|
||||
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
|
||||
const values = chunk.map((rowData, j) => ({
|
||||
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
|
||||
await db.transaction(async (tx) => {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
|
||||
|
||||
const now = new Date()
|
||||
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
|
||||
const values = chunk.map((rowData, j) => ({
|
||||
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
|
||||
tableId: outputTable,
|
||||
workspaceId: context.workspaceId!,
|
||||
data: rowData,
|
||||
position: i + j,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
createdBy: context.userId,
|
||||
}))
|
||||
await tx.insert(userTableRows).values(values)
|
||||
}
|
||||
})
|
||||
|
||||
logger.info('Tool output written to table', {
|
||||
toolName,
|
||||
tableId: outputTable,
|
||||
workspaceId: context.workspaceId!,
|
||||
data: rowData,
|
||||
position: i + j,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
createdBy: context.userId,
|
||||
}))
|
||||
await tx.insert(userTableRows).values(values)
|
||||
rowCount: rows.length,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Wrote)
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Wrote ${rows.length} rows to table ${outputTable}`,
|
||||
tableId: outputTable,
|
||||
rowCount: rows.length,
|
||||
},
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to write tool output to table', {
|
||||
toolName,
|
||||
outputTable,
|
||||
error: toError(err).message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed)
|
||||
span.addEvent(TraceEvent.CopilotTableError, {
|
||||
[TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to write to table: ${toError(err).message}`,
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
logger.info('Tool output written to table', {
|
||||
toolName,
|
||||
tableId: outputTable,
|
||||
rowCount: rows.length,
|
||||
})
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Wrote ${rows.length} rows to table ${outputTable}`,
|
||||
tableId: outputTable,
|
||||
rowCount: rows.length,
|
||||
},
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to write tool output to table', {
|
||||
toolName,
|
||||
outputTable,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to write to table: ${toError(err).message}`,
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export async function maybeWriteReadCsvToTable(
|
||||
@@ -140,110 +166,136 @@ export async function maybeWriteReadCsvToTable(
|
||||
const outputTable = params?.outputTable as string | undefined
|
||||
if (!outputTable) return result
|
||||
|
||||
try {
|
||||
const table = await getTableById(outputTable)
|
||||
if (!table) {
|
||||
return { success: false, error: `Table "${outputTable}" not found` }
|
||||
}
|
||||
|
||||
const output = result.output as Record<string, unknown>
|
||||
const content = (output.content as string) || ''
|
||||
if (!content.trim()) {
|
||||
return { success: false, error: 'File has no content to import into table' }
|
||||
}
|
||||
|
||||
const filePath = (params?.path as string) || ''
|
||||
const ext = filePath.split('.').pop()?.toLowerCase()
|
||||
|
||||
let rows: Record<string, unknown>[]
|
||||
|
||||
if (ext === 'json') {
|
||||
const parsed = JSON.parse(content)
|
||||
if (!Array.isArray(parsed)) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'JSON file must contain an array of objects for table import',
|
||||
return withCopilotSpan(
|
||||
TraceSpan.CopilotToolsWriteCsvToTable,
|
||||
{
|
||||
[TraceAttr.ToolName]: toolName,
|
||||
[TraceAttr.CopilotTableId]: outputTable,
|
||||
[TraceAttr.WorkspaceId]: context.workspaceId,
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const table = await getTableById(outputTable)
|
||||
if (!table) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound)
|
||||
return { success: false, error: `Table "${outputTable}" not found` }
|
||||
}
|
||||
}
|
||||
rows = parsed
|
||||
} else {
|
||||
rows = csvParse(content, {
|
||||
columns: true,
|
||||
skip_empty_lines: true,
|
||||
trim: true,
|
||||
relax_column_count: true,
|
||||
relax_quotes: true,
|
||||
skip_records_with_error: true,
|
||||
cast: false,
|
||||
}) as Record<string, unknown>[]
|
||||
}
|
||||
|
||||
if (rows.length === 0) {
|
||||
return { success: false, error: 'File has no data rows to import' }
|
||||
}
|
||||
const output = result.output as Record<string, unknown>
|
||||
const content = (output.content as string) || ''
|
||||
if (!content.trim()) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyContent)
|
||||
return { success: false, error: 'File has no content to import into table' }
|
||||
}
|
||||
|
||||
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
|
||||
return {
|
||||
success: false,
|
||||
error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
|
||||
}
|
||||
}
|
||||
const filePath = (params?.path as string) || ''
|
||||
const ext = filePath.split('.').pop()?.toLowerCase()
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotTableSourcePath]: filePath,
|
||||
[TraceAttr.CopilotTableSourceFormat]: ext === 'json' ? 'json' : 'csv',
|
||||
[TraceAttr.CopilotTableSourceContentBytes]: content.length,
|
||||
})
|
||||
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await db.transaction(async (tx) => {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
|
||||
let rows: Record<string, unknown>[]
|
||||
|
||||
if (ext === 'json') {
|
||||
const parsed = JSON.parse(content)
|
||||
if (!Array.isArray(parsed)) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidJsonShape)
|
||||
return {
|
||||
success: false,
|
||||
error: 'JSON file must contain an array of objects for table import',
|
||||
}
|
||||
}
|
||||
rows = parsed
|
||||
} else {
|
||||
rows = csvParse(content, {
|
||||
columns: true,
|
||||
skip_empty_lines: true,
|
||||
trim: true,
|
||||
relax_column_count: true,
|
||||
relax_quotes: true,
|
||||
skip_records_with_error: true,
|
||||
cast: false,
|
||||
}) as Record<string, unknown>[]
|
||||
}
|
||||
|
||||
span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length)
|
||||
|
||||
if (rows.length === 0) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows)
|
||||
return { success: false, error: 'File has no data rows to import' }
|
||||
}
|
||||
|
||||
if (rows.length > MAX_OUTPUT_TABLE_ROWS) {
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded)
|
||||
return {
|
||||
success: false,
|
||||
error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`,
|
||||
}
|
||||
}
|
||||
|
||||
const now = new Date()
|
||||
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
|
||||
const values = chunk.map((rowData, j) => ({
|
||||
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
|
||||
await db.transaction(async (tx) => {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable))
|
||||
|
||||
const now = new Date()
|
||||
for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) {
|
||||
if (context.abortSignal?.aborted) {
|
||||
throw new Error('Request aborted before tool mutation could be applied')
|
||||
}
|
||||
const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE)
|
||||
const values = chunk.map((rowData, j) => ({
|
||||
id: `row_${crypto.randomUUID().replace(/-/g, '')}`,
|
||||
tableId: outputTable,
|
||||
workspaceId: context.workspaceId!,
|
||||
data: rowData,
|
||||
position: i + j,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
createdBy: context.userId,
|
||||
}))
|
||||
await tx.insert(userTableRows).values(values)
|
||||
}
|
||||
})
|
||||
|
||||
logger.info('Read output written to table', {
|
||||
toolName,
|
||||
tableId: outputTable,
|
||||
workspaceId: context.workspaceId!,
|
||||
data: rowData,
|
||||
position: i + j,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
createdBy: context.userId,
|
||||
}))
|
||||
await tx.insert(userTableRows).values(values)
|
||||
tableName: table.name,
|
||||
rowCount: rows.length,
|
||||
filePath,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Imported)
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`,
|
||||
tableId: outputTable,
|
||||
tableName: table.name,
|
||||
rowCount: rows.length,
|
||||
},
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to write read output to table', {
|
||||
toolName,
|
||||
outputTable,
|
||||
error: toError(err).message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed)
|
||||
span.addEvent(TraceEvent.CopilotTableError, {
|
||||
[TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to import into table: ${toError(err).message}`,
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
logger.info('Read output written to table', {
|
||||
toolName,
|
||||
tableId: outputTable,
|
||||
tableName: table.name,
|
||||
rowCount: rows.length,
|
||||
filePath,
|
||||
})
|
||||
|
||||
return {
|
||||
success: true,
|
||||
output: {
|
||||
message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`,
|
||||
tableId: outputTable,
|
||||
tableName: table.name,
|
||||
rowCount: rows.length,
|
||||
},
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to write read output to table', {
|
||||
toolName,
|
||||
outputTable,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error: `Failed to import into table: ${toError(err).message}`,
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { Context } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
|
||||
import {
|
||||
@@ -9,6 +10,7 @@ import {
|
||||
RequestTraceV1SpanStatus,
|
||||
type RequestTraceV1UsageSummary,
|
||||
} from '@/lib/copilot/generated/request-trace-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { env } from '@/lib/core/config/env'
|
||||
|
||||
const logger = createLogger('RequestTrace')
|
||||
@@ -71,6 +73,13 @@ export class TraceCollector {
|
||||
chatId?: string
|
||||
runId?: string
|
||||
executionId?: string
|
||||
// Original user prompt, surfaced on the `request_traces.message`
|
||||
// column at row-insert time so it's queryable from the DB without
|
||||
// going through Tempo. Sim already has this at chat-POST time; it's
|
||||
// threaded through here to the trace report so the row is complete
|
||||
// the moment it's first written instead of waiting on the late
|
||||
// analytics UPDATE.
|
||||
userMessage?: string
|
||||
usage?: { prompt: number; completion: number }
|
||||
cost?: { input: number; output: number; total: number }
|
||||
}): RequestTraceV1SimReport {
|
||||
@@ -96,6 +105,7 @@ export class TraceCollector {
|
||||
chatId: params.chatId,
|
||||
runId: params.runId,
|
||||
executionId: params.executionId,
|
||||
...(params.userMessage ? { userMessage: params.userMessage } : {}),
|
||||
startMs: this.startMs,
|
||||
endMs,
|
||||
durationMs: endMs - this.startMs,
|
||||
@@ -107,14 +117,27 @@ export class TraceCollector {
|
||||
}
|
||||
}
|
||||
|
||||
export async function reportTrace(trace: RequestTraceV1SimReport): Promise<void> {
|
||||
const response = await fetch(`${SIM_AGENT_API_URL}/api/traces`, {
|
||||
export async function reportTrace(
|
||||
trace: RequestTraceV1SimReport,
|
||||
otelContext?: Context
|
||||
): Promise<void> {
|
||||
const { fetchGo } = await import('@/lib/copilot/request/go/fetch')
|
||||
const body = JSON.stringify(trace)
|
||||
const response = await fetchGo(`${SIM_AGENT_API_URL}/api/traces`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}),
|
||||
},
|
||||
body: JSON.stringify(trace),
|
||||
body,
|
||||
otelContext,
|
||||
spanName: 'sim → go /api/traces',
|
||||
operation: 'report_trace',
|
||||
attributes: {
|
||||
[TraceAttr.RequestId]: trace.simRequestId ?? '',
|
||||
[TraceAttr.HttpRequestContentLength]: body.length,
|
||||
[TraceAttr.CopilotTraceSpanCount]: trace.spans?.length ?? 0,
|
||||
},
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { AsyncCompletionSignal } from '@/lib/copilot/async-runs/lifecycle'
|
||||
import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1'
|
||||
import type { RequestTraceV1Span } from '@/lib/copilot/generated/request-trace-v1'
|
||||
import type { StreamEvent } from '@/lib/copilot/request/session'
|
||||
import type { TraceCollector } from '@/lib/copilot/request/trace'
|
||||
import type { ToolExecutionContext, ToolExecutionResult } from '@/lib/copilot/tool-executor/types'
|
||||
@@ -99,6 +100,7 @@ export interface StreamingContext {
|
||||
edit?: Record<string, unknown>
|
||||
} | null
|
||||
trace: TraceCollector
|
||||
subAgentTraceSpans?: Map<string, RequestTraceV1Span>
|
||||
}
|
||||
|
||||
export interface FileAttachment {
|
||||
@@ -138,6 +140,19 @@ export interface OrchestratorOptions {
|
||||
|
||||
export interface OrchestratorResult {
|
||||
success: boolean
|
||||
/**
|
||||
* True iff the non-success outcome was a user-initiated cancel
|
||||
* (abort signal fired or client disconnected). Lets callers treat
|
||||
* cancels differently from actual errors — notably, `buildOnComplete`
|
||||
* must NOT finalize the chat row on cancel, because the browser's
|
||||
* `/api/copilot/chat/stop` POST owns writing the partial assistant
|
||||
* content and clearing `conversationId` in one UPDATE. Finalizing
|
||||
* here would race and clear `conversationId` first, making the stop
|
||||
* UPDATE match zero rows and the partial content vanish on refetch.
|
||||
*
|
||||
* Always false when `success=true`.
|
||||
*/
|
||||
cancelled?: boolean
|
||||
content: string
|
||||
contentBlocks: ContentBlock[]
|
||||
toolCalls: ToolCallSummary[]
|
||||
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
RunFromBlock,
|
||||
RunWorkflowUntilBlock,
|
||||
} from '@/lib/copilot/generated/tool-catalog-v1'
|
||||
import { traceparentHeader } from '@/lib/copilot/tools/client/trace-context'
|
||||
import { executeWorkflowWithFullLogging } from '@/app/workspace/[workspaceId]/w/[workflowId]/utils/workflow-execution-utils'
|
||||
import { useExecutionStore } from '@/stores/execution/store'
|
||||
import {
|
||||
@@ -466,7 +467,7 @@ async function reportCompletion(
|
||||
})
|
||||
const res = await fetch(COPILOT_CONFIRM_API_PATH, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...traceparentHeader() },
|
||||
body,
|
||||
})
|
||||
const LARGE_PAYLOAD_THRESHOLD = 10 * 1024 * 1024
|
||||
@@ -480,7 +481,7 @@ async function reportCompletion(
|
||||
})
|
||||
const retryRes = await fetch(COPILOT_CONFIRM_API_PATH, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers: { 'Content-Type': 'application/json', ...traceparentHeader() },
|
||||
body: JSON.stringify({
|
||||
toolCallId,
|
||||
status,
|
||||
|
||||
17
apps/sim/lib/copilot/tools/client/trace-context.ts
Normal file
17
apps/sim/lib/copilot/tools/client/trace-context.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
// Browser-side W3C traceparent holder for the active copilot chat.
|
||||
// Module-level singleton because client tool callbacks fire from deep
|
||||
// inside runtime code that can't thread a React ref. The browser only
|
||||
// has one active chat at a time (gated by the stop-barrier), so a
|
||||
// singleton is safe.
|
||||
|
||||
let currentTraceparent: string | undefined
|
||||
|
||||
export function setCurrentChatTraceparent(value: string | undefined): void {
|
||||
currentTraceparent = value
|
||||
}
|
||||
|
||||
// `fetch` header spread: `headers: { ...traceparentHeader(), ... }`.
|
||||
export function traceparentHeader(): Record<string, string> {
|
||||
const tp = currentTraceparent
|
||||
return tp ? { traceparent: tp } : {}
|
||||
}
|
||||
@@ -54,13 +54,14 @@ describe('vfs handlers oversize policy', () => {
|
||||
expect(result.error).toContain('context window')
|
||||
})
|
||||
|
||||
it('fails oversized read results with grep guidance', async () => {
|
||||
it('fails oversized read results from VFS with grep guidance', async () => {
|
||||
const vfs = makeVfs()
|
||||
vfs.readFileContent.mockResolvedValue(null)
|
||||
vfs.read.mockReturnValue({ content: OVERSIZED_INLINE_CONTENT, totalLines: 1 })
|
||||
getOrMaterializeVFS.mockResolvedValue(vfs)
|
||||
|
||||
const result = await executeVfsRead(
|
||||
{ path: 'files/big.txt' },
|
||||
{ path: 'workflows/My Workflow/state.json' },
|
||||
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
|
||||
)
|
||||
|
||||
@@ -70,9 +71,8 @@ describe('vfs handlers oversize policy', () => {
|
||||
expect(result.error).toContain('context window')
|
||||
})
|
||||
|
||||
it('fails file-backed oversized read placeholders with grep guidance', async () => {
|
||||
it('fails file-backed oversized read placeholders with original message', async () => {
|
||||
const vfs = makeVfs()
|
||||
vfs.read.mockReturnValue(null)
|
||||
vfs.readFileContent.mockResolvedValue({
|
||||
content: '[File too large to display inline: big.txt (6000000 bytes, limit 5242880)]',
|
||||
totalLines: 1,
|
||||
@@ -85,8 +85,46 @@ describe('vfs handlers oversize policy', () => {
|
||||
)
|
||||
|
||||
expect(result.success).toBe(false)
|
||||
expect(result.error).toContain('Use grep')
|
||||
expect(result.error).toContain('offset/limit')
|
||||
expect(result.error).toContain('context window')
|
||||
expect(result.error).toContain('File too large to display inline')
|
||||
expect(result.error).toContain('big.txt')
|
||||
})
|
||||
|
||||
it('passes through image reads with attachment even when oversized', async () => {
|
||||
const vfs = makeVfs()
|
||||
const largeBase64 = 'A'.repeat(TOOL_RESULT_MAX_INLINE_CHARS + 1)
|
||||
vfs.readFileContent.mockResolvedValue({
|
||||
content: 'Image: chess.png (500.0KB, image/png)',
|
||||
totalLines: 1,
|
||||
attachment: {
|
||||
type: 'image',
|
||||
source: { type: 'base64', media_type: 'image/png', data: largeBase64 },
|
||||
},
|
||||
})
|
||||
getOrMaterializeVFS.mockResolvedValue(vfs)
|
||||
|
||||
const result = await executeVfsRead(
|
||||
{ path: 'files/chess.png' },
|
||||
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
|
||||
)
|
||||
|
||||
expect(result.success).toBe(true)
|
||||
expect((result.output as { attachment?: { type: string } })?.attachment?.type).toBe('image')
|
||||
})
|
||||
|
||||
it('fails oversized image placeholder when image exceeds size limit', async () => {
|
||||
const vfs = makeVfs()
|
||||
vfs.readFileContent.mockResolvedValue({
|
||||
content: '[Image too large: huge.png (10.0MB, limit 5MB)]',
|
||||
totalLines: 1,
|
||||
})
|
||||
getOrMaterializeVFS.mockResolvedValue(vfs)
|
||||
|
||||
const result = await executeVfsRead(
|
||||
{ path: 'files/huge.png' },
|
||||
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' }
|
||||
)
|
||||
|
||||
expect(result.success).toBe(false)
|
||||
expect(result.error).toContain('too large')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -161,21 +161,30 @@ export async function executeVfsRead(
|
||||
const filename = path.slice('uploads/'.length)
|
||||
const uploadResult = await readChatUpload(filename, context.chatId)
|
||||
if (uploadResult) {
|
||||
const isImage = hasImageAttachment(uploadResult)
|
||||
if (
|
||||
!hasImageAttachment(uploadResult) &&
|
||||
!isImage &&
|
||||
(isOversizedReadPlaceholder(uploadResult.content) ||
|
||||
serializedResultSize(uploadResult) > TOOL_RESULT_MAX_INLINE_CHARS)
|
||||
) {
|
||||
logger.warn('Upload read result too large', {
|
||||
path,
|
||||
hasAttachment: isImage,
|
||||
contentLength: uploadResult.content.length,
|
||||
serializedSize: serializedResultSize(uploadResult),
|
||||
})
|
||||
return {
|
||||
success: false,
|
||||
error:
|
||||
'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
|
||||
error: isOversizedReadPlaceholder(uploadResult.content)
|
||||
? uploadResult.content
|
||||
: 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
|
||||
}
|
||||
}
|
||||
const windowedUpload = applyWindow(uploadResult)
|
||||
logger.debug('vfs_read resolved chat upload', {
|
||||
path,
|
||||
totalLines: uploadResult.totalLines,
|
||||
hasAttachment: isImage,
|
||||
offset,
|
||||
limit,
|
||||
})
|
||||
@@ -188,34 +197,47 @@ export async function executeVfsRead(
|
||||
}
|
||||
|
||||
const vfs = await getOrMaterializeVFS(workspaceId, context.userId)
|
||||
const result = vfs.read(path, offset, limit)
|
||||
if (!result) {
|
||||
const fileContent = await vfs.readFileContent(path)
|
||||
if (fileContent) {
|
||||
if (
|
||||
!hasImageAttachment(fileContent) &&
|
||||
(isOversizedReadPlaceholder(fileContent.content) ||
|
||||
serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS)
|
||||
) {
|
||||
return {
|
||||
success: false,
|
||||
error:
|
||||
'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
|
||||
}
|
||||
}
|
||||
const windowedFileContent = applyWindow(fileContent)
|
||||
logger.debug('vfs_read resolved workspace file', {
|
||||
|
||||
// For workspace file paths (files/ or recently-deleted/files/), try readFileContent
|
||||
// first so images, PDFs, and documents get proper attachment/parsing handling rather
|
||||
// than being served as raw VFS metadata text.
|
||||
const fileContent = await vfs.readFileContent(path)
|
||||
if (fileContent) {
|
||||
const isImage = hasImageAttachment(fileContent)
|
||||
if (
|
||||
!isImage &&
|
||||
(isOversizedReadPlaceholder(fileContent.content) ||
|
||||
serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS)
|
||||
) {
|
||||
logger.warn('File read result too large', {
|
||||
path,
|
||||
totalLines: fileContent.totalLines,
|
||||
offset,
|
||||
limit,
|
||||
hasAttachment: isImage,
|
||||
contentLength: fileContent.content.length,
|
||||
serializedSize: serializedResultSize(fileContent),
|
||||
})
|
||||
return {
|
||||
success: true,
|
||||
output: windowedFileContent,
|
||||
success: false,
|
||||
error: isOversizedReadPlaceholder(fileContent.content)
|
||||
? fileContent.content
|
||||
: 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.',
|
||||
}
|
||||
}
|
||||
const windowedFileContent = applyWindow(fileContent)
|
||||
logger.debug('vfs_read resolved workspace file', {
|
||||
path,
|
||||
totalLines: fileContent.totalLines,
|
||||
hasAttachment: isImage,
|
||||
offset,
|
||||
limit,
|
||||
})
|
||||
return {
|
||||
success: true,
|
||||
output: windowedFileContent,
|
||||
}
|
||||
}
|
||||
|
||||
const result = vfs.read(path, offset, limit)
|
||||
if (!result) {
|
||||
const suggestions = vfs.suggestSimilar(path)
|
||||
logger.warn('vfs_read file not found', { path, suggestions })
|
||||
const hint =
|
||||
|
||||
94
apps/sim/lib/copilot/vfs/file-reader.test.ts
Normal file
94
apps/sim/lib/copilot/vfs/file-reader.test.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
/**
|
||||
* @vitest-environment node
|
||||
*/
|
||||
|
||||
import { randomFillSync } from 'node:crypto'
|
||||
import { loggerMock } from '@sim/testing'
|
||||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
const { downloadWorkspaceFile } = vi.hoisted(() => ({
|
||||
downloadWorkspaceFile: vi.fn(),
|
||||
}))
|
||||
|
||||
vi.mock('@sim/logger', () => loggerMock)
|
||||
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
|
||||
downloadWorkspaceFile,
|
||||
}))
|
||||
|
||||
import { readFileRecord } from '@/lib/copilot/vfs/file-reader'
|
||||
|
||||
const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024
|
||||
|
||||
async function makeNoisePng(width: number, height: number): Promise<Buffer> {
|
||||
const sharp = (await import('sharp')).default
|
||||
const raw = Buffer.alloc(width * height * 3)
|
||||
randomFillSync(raw)
|
||||
return sharp(raw, { raw: { width, height, channels: 3 } })
|
||||
.png()
|
||||
.toBuffer()
|
||||
}
|
||||
|
||||
describe('readFileRecord', () => {
|
||||
it('returns small images as attachments without resize note', async () => {
|
||||
const sharp = (await import('sharp')).default
|
||||
const smallPng = await sharp({
|
||||
create: {
|
||||
width: 200,
|
||||
height: 200,
|
||||
channels: 3,
|
||||
background: { r: 255, g: 0, b: 0 },
|
||||
},
|
||||
})
|
||||
.png()
|
||||
.toBuffer()
|
||||
|
||||
downloadWorkspaceFile.mockResolvedValue(smallPng)
|
||||
|
||||
const result = await readFileRecord({
|
||||
id: 'wf_small',
|
||||
workspaceId: 'ws_1',
|
||||
name: 'small.png',
|
||||
key: 'uploads/small.png',
|
||||
path: '/api/files/serve/uploads%2Fsmall.png?context=mothership',
|
||||
size: smallPng.length,
|
||||
type: 'image/png',
|
||||
uploadedBy: 'user_1',
|
||||
uploadedAt: new Date(),
|
||||
deletedAt: null,
|
||||
storageContext: 'mothership',
|
||||
})
|
||||
|
||||
expect(result?.attachment?.type).toBe('image')
|
||||
expect(result?.attachment?.source.media_type).toBe('image/png')
|
||||
expect(result?.content).not.toContain('resized for vision')
|
||||
expect(Buffer.from(result?.attachment?.source.data ?? '', 'base64')).toEqual(smallPng)
|
||||
})
|
||||
|
||||
it('downscales oversized images into attachments that fit the read limit', async () => {
|
||||
const largePng = await makeNoisePng(1800, 1800)
|
||||
expect(largePng.length).toBeGreaterThan(MAX_IMAGE_READ_BYTES)
|
||||
|
||||
downloadWorkspaceFile.mockResolvedValue(largePng)
|
||||
|
||||
const result = await readFileRecord({
|
||||
id: 'wf_large',
|
||||
workspaceId: 'ws_1',
|
||||
name: 'chesspng.png',
|
||||
key: 'uploads/chesspng.png',
|
||||
path: '/api/files/serve/uploads%2Fchesspng.png?context=mothership',
|
||||
size: largePng.length,
|
||||
type: 'image/png',
|
||||
uploadedBy: 'user_1',
|
||||
uploadedAt: new Date(),
|
||||
deletedAt: null,
|
||||
storageContext: 'mothership',
|
||||
})
|
||||
|
||||
expect(result?.attachment?.type).toBe('image')
|
||||
expect(result?.content).toContain('resized for vision')
|
||||
|
||||
const decoded = Buffer.from(result?.attachment?.source.data ?? '', 'base64')
|
||||
expect(decoded.length).toBeLessThanOrEqual(MAX_IMAGE_READ_BYTES)
|
||||
expect(result?.attachment?.source.media_type).toMatch(/^image\/(jpeg|webp|png)$/)
|
||||
})
|
||||
})
|
||||
@@ -1,13 +1,39 @@
|
||||
import { type Span, trace } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import {
|
||||
CopilotVfsOutcome,
|
||||
CopilotVfsReadOutcome,
|
||||
CopilotVfsReadPath,
|
||||
} from '@/lib/copilot/generated/trace-attribute-values-v1'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1'
|
||||
import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1'
|
||||
import { markSpanForError } from '@/lib/copilot/request/otel'
|
||||
import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
|
||||
import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager'
|
||||
import { isImageFileType } from '@/lib/uploads/utils/file-utils'
|
||||
|
||||
// Lazy tracer (same pattern as lib/copilot/request/otel.ts).
|
||||
function getVfsTracer() {
|
||||
return trace.getTracer('sim-copilot-vfs', '1.0.0')
|
||||
}
|
||||
|
||||
function recordSpanError(span: Span, err: unknown) {
|
||||
markSpanForError(span, err)
|
||||
}
|
||||
|
||||
const logger = createLogger('FileReader')
|
||||
|
||||
const MAX_TEXT_READ_BYTES = 5 * 1024 * 1024 // 5 MB
|
||||
const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 // 5 MB
|
||||
// Parseable-document byte cap. Large office/PDF files can still
|
||||
// produce huge extracted text; reject up front to avoid wasting a
|
||||
// download + parse only to blow past the tool-result budget.
|
||||
const MAX_PARSEABLE_READ_BYTES = 5 * 1024 * 1024 // 5 MB
|
||||
const MAX_IMAGE_DIMENSION = 1568
|
||||
const IMAGE_RESIZE_DIMENSIONS = [1568, 1280, 1024, 768]
|
||||
const IMAGE_QUALITY_STEPS = [85, 70, 55, 40]
|
||||
|
||||
const TEXT_TYPES = new Set([
|
||||
'text/plain',
|
||||
@@ -42,6 +68,194 @@ function detectImageMime(buf: Buffer, claimed: string): string {
|
||||
return claimed
|
||||
}
|
||||
|
||||
interface PreparedVisionImage {
|
||||
buffer: Buffer
|
||||
mediaType: string
|
||||
resized: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare an image for vision models: detect media type, optionally
|
||||
* resize/compress with sharp, and return the prepared buffer.
|
||||
*
|
||||
* Wrapped in a `copilot.vfs.prepare_image` span so the external trace
|
||||
* shows exactly when an image read blocked the request on CPU-heavy
|
||||
* encode attempts. Attributes record input dimensions, whether a resize
|
||||
* was needed, how many encode attempts it took, and the final
|
||||
* dimension/quality chosen.
|
||||
*/
|
||||
async function prepareImageForVision(
|
||||
buffer: Buffer,
|
||||
claimedType: string
|
||||
): Promise<PreparedVisionImage | null> {
|
||||
return getVfsTracer().startActiveSpan(
|
||||
TraceSpan.CopilotVfsPrepareImage,
|
||||
{
|
||||
attributes: {
|
||||
[TraceAttr.CopilotVfsInputBytes]: buffer.length,
|
||||
[TraceAttr.CopilotVfsInputMediaTypeClaimed]: claimedType,
|
||||
},
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const mediaType = detectImageMime(buffer, claimedType)
|
||||
span.setAttribute(TraceAttr.CopilotVfsInputMediaTypeDetected, mediaType)
|
||||
|
||||
let sharpModule: typeof import('sharp')
|
||||
try {
|
||||
sharpModule = (await import('sharp')).default
|
||||
} catch (err) {
|
||||
logger.warn('Failed to load sharp for image preparation', {
|
||||
mediaType,
|
||||
error: toError(err).message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotVfsSharpLoadFailed, true)
|
||||
const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotVfsOutcome,
|
||||
fitsWithoutSharp ? 'passthrough_no_sharp' : 'rejected_no_sharp'
|
||||
)
|
||||
return fitsWithoutSharp ? { buffer, mediaType, resized: false } : null
|
||||
}
|
||||
|
||||
let metadata: Awaited<ReturnType<ReturnType<typeof sharpModule>['metadata']>>
|
||||
try {
|
||||
metadata = await sharpModule(buffer, { limitInputPixels: false }).metadata()
|
||||
} catch (err) {
|
||||
logger.warn('Failed to read image metadata for VFS read', {
|
||||
mediaType,
|
||||
error: toError(err).message,
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotVfsMetadataFailed, true)
|
||||
const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotVfsOutcome,
|
||||
fitsWithoutSharp ? 'passthrough_no_metadata' : 'rejected_no_metadata'
|
||||
)
|
||||
return fitsWithoutSharp ? { buffer, mediaType, resized: false } : null
|
||||
}
|
||||
|
||||
const width = metadata.width ?? 0
|
||||
const height = metadata.height ?? 0
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsInputWidth]: width,
|
||||
[TraceAttr.CopilotVfsInputHeight]: height,
|
||||
})
|
||||
|
||||
const needsResize =
|
||||
buffer.length > MAX_IMAGE_READ_BYTES ||
|
||||
width > MAX_IMAGE_DIMENSION ||
|
||||
height > MAX_IMAGE_DIMENSION
|
||||
if (!needsResize) {
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsResized]: false,
|
||||
[TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.PassthroughFitsBudget,
|
||||
[TraceAttr.CopilotVfsOutputBytes]: buffer.length,
|
||||
[TraceAttr.CopilotVfsOutputMediaType]: mediaType,
|
||||
})
|
||||
return { buffer, mediaType, resized: false }
|
||||
}
|
||||
|
||||
const hasAlpha = Boolean(
|
||||
metadata.hasAlpha ||
|
||||
mediaType === 'image/png' ||
|
||||
mediaType === 'image/webp' ||
|
||||
mediaType === 'image/gif'
|
||||
)
|
||||
span.setAttribute(TraceAttr.CopilotVfsHasAlpha, hasAlpha)
|
||||
|
||||
let attempts = 0
|
||||
for (const dimension of IMAGE_RESIZE_DIMENSIONS) {
|
||||
for (const quality of IMAGE_QUALITY_STEPS) {
|
||||
attempts += 1
|
||||
try {
|
||||
const pipeline = sharpModule(buffer, { limitInputPixels: false }).rotate().resize({
|
||||
width: dimension,
|
||||
height: dimension,
|
||||
fit: 'inside',
|
||||
withoutEnlargement: true,
|
||||
})
|
||||
|
||||
const transformed = hasAlpha
|
||||
? {
|
||||
buffer: await pipeline
|
||||
.webp({ quality, alphaQuality: quality, effort: 4 })
|
||||
.toBuffer(),
|
||||
mediaType: 'image/webp',
|
||||
}
|
||||
: {
|
||||
buffer: await pipeline
|
||||
.jpeg({ quality, mozjpeg: true, chromaSubsampling: '4:4:4' })
|
||||
.toBuffer(),
|
||||
mediaType: 'image/jpeg',
|
||||
}
|
||||
|
||||
span.addEvent(TraceEvent.CopilotVfsResizeAttempt, {
|
||||
[TraceAttr.CopilotVfsResizeDimension]: dimension,
|
||||
[TraceAttr.CopilotVfsResizeQuality]: quality,
|
||||
[TraceAttr.CopilotVfsResizeOutputBytes]: transformed.buffer.length,
|
||||
[TraceAttr.CopilotVfsResizeFitsBudget]:
|
||||
transformed.buffer.length <= MAX_IMAGE_READ_BYTES,
|
||||
})
|
||||
|
||||
if (transformed.buffer.length <= MAX_IMAGE_READ_BYTES) {
|
||||
logger.info('Resized image for VFS read', {
|
||||
originalBytes: buffer.length,
|
||||
outputBytes: transformed.buffer.length,
|
||||
originalWidth: width || undefined,
|
||||
originalHeight: height || undefined,
|
||||
maxDimension: dimension,
|
||||
quality,
|
||||
originalMediaType: mediaType,
|
||||
outputMediaType: transformed.mediaType,
|
||||
})
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsResized]: true,
|
||||
[TraceAttr.CopilotVfsResizeAttempts]: attempts,
|
||||
[TraceAttr.CopilotVfsResizeChosenDimension]: dimension,
|
||||
[TraceAttr.CopilotVfsResizeChosenQuality]: quality,
|
||||
[TraceAttr.CopilotVfsOutputBytes]: transformed.buffer.length,
|
||||
[TraceAttr.CopilotVfsOutputMediaType]: transformed.mediaType,
|
||||
[TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.Resized,
|
||||
})
|
||||
return {
|
||||
buffer: transformed.buffer,
|
||||
mediaType: transformed.mediaType,
|
||||
resized: true,
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed image resize attempt for VFS read', {
|
||||
mediaType,
|
||||
dimension,
|
||||
quality,
|
||||
error: toError(err).message,
|
||||
})
|
||||
span.addEvent(TraceEvent.CopilotVfsResizeAttemptFailed, {
|
||||
[TraceAttr.CopilotVfsResizeDimension]: dimension,
|
||||
[TraceAttr.CopilotVfsResizeQuality]: quality,
|
||||
[TraceAttr.ErrorMessage]: toError(err).message.slice(0, 500),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsResized]: false,
|
||||
[TraceAttr.CopilotVfsResizeAttempts]: attempts,
|
||||
[TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.RejectedTooLargeAfterResize,
|
||||
})
|
||||
return null
|
||||
} catch (err) {
|
||||
recordSpanError(span, err)
|
||||
throw err
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export interface FileReadResult {
|
||||
content: string
|
||||
totalLines: number
|
||||
@@ -59,75 +273,140 @@ export interface FileReadResult {
|
||||
* Read and return the content of a workspace file record.
|
||||
* Handles images (base64 attachment), parseable documents (PDF, DOCX, etc.),
|
||||
* binary files, and plain text with size guards.
|
||||
*
|
||||
* Wrapped in `copilot.vfs.read_file` so the parent mothership trace shows
|
||||
* per-file read latency, the path taken (image / text / parseable /
|
||||
* binary), and any size rejection. The `prepareImageForVision` span
|
||||
* nests underneath for the image-resize path.
|
||||
*/
|
||||
export async function readFileRecord(record: WorkspaceFileRecord): Promise<FileReadResult | null> {
|
||||
try {
|
||||
if (isImageFileType(record.type)) {
|
||||
if (record.size > MAX_IMAGE_READ_BYTES) {
|
||||
return {
|
||||
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB)]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
const buffer = await downloadWorkspaceFile(record)
|
||||
const mime = detectImageMime(buffer, record.type)
|
||||
return {
|
||||
content: `Image: ${record.name} (${(record.size / 1024).toFixed(1)}KB, ${mime})`,
|
||||
totalLines: 1,
|
||||
attachment: {
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64',
|
||||
media_type: mime,
|
||||
data: buffer.toString('base64'),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if (isReadableType(record.type)) {
|
||||
if (record.size > MAX_TEXT_READ_BYTES) {
|
||||
return {
|
||||
content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
|
||||
const buffer = await downloadWorkspaceFile(record)
|
||||
const content = buffer.toString('utf-8')
|
||||
return { content, totalLines: content.split('\n').length }
|
||||
}
|
||||
|
||||
const ext = getExtension(record.name)
|
||||
if (PARSEABLE_EXTENSIONS.has(ext)) {
|
||||
const buffer = await downloadWorkspaceFile(record)
|
||||
return getVfsTracer().startActiveSpan(
|
||||
TraceSpan.CopilotVfsReadFile,
|
||||
{
|
||||
attributes: {
|
||||
[TraceAttr.CopilotVfsFileName]: record.name,
|
||||
[TraceAttr.CopilotVfsFileMediaType]: record.type,
|
||||
[TraceAttr.CopilotVfsFileSizeBytes]: record.size,
|
||||
[TraceAttr.CopilotVfsFileExtension]: getExtension(record.name),
|
||||
},
|
||||
},
|
||||
async (span) => {
|
||||
try {
|
||||
const { parseBuffer } = await import('@/lib/file-parsers')
|
||||
const result = await parseBuffer(buffer, ext)
|
||||
const content = result.content || ''
|
||||
return { content, totalLines: content.split('\n').length }
|
||||
} catch (parseErr) {
|
||||
logger.warn('Failed to parse document', {
|
||||
fileName: record.name,
|
||||
ext,
|
||||
error: toError(parseErr).message,
|
||||
if (isImageFileType(record.type)) {
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Image)
|
||||
const originalBuffer = await downloadWorkspaceFile(record)
|
||||
const prepared = await prepareImageForVision(originalBuffer, record.type)
|
||||
if (!prepared) {
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ImageTooLarge)
|
||||
return {
|
||||
content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB after resize/compression)]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
const sizeKb = (prepared.buffer.length / 1024).toFixed(1)
|
||||
const resizeNote = prepared.resized ? ', resized for vision' : ''
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.ImagePrepared,
|
||||
[TraceAttr.CopilotVfsReadOutputBytes]: prepared.buffer.length,
|
||||
[TraceAttr.CopilotVfsReadOutputMediaType]: prepared.mediaType,
|
||||
[TraceAttr.CopilotVfsReadImageResized]: prepared.resized,
|
||||
})
|
||||
return {
|
||||
content: `Image: ${record.name} (${sizeKb}KB, ${prepared.mediaType}${resizeNote})`,
|
||||
totalLines: 1,
|
||||
attachment: {
|
||||
type: 'image',
|
||||
source: {
|
||||
type: 'base64' as const,
|
||||
media_type: prepared.mediaType,
|
||||
data: prepared.buffer.toString('base64'),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
if (isReadableType(record.type)) {
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Text)
|
||||
if (record.size > MAX_TEXT_READ_BYTES) {
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.TextTooLarge)
|
||||
return {
|
||||
content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
|
||||
const buffer = await downloadWorkspaceFile(record)
|
||||
const content = buffer.toString('utf-8')
|
||||
const lines = content.split('\n').length
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.TextRead,
|
||||
[TraceAttr.CopilotVfsReadOutputBytes]: buffer.length,
|
||||
[TraceAttr.CopilotVfsReadOutputLines]: lines,
|
||||
})
|
||||
return { content, totalLines: lines }
|
||||
}
|
||||
|
||||
const ext = getExtension(record.name)
|
||||
if (PARSEABLE_EXTENSIONS.has(ext)) {
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.ParseableDocument)
|
||||
if (record.size > MAX_PARSEABLE_READ_BYTES) {
|
||||
span.setAttribute(
|
||||
TraceAttr.CopilotVfsReadOutcome,
|
||||
CopilotVfsReadOutcome.DocumentTooLarge
|
||||
)
|
||||
return {
|
||||
content: `[Document too large to parse inline: ${record.name} (${record.size} bytes, limit ${MAX_PARSEABLE_READ_BYTES})]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
const buffer = await downloadWorkspaceFile(record)
|
||||
try {
|
||||
const { parseBuffer } = await import('@/lib/file-parsers')
|
||||
const result = await parseBuffer(buffer, ext)
|
||||
const content = result.content || ''
|
||||
const lines = content.split('\n').length
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.DocumentParsed,
|
||||
[TraceAttr.CopilotVfsReadOutputBytes]: content.length,
|
||||
[TraceAttr.CopilotVfsReadOutputLines]: lines,
|
||||
})
|
||||
return { content, totalLines: lines }
|
||||
} catch (parseErr) {
|
||||
logger.warn('Failed to parse document', {
|
||||
fileName: record.name,
|
||||
ext,
|
||||
error: toError(parseErr).message,
|
||||
})
|
||||
span.addEvent(TraceEvent.CopilotVfsParseFailed, {
|
||||
[TraceAttr.ErrorMessage]: toError(parseErr).message.slice(0, 500),
|
||||
})
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ParseFailed)
|
||||
return {
|
||||
content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
span.setAttributes({
|
||||
[TraceAttr.CopilotVfsReadPath]: CopilotVfsReadPath.Binary,
|
||||
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.BinaryPlaceholder,
|
||||
})
|
||||
return {
|
||||
content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`,
|
||||
content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to read workspace file', {
|
||||
fileName: record.name,
|
||||
error: toError(err).message,
|
||||
})
|
||||
recordSpanError(span, err)
|
||||
span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ReadFailed)
|
||||
return null
|
||||
} finally {
|
||||
span.end()
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`,
|
||||
totalLines: 1,
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('Failed to read workspace file', {
|
||||
fileName: record.name,
|
||||
error: toError(err).message,
|
||||
})
|
||||
return null
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
import { context, type Span, SpanStatusCode, trace } from '@opentelemetry/api'
|
||||
import { createLogger } from '@sim/logger'
|
||||
import { toError } from '@sim/utils/errors'
|
||||
import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1'
|
||||
import type { TraceSpan } from '@/lib/logs/types'
|
||||
|
||||
/**
|
||||
@@ -279,8 +280,8 @@ export function createOTelSpanFromTraceSpan(traceSpan: TraceSpan, parentSpan?: S
|
||||
{
|
||||
attributes: {
|
||||
[GenAIAttributes.TOOL_NAME]: toolCall.name,
|
||||
'tool.status': toolCall.status,
|
||||
'tool.duration_ms': toolCall.duration || 0,
|
||||
[TraceAttr.ToolStatus]: toolCall.status,
|
||||
[TraceAttr.ToolDurationMs]: toolCall.duration || 0,
|
||||
},
|
||||
startTime: new Date(toolCall.startTime),
|
||||
},
|
||||
@@ -342,8 +343,8 @@ export function createOTelSpansForWorkflowExecution(params: {
|
||||
[GenAIAttributes.WORKFLOW_ID]: params.workflowId,
|
||||
[GenAIAttributes.WORKFLOW_NAME]: params.workflowName || params.workflowId,
|
||||
[GenAIAttributes.WORKFLOW_EXECUTION_ID]: params.executionId,
|
||||
'workflow.trigger': params.trigger,
|
||||
'workflow.duration_ms': params.totalDurationMs,
|
||||
[TraceAttr.WorkflowTrigger]: params.trigger,
|
||||
[TraceAttr.WorkflowDurationMs]: params.totalDurationMs,
|
||||
},
|
||||
startTime: new Date(params.startTime),
|
||||
},
|
||||
@@ -404,9 +405,9 @@ export async function traceBlockExecution<T>(
|
||||
blockMapping.spanName,
|
||||
{
|
||||
attributes: {
|
||||
'block.type': blockType,
|
||||
'block.id': blockId,
|
||||
'block.name': blockName,
|
||||
[TraceAttr.BlockType]: blockType,
|
||||
[TraceAttr.BlockId]: blockId,
|
||||
[TraceAttr.BlockName]: blockName,
|
||||
},
|
||||
},
|
||||
async (span) => {
|
||||
@@ -440,8 +441,8 @@ export function trackPlatformEvent(
|
||||
const span = tracer.startSpan(eventName, {
|
||||
attributes: {
|
||||
...attributes,
|
||||
'event.name': eventName,
|
||||
'event.timestamp': Date.now(),
|
||||
[TraceAttr.EventName]: eventName,
|
||||
[TraceAttr.EventTimestamp]: Date.now(),
|
||||
},
|
||||
})
|
||||
span.setStatus({ code: SpanStatusCode.OK })
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
"test:watch": "vitest",
|
||||
"test:coverage": "vitest run --coverage",
|
||||
"email:dev": "email dev --dir components/emails",
|
||||
"type-check": "tsc --noEmit",
|
||||
"type-check": "NODE_OPTIONS='--max-old-space-size=8192' tsc --noEmit",
|
||||
"lint": "biome check --write --unsafe .",
|
||||
"lint:check": "biome check .",
|
||||
"format": "biome format --write .",
|
||||
|
||||
@@ -534,6 +534,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
|
||||
updatedAt: '2026-04-16',
|
||||
},
|
||||
capabilities: {
|
||||
temperature: { min: 0, max: 1 },
|
||||
nativeStructuredOutputs: true,
|
||||
maxOutputTokens: 128000,
|
||||
thinking: {
|
||||
|
||||
@@ -29,7 +29,7 @@ const socketDb = drizzle(
|
||||
prepare: false,
|
||||
idle_timeout: 10,
|
||||
connect_timeout: 20,
|
||||
max: 10,
|
||||
max: 30,
|
||||
onnotice: () => {},
|
||||
}),
|
||||
{ schema }
|
||||
|
||||
1
bun.lock
1
bun.lock
@@ -1,6 +1,5 @@
|
||||
{
|
||||
"lockfileVersion": 1,
|
||||
"configVersion": 0,
|
||||
"workspaces": {
|
||||
"": {
|
||||
"name": "simstudio",
|
||||
|
||||
12
package.json
12
package.json
@@ -27,8 +27,16 @@
|
||||
"mship-tools:check": "bun run scripts/sync-tool-catalog.ts --check",
|
||||
"trace-contracts:generate": "bun run scripts/sync-request-trace-contract.ts",
|
||||
"trace-contracts:check": "bun run scripts/sync-request-trace-contract.ts --check",
|
||||
"mship:generate": "bun run mship-contracts:generate && bun run mship-tools:generate && bun run trace-contracts:generate",
|
||||
"mship:check": "bun run mship-contracts:check && bun run mship-tools:check && bun run trace-contracts:check",
|
||||
"trace-spans-contract:generate": "bun run scripts/sync-trace-spans-contract.ts",
|
||||
"trace-spans-contract:check": "bun run scripts/sync-trace-spans-contract.ts --check",
|
||||
"trace-attributes-contract:generate": "bun run scripts/sync-trace-attributes-contract.ts",
|
||||
"trace-attributes-contract:check": "bun run scripts/sync-trace-attributes-contract.ts --check",
|
||||
"trace-attribute-values-contract:generate": "bun run scripts/sync-trace-attribute-values-contract.ts",
|
||||
"trace-attribute-values-contract:check": "bun run scripts/sync-trace-attribute-values-contract.ts --check",
|
||||
"trace-events-contract:generate": "bun run scripts/sync-trace-events-contract.ts",
|
||||
"trace-events-contract:check": "bun run scripts/sync-trace-events-contract.ts --check",
|
||||
"mship:generate": "bun run scripts/generate-mship-contracts.ts",
|
||||
"mship:check": "bun run scripts/generate-mship-contracts.ts --check",
|
||||
"prepare": "bun husky",
|
||||
"type-check": "turbo run type-check",
|
||||
"release": "bun run scripts/create-single-release.ts"
|
||||
|
||||
@@ -14,7 +14,7 @@ const postgresClient = postgres(connectionString, {
|
||||
prepare: false,
|
||||
idle_timeout: 20,
|
||||
connect_timeout: 30,
|
||||
max: 10,
|
||||
max: 30,
|
||||
onnotice: () => {},
|
||||
})
|
||||
|
||||
|
||||
122
scripts/generate-mship-contracts.ts
Normal file
122
scripts/generate-mship-contracts.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env bun
|
||||
// Drive every mothership contract generator, then biome-format the
|
||||
// outputs so the committed files match what biome produces on commit
|
||||
// (avoids the stale-drift that comes from comparing raw json2ts output
|
||||
// against biome-formatted source).
|
||||
//
|
||||
// `--check` regenerates into a temp directory, formats identically,
|
||||
// and compares against the committed files — same semantics as the
|
||||
// old per-script `--check`, but accounts for post-generate formatting.
|
||||
|
||||
import { spawnSync } from 'node:child_process'
|
||||
import { copyFileSync, cpSync, mkdirSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'
|
||||
import { tmpdir } from 'node:os'
|
||||
import { dirname, join, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
const ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '..')
|
||||
|
||||
const GENERATORS = [
|
||||
'scripts/sync-mothership-stream-contract.ts',
|
||||
'scripts/sync-tool-catalog.ts',
|
||||
'scripts/sync-request-trace-contract.ts',
|
||||
'scripts/sync-trace-spans-contract.ts',
|
||||
'scripts/sync-trace-attributes-contract.ts',
|
||||
'scripts/sync-trace-attribute-values-contract.ts',
|
||||
'scripts/sync-trace-events-contract.ts',
|
||||
]
|
||||
|
||||
// Generated files under this path. We biome-format this whole dir on
|
||||
// each generate (and the temp copy on each check).
|
||||
const GENERATED_DIR = 'apps/sim/lib/copilot/generated'
|
||||
|
||||
// `tool-schemas-v1.ts` goes through biome's `--unsafe` bracket-quote
|
||||
// fixer which reformats every key of TOOL_RUNTIME_SCHEMAS. Strip it
|
||||
// from the format pass so generator output stays stable on both sides.
|
||||
const FORMAT_EXCLUDE = new Set(['tool-schemas-v1.ts'])
|
||||
|
||||
function run(cmd: string[], cwd: string, env: NodeJS.ProcessEnv = process.env): void {
|
||||
const result = spawnSync(cmd[0], cmd.slice(1), {
|
||||
cwd,
|
||||
env,
|
||||
stdio: 'inherit',
|
||||
})
|
||||
if (result.status !== 0) {
|
||||
process.exit(result.status ?? 1)
|
||||
}
|
||||
}
|
||||
|
||||
function runGenerators(outputOverride?: string): void {
|
||||
const env = { ...process.env }
|
||||
for (const script of GENERATORS) {
|
||||
const args = ['bun', 'run', script]
|
||||
if (outputOverride) {
|
||||
// Individual scripts don't accept a custom output dir; for
|
||||
// --check we generate in place and snapshot before/after via
|
||||
// git-index comparison (see runCheck).
|
||||
}
|
||||
run(args, ROOT, env)
|
||||
}
|
||||
}
|
||||
|
||||
function formatGenerated(dir: string): void {
|
||||
const files = readdirNoThrow(dir).filter((f) => !FORMAT_EXCLUDE.has(f) && f.endsWith('.ts'))
|
||||
if (files.length === 0) return
|
||||
const paths = files.map((f) => join(dir, f))
|
||||
run(['bunx', 'biome', 'check', '--write', ...paths], ROOT)
|
||||
}
|
||||
|
||||
function readdirNoThrow(dir: string): string[] {
|
||||
try {
|
||||
// Bun has fs.readdirSync available as a CommonJS import
|
||||
const fs = require('node:fs') as typeof import('node:fs')
|
||||
return fs.readdirSync(dir)
|
||||
} catch {
|
||||
return []
|
||||
}
|
||||
}
|
||||
|
||||
function runCheck(): void {
|
||||
const targetDir = resolve(ROOT, GENERATED_DIR)
|
||||
// Snapshot current committed state
|
||||
const committed: Record<string, string> = {}
|
||||
for (const f of readdirNoThrow(targetDir)) {
|
||||
if (!f.endsWith('.ts')) continue
|
||||
committed[f] = readFileSync(join(targetDir, f), 'utf8')
|
||||
}
|
||||
|
||||
// Regenerate in place + format, then diff against the snapshot
|
||||
runGenerators()
|
||||
formatGenerated(targetDir)
|
||||
|
||||
const stale: string[] = []
|
||||
for (const [name, oldContent] of Object.entries(committed)) {
|
||||
if (FORMAT_EXCLUDE.has(name)) continue
|
||||
const newContent = readFileSync(join(targetDir, name), 'utf8')
|
||||
if (newContent !== oldContent) stale.push(name)
|
||||
}
|
||||
|
||||
// Restore the committed state regardless of outcome (--check is readonly).
|
||||
for (const [name, content] of Object.entries(committed)) {
|
||||
const fs = require('node:fs') as typeof import('node:fs')
|
||||
fs.writeFileSync(join(targetDir, name), content, 'utf8')
|
||||
}
|
||||
|
||||
if (stale.length > 0) {
|
||||
console.error(
|
||||
`Generated contracts are stale: ${stale.join(', ')}. Run: bun run mship:generate`,
|
||||
)
|
||||
process.exit(1)
|
||||
}
|
||||
console.log('All generated contracts up to date.')
|
||||
}
|
||||
|
||||
function runGenerate(): void {
|
||||
runGenerators()
|
||||
formatGenerated(resolve(ROOT, GENERATED_DIR))
|
||||
console.log('Generated + formatted mothership contracts.')
|
||||
}
|
||||
|
||||
const checkOnly = process.argv.includes('--check')
|
||||
if (checkOnly) runCheck()
|
||||
else runGenerate()
|
||||
155
scripts/sync-trace-attribute-values-contract.ts
Normal file
155
scripts/sync-trace-attribute-values-contract.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { dirname, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
/**
|
||||
* Generate `apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts`
|
||||
* from the Go-side `contracts/trace-attribute-values-v1.schema.json`
|
||||
* contract.
|
||||
*
|
||||
* Unlike span-names / attribute-keys / event-names (each of which is a
|
||||
* single enum), this contract carries MULTIPLE enums — one per span
|
||||
* attribute whose value set is closed. The schema's `$defs` holds one
|
||||
* definition per enum (e.g. `CopilotRequestCancelReason`,
|
||||
* `CopilotAbortOutcome`, …). For each $def we emit a TS `as const`
|
||||
* object named after the Go type, so call sites read as:
|
||||
*
|
||||
* span.setAttribute(
|
||||
* TraceAttr.CopilotRequestCancelReason,
|
||||
* CopilotRequestCancelReason.ExplicitStop,
|
||||
* )
|
||||
*
|
||||
* Skipped $defs: anything that doesn't have a string-only `enum`
|
||||
* array. That filters out wrapper structs the reflector adds
|
||||
* incidentally (e.g. `TraceAttributeValuesV1AllDefs`).
|
||||
*/
|
||||
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
|
||||
const ROOT = resolve(SCRIPT_DIR, '..')
|
||||
const DEFAULT_CONTRACT_PATH = resolve(
|
||||
ROOT,
|
||||
'../copilot/copilot/contracts/trace-attribute-values-v1.schema.json',
|
||||
)
|
||||
const OUTPUT_PATH = resolve(
|
||||
ROOT,
|
||||
'apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts',
|
||||
)
|
||||
|
||||
interface ExtractedEnum {
|
||||
/** The Go type name — becomes the TS const + type name. */
|
||||
name: string
|
||||
/** The value strings, sorted for diff stability. */
|
||||
values: string[]
|
||||
}
|
||||
|
||||
function extractEnums(schema: Record<string, unknown>): ExtractedEnum[] {
|
||||
const defs = (schema.$defs ?? {}) as Record<string, unknown>
|
||||
const out: ExtractedEnum[] = []
|
||||
for (const [name, def] of Object.entries(defs)) {
|
||||
if (!def || typeof def !== 'object') continue
|
||||
const enumValues = (def as Record<string, unknown>).enum
|
||||
if (!Array.isArray(enumValues)) continue
|
||||
if (!enumValues.every((v) => typeof v === 'string')) continue
|
||||
out.push({ name, values: (enumValues as string[]).slice().sort() })
|
||||
}
|
||||
out.sort((a, b) => a.name.localeCompare(b.name))
|
||||
return out
|
||||
}
|
||||
|
||||
/**
|
||||
* PascalCase identifier for a wire enum value. Mirrors the algorithm
|
||||
* used by the span-names + attribute-keys scripts, so
|
||||
* `explicit_stop` -> `ExplicitStop`, matching what a reader would
|
||||
* guess from Go's exported constants.
|
||||
*/
|
||||
function toValueIdent(value: string): string {
|
||||
const parts = value.split(/[^A-Za-z0-9]+/).filter(Boolean)
|
||||
if (parts.length === 0) {
|
||||
throw new Error(`Cannot derive identifier for enum value: ${value}`)
|
||||
}
|
||||
const ident = parts
|
||||
.map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase())
|
||||
.join('')
|
||||
if (/^[0-9]/.test(ident)) {
|
||||
throw new Error(
|
||||
`Derived identifier "${ident}" for value "${value}" starts with a digit`,
|
||||
)
|
||||
}
|
||||
return ident
|
||||
}
|
||||
|
||||
function renderEnum(e: ExtractedEnum): string {
|
||||
const seen = new Map<string, string>()
|
||||
const lines = e.values.map((v) => {
|
||||
const ident = toValueIdent(v)
|
||||
const prev = seen.get(ident)
|
||||
if (prev && prev !== v) {
|
||||
throw new Error(
|
||||
`Enum ${e.name}: identifier collision — "${prev}" and "${v}" both map to "${ident}"`,
|
||||
)
|
||||
}
|
||||
seen.set(ident, v)
|
||||
return ` ${ident}: ${JSON.stringify(v)},`
|
||||
})
|
||||
|
||||
return `export const ${e.name} = {
|
||||
${lines.join('\n')}
|
||||
} as const;
|
||||
|
||||
export type ${e.name}Key = keyof typeof ${e.name};
|
||||
export type ${e.name}Value = (typeof ${e.name})[${e.name}Key];`
|
||||
}
|
||||
|
||||
function render(enums: ExtractedEnum[]): string {
|
||||
const body = enums.map(renderEnum).join('\n\n')
|
||||
return `// AUTO-GENERATED FILE. DO NOT EDIT.
|
||||
//
|
||||
// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json
|
||||
// Regenerate with: bun run trace-attribute-values-contract:generate
|
||||
//
|
||||
// Canonical closed-set value vocabularies for mothership OTel
|
||||
// attributes. Call sites should reference e.g.
|
||||
// \`CopilotRequestCancelReason.ExplicitStop\` rather than the raw
|
||||
// string literal, so typos become compile errors and the Go contract
|
||||
// remains the single source of truth.
|
||||
|
||||
${body}
|
||||
`
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const checkOnly = process.argv.includes('--check')
|
||||
const inputArg = process.argv.find((a) => a.startsWith('--input='))
|
||||
const inputPath = inputArg
|
||||
? resolve(ROOT, inputArg.slice('--input='.length))
|
||||
: DEFAULT_CONTRACT_PATH
|
||||
|
||||
const raw = await readFile(inputPath, 'utf8')
|
||||
const schema = JSON.parse(raw)
|
||||
const enums = extractEnums(schema)
|
||||
if (enums.length === 0) {
|
||||
throw new Error(
|
||||
'No enum $defs found in trace-attribute-values-v1.schema.json — did you add the Go type to TraceAttributeValuesV1AllDefs?',
|
||||
)
|
||||
}
|
||||
const rendered = render(enums)
|
||||
|
||||
if (checkOnly) {
|
||||
const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
|
||||
if (existing !== rendered) {
|
||||
throw new Error(
|
||||
'Generated trace attribute values contract is stale. Run: bun run trace-attribute-values-contract:generate',
|
||||
)
|
||||
}
|
||||
console.log('Trace attribute values contract is up to date.')
|
||||
return
|
||||
}
|
||||
|
||||
await mkdir(dirname(OUTPUT_PATH), { recursive: true })
|
||||
await writeFile(OUTPUT_PATH, rendered, 'utf8')
|
||||
console.log(`Generated trace attribute values types -> ${OUTPUT_PATH}`)
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(err)
|
||||
process.exit(1)
|
||||
})
|
||||
168
scripts/sync-trace-attributes-contract.ts
Normal file
168
scripts/sync-trace-attributes-contract.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { dirname, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
/**
|
||||
* Generate `apps/sim/lib/copilot/generated/trace-attributes-v1.ts`
|
||||
* from the Go-side `contracts/trace-attributes-v1.schema.json`
|
||||
* contract.
|
||||
*
|
||||
* The contract is a single-enum JSON Schema listing every CUSTOM
|
||||
* (non-OTel-semconv) span attribute key used in mothership. We emit:
|
||||
* - A `TraceAttr` const object keyed by PascalCase identifier whose
|
||||
* values are the exact wire strings, so call sites look like
|
||||
* `span.setAttribute(TraceAttr.ChatId, …)` instead of the raw
|
||||
* `span.setAttribute('chat.id', …)`.
|
||||
* - A `TraceAttrKey` union and a `TraceAttrValue` union type so
|
||||
* helpers that take an attribute key are well-typed.
|
||||
* - A sorted `TraceAttrValues` readonly array for tests/enumeration.
|
||||
*
|
||||
* This is the attribute-key twin of `sync-trace-spans-contract.ts`
|
||||
* (span names). The two files share the enum-extraction + identifier
|
||||
* PascalCase + collision-detection pattern so a reader who understands
|
||||
* one understands both.
|
||||
*
|
||||
* For OTel semantic-convention keys (e.g. `http.request.method`,
|
||||
* `db.system`, `gen_ai.system`, `messaging.*`, `net.*`,
|
||||
* `service.name`, `deployment.environment`), import from
|
||||
* `@opentelemetry/semantic-conventions` directly — they live in the
|
||||
* upstream package, not in this contract.
|
||||
*/
|
||||
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
|
||||
const ROOT = resolve(SCRIPT_DIR, '..')
|
||||
const DEFAULT_CONTRACT_PATH = resolve(
|
||||
ROOT,
|
||||
'../copilot/copilot/contracts/trace-attributes-v1.schema.json',
|
||||
)
|
||||
const OUTPUT_PATH = resolve(
|
||||
ROOT,
|
||||
'apps/sim/lib/copilot/generated/trace-attributes-v1.ts',
|
||||
)
|
||||
|
||||
function extractAttrKeys(schema: Record<string, unknown>): string[] {
|
||||
const defs = (schema.$defs ?? {}) as Record<string, unknown>
|
||||
const nameDef = defs.TraceAttributesV1Name
|
||||
if (
|
||||
!nameDef ||
|
||||
typeof nameDef !== 'object' ||
|
||||
!Array.isArray((nameDef as Record<string, unknown>).enum)
|
||||
) {
|
||||
throw new Error(
|
||||
'trace-attributes-v1.schema.json is missing $defs.TraceAttributesV1Name.enum',
|
||||
)
|
||||
}
|
||||
const enumValues = (nameDef as Record<string, unknown>).enum as unknown[]
|
||||
if (!enumValues.every((v) => typeof v === 'string')) {
|
||||
throw new Error('TraceAttributesV1Name enum must be string-only')
|
||||
}
|
||||
return (enumValues as string[]).slice().sort()
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a wire attribute key like `copilot.vfs.input.media_type_claimed`
|
||||
* into an identifier-safe PascalCase key like
|
||||
* `CopilotVfsInputMediaTypeClaimed`.
|
||||
*
|
||||
* Same algorithm as the span-name sync script so readers can learn one
|
||||
* and reuse it.
|
||||
*/
|
||||
function toIdentifier(name: string): string {
|
||||
const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean)
|
||||
if (parts.length === 0) {
|
||||
throw new Error(`Cannot derive identifier for attribute key: ${name}`)
|
||||
}
|
||||
const ident = parts
|
||||
.map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase())
|
||||
.join('')
|
||||
if (/^[0-9]/.test(ident)) {
|
||||
throw new Error(
|
||||
`Derived identifier "${ident}" for attribute "${name}" starts with a digit`,
|
||||
)
|
||||
}
|
||||
return ident
|
||||
}
|
||||
|
||||
function render(attrKeys: string[]): string {
|
||||
const pairs = attrKeys.map((name) => ({ name, ident: toIdentifier(name) }))
|
||||
|
||||
// Identifier collisions silently override earlier keys and break
|
||||
// type safety — fail loudly instead.
|
||||
const seen = new Map<string, string>()
|
||||
for (const p of pairs) {
|
||||
const prev = seen.get(p.ident)
|
||||
if (prev && prev !== p.name) {
|
||||
throw new Error(
|
||||
`Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`,
|
||||
)
|
||||
}
|
||||
seen.set(p.ident, p.name)
|
||||
}
|
||||
|
||||
const constLines = pairs
|
||||
.map((p) => ` ${p.ident}: ${JSON.stringify(p.name)},`)
|
||||
.join('\n')
|
||||
const arrayEntries = attrKeys.map((n) => ` ${JSON.stringify(n)},`).join('\n')
|
||||
|
||||
return `// AUTO-GENERATED FILE. DO NOT EDIT.
|
||||
//
|
||||
// Source: copilot/copilot/contracts/trace-attributes-v1.schema.json
|
||||
// Regenerate with: bun run trace-attributes-contract:generate
|
||||
//
|
||||
// Canonical custom mothership OTel span attribute keys. Call sites
|
||||
// should reference \`TraceAttr.<Identifier>\` (e.g.
|
||||
// \`TraceAttr.ChatId\`, \`TraceAttr.ToolCallId\`) rather than raw
|
||||
// string literals, so the Go-side contract is the single source of
|
||||
// truth and typos become compile errors.
|
||||
//
|
||||
// For OTel semantic-convention keys (\`http.*\`, \`db.*\`,
|
||||
// \`gen_ai.*\`, \`net.*\`, \`messaging.*\`, \`service.*\`,
|
||||
// \`deployment.environment\`), import from
|
||||
// \`@opentelemetry/semantic-conventions\` directly — those are owned
|
||||
// by the upstream OTel spec, not by this contract.
|
||||
|
||||
export const TraceAttr = {
|
||||
${constLines}
|
||||
} as const;
|
||||
|
||||
export type TraceAttrKey = keyof typeof TraceAttr;
|
||||
export type TraceAttrValue = (typeof TraceAttr)[TraceAttrKey];
|
||||
|
||||
/** Readonly sorted list of every canonical custom attribute key. */
|
||||
export const TraceAttrValues: readonly TraceAttrValue[] = [
|
||||
${arrayEntries}
|
||||
] as const;
|
||||
`
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const checkOnly = process.argv.includes('--check')
|
||||
const inputArg = process.argv.find((a) => a.startsWith('--input='))
|
||||
const inputPath = inputArg
|
||||
? resolve(ROOT, inputArg.slice('--input='.length))
|
||||
: DEFAULT_CONTRACT_PATH
|
||||
|
||||
const raw = await readFile(inputPath, 'utf8')
|
||||
const schema = JSON.parse(raw)
|
||||
const attrKeys = extractAttrKeys(schema)
|
||||
const rendered = render(attrKeys)
|
||||
|
||||
if (checkOnly) {
|
||||
const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
|
||||
if (existing !== rendered) {
|
||||
throw new Error(
|
||||
'Generated trace attributes contract is stale. Run: bun run trace-attributes-contract:generate',
|
||||
)
|
||||
}
|
||||
console.log('Trace attributes contract is up to date.')
|
||||
return
|
||||
}
|
||||
|
||||
await mkdir(dirname(OUTPUT_PATH), { recursive: true })
|
||||
await writeFile(OUTPUT_PATH, rendered, 'utf8')
|
||||
console.log(`Generated trace attributes types -> ${OUTPUT_PATH}`)
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(err)
|
||||
process.exit(1)
|
||||
})
|
||||
137
scripts/sync-trace-events-contract.ts
Normal file
137
scripts/sync-trace-events-contract.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { dirname, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
/**
|
||||
* Generate `apps/sim/lib/copilot/generated/trace-events-v1.ts` from
|
||||
* the Go-side `contracts/trace-events-v1.schema.json` contract.
|
||||
*
|
||||
* Mirrors the span-names + attribute-keys sync scripts exactly — the
|
||||
* only difference is the $defs key (`TraceEventsV1Name`), the output
|
||||
* path, and the generated const name (`TraceEvent`). Keeping the
|
||||
* scripts structurally identical means a reader who understands one
|
||||
* understands all three, and drift between them gets caught
|
||||
* immediately in code review.
|
||||
*/
|
||||
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
|
||||
const ROOT = resolve(SCRIPT_DIR, '..')
|
||||
const DEFAULT_CONTRACT_PATH = resolve(
|
||||
ROOT,
|
||||
'../copilot/copilot/contracts/trace-events-v1.schema.json',
|
||||
)
|
||||
const OUTPUT_PATH = resolve(
|
||||
ROOT,
|
||||
'apps/sim/lib/copilot/generated/trace-events-v1.ts',
|
||||
)
|
||||
|
||||
function extractEventNames(schema: Record<string, unknown>): string[] {
|
||||
const defs = (schema.$defs ?? {}) as Record<string, unknown>
|
||||
const nameDef = defs.TraceEventsV1Name
|
||||
if (
|
||||
!nameDef ||
|
||||
typeof nameDef !== 'object' ||
|
||||
!Array.isArray((nameDef as Record<string, unknown>).enum)
|
||||
) {
|
||||
throw new Error(
|
||||
'trace-events-v1.schema.json is missing $defs.TraceEventsV1Name.enum',
|
||||
)
|
||||
}
|
||||
const enumValues = (nameDef as Record<string, unknown>).enum as unknown[]
|
||||
if (!enumValues.every((v) => typeof v === 'string')) {
|
||||
throw new Error('TraceEventsV1Name enum must be string-only')
|
||||
}
|
||||
return (enumValues as string[]).slice().sort()
|
||||
}
|
||||
|
||||
function toIdentifier(name: string): string {
|
||||
const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean)
|
||||
if (parts.length === 0) {
|
||||
throw new Error(`Cannot derive identifier for event name: ${name}`)
|
||||
}
|
||||
const ident = parts
|
||||
.map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase())
|
||||
.join('')
|
||||
if (/^[0-9]/.test(ident)) {
|
||||
throw new Error(
|
||||
`Derived identifier "${ident}" for event "${name}" starts with a digit`,
|
||||
)
|
||||
}
|
||||
return ident
|
||||
}
|
||||
|
||||
/**
 * Render the full text of the generated TypeScript module for the
 * given event names: a `TraceEvent` const object (PascalCase key ->
 * wire name), derived key/value types, and a readonly list of every
 * name.
 *
 * @param eventNames - canonical wire names (already sorted by caller).
 * @returns the complete file contents, ending in a trailing newline.
 * @throws Error when two wire names collapse to one identifier.
 */
function render(eventNames: string[]): string {
  // Pair each wire name with its PascalCase identifier up front so the
  // collision check and the emitted const lines share one mapping.
  const pairs = eventNames.map((name) => ({ name, ident: toIdentifier(name) }))

  // Collision guard: two distinct names mapping to one identifier would
  // silently drop an entry from the const object — fail loudly instead.
  const seen = new Map<string, string>()
  for (const p of pairs) {
    const prev = seen.get(p.ident)
    if (prev && prev !== p.name) {
      throw new Error(
        `Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`,
      )
    }
    seen.set(p.ident, p.name)
  }

  const constLines = pairs
    .map((p) => ` ${p.ident}: ${JSON.stringify(p.name)},`)
    .join('\n')
  const arrayEntries = eventNames.map((n) => ` ${JSON.stringify(n)},`).join('\n')

  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-events-v1.schema.json
// Regenerate with: bun run trace-events-contract:generate
//
// Canonical mothership OTel span event names. Call sites should
// reference \`TraceEvent.<Identifier>\` (e.g.
// \`TraceEvent.RequestCancelled\`) rather than raw string literals,
// so the Go-side contract is the single source of truth and typos
// become compile errors.

export const TraceEvent = {
${constLines}
} as const;

export type TraceEventKey = keyof typeof TraceEvent;
export type TraceEventValue = (typeof TraceEvent)[TraceEventKey];

/** Readonly sorted list of every canonical event name. */
export const TraceEventValues: readonly TraceEventValue[] = [
${arrayEntries}
] as const;
`
}
|
||||
|
||||
async function main() {
|
||||
const checkOnly = process.argv.includes('--check')
|
||||
const inputArg = process.argv.find((a) => a.startsWith('--input='))
|
||||
const inputPath = inputArg
|
||||
? resolve(ROOT, inputArg.slice('--input='.length))
|
||||
: DEFAULT_CONTRACT_PATH
|
||||
|
||||
const raw = await readFile(inputPath, 'utf8')
|
||||
const schema = JSON.parse(raw)
|
||||
const eventNames = extractEventNames(schema)
|
||||
const rendered = render(eventNames)
|
||||
|
||||
if (checkOnly) {
|
||||
const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
|
||||
if (existing !== rendered) {
|
||||
throw new Error(
|
||||
'Generated trace events contract is stale. Run: bun run trace-events-contract:generate',
|
||||
)
|
||||
}
|
||||
console.log('Trace events contract is up to date.')
|
||||
return
|
||||
}
|
||||
|
||||
await mkdir(dirname(OUTPUT_PATH), { recursive: true })
|
||||
await writeFile(OUTPUT_PATH, rendered, 'utf8')
|
||||
console.log(`Generated trace events types -> ${OUTPUT_PATH}`)
|
||||
}
|
||||
|
||||
// Run the generator; report any failure (missing contract, stale
// --check result, bad schema) and exit non-zero so CI fails.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})
|
||||
155
scripts/sync-trace-spans-contract.ts
Normal file
155
scripts/sync-trace-spans-contract.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
import { mkdir, readFile, writeFile } from 'node:fs/promises'
|
||||
import { dirname, resolve } from 'node:path'
|
||||
import { fileURLToPath } from 'node:url'
|
||||
|
||||
/**
|
||||
* Generate `apps/sim/lib/copilot/generated/trace-spans-v1.ts` from the
|
||||
* Go-side `contracts/trace-spans-v1.schema.json` contract.
|
||||
*
|
||||
* The contract is a single-enum JSON Schema. We emit:
|
||||
* - A `TraceSpansV1Name` const object (key-as-value) for ergonomic
|
||||
* access: `TraceSpansV1Name['copilot.vfs.read_file']`.
|
||||
* - A `TraceSpansV1NameValue` union type.
|
||||
* - A sorted `TraceSpansV1Names` readonly array (useful for tests that
|
||||
* verify coverage, and for tooling that wants to enumerate names).
|
||||
*
|
||||
* We deliberately do NOT pass through `json-schema-to-typescript` —
|
||||
* it would generate a noisy `TraceSpansV1` object type for the wrapper
|
||||
* that drives reflection; the wrapper type has no runtime use on the Sim
|
||||
* side and would obscure the actual enum.
|
||||
*/
|
||||
// Absolute directory of this script, derived from the module URL so
// the generator behaves the same regardless of the current working dir.
const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url))
// Repository root (this script lives in <root>/scripts).
const ROOT = resolve(SCRIPT_DIR, '..')
// Go-side contract read by default; overridable via --input= in main().
const DEFAULT_CONTRACT_PATH = resolve(
  ROOT,
  '../copilot/copilot/contracts/trace-spans-v1.schema.json',
)
// Generated TypeScript module consumed by Sim call sites.
const OUTPUT_PATH = resolve(
  ROOT,
  'apps/sim/lib/copilot/generated/trace-spans-v1.ts',
)
|
||||
|
||||
function extractSpanNames(schema: Record<string, unknown>): string[] {
|
||||
const defs = (schema.$defs ?? {}) as Record<string, unknown>
|
||||
const nameDef = defs.TraceSpansV1Name
|
||||
if (
|
||||
!nameDef ||
|
||||
typeof nameDef !== 'object' ||
|
||||
!Array.isArray((nameDef as Record<string, unknown>).enum)
|
||||
) {
|
||||
throw new Error(
|
||||
'trace-spans-v1.schema.json is missing $defs.TraceSpansV1Name.enum',
|
||||
)
|
||||
}
|
||||
const enumValues = (nameDef as Record<string, unknown>).enum as unknown[]
|
||||
if (!enumValues.every((v) => typeof v === 'string')) {
|
||||
throw new Error('TraceSpansV1Name enum must be string-only')
|
||||
}
|
||||
return (enumValues as string[]).slice().sort()
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a wire name like "copilot.recovery.check_replay_gap" into an
|
||||
* identifier-safe PascalCase key like "CopilotRecoveryCheckReplayGap",
|
||||
* so call sites read as `TraceSpan.CopilotRecoveryCheckReplayGap`
|
||||
* instead of `TraceSpan["copilot.recovery.check_replay_gap"]`.
|
||||
*
|
||||
* Splits on `.`, `_`, and non-alphanumeric characters; capitalizes each
|
||||
* part; collapses. Strict mapping (not a best-effort heuristic), so the
|
||||
* same input always produces the same identifier.
|
||||
*/
|
||||
function toIdentifier(name: string): string {
|
||||
const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean)
|
||||
if (parts.length === 0) {
|
||||
throw new Error(`Cannot derive identifier for span name: ${name}`)
|
||||
}
|
||||
const ident = parts
|
||||
.map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase())
|
||||
.join('')
|
||||
// Safety: identifiers may not start with a digit.
|
||||
if (/^[0-9]/.test(ident)) {
|
||||
throw new Error(
|
||||
`Derived identifier "${ident}" for span "${name}" starts with a digit`,
|
||||
)
|
||||
}
|
||||
return ident
|
||||
}
|
||||
|
||||
/**
 * Render the full text of the generated TypeScript module for the
 * given span names: a `TraceSpan` const object (PascalCase key ->
 * wire name), derived key/value types, and a readonly list of every
 * name.
 *
 * @param spanNames - canonical wire names (already sorted by caller).
 * @returns the complete file contents, ending in a trailing newline.
 * @throws Error when two wire names collapse to one identifier.
 */
function render(spanNames: string[]): string {
  // Pair each wire name with its PascalCase identifier up front so the
  // collision check and the emitted const lines share one mapping.
  const pairs = spanNames.map((name) => ({ name, ident: toIdentifier(name) }))

  // Guard against collisions: if two wire names ever collapse to the
  // same PascalCase identifier, we want a clear build failure, not a
  // silent override.
  const seen = new Map<string, string>()
  for (const p of pairs) {
    const prev = seen.get(p.ident)
    if (prev && prev !== p.name) {
      throw new Error(
        `Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`,
      )
    }
    seen.set(p.ident, p.name)
  }

  const constLines = pairs
    .map((p) => ` ${p.ident}: ${JSON.stringify(p.name)},`)
    .join('\n')
  const arrayEntries = spanNames.map((n) => ` ${JSON.stringify(n)},`).join('\n')

  return `// AUTO-GENERATED FILE. DO NOT EDIT.
//
// Source: copilot/copilot/contracts/trace-spans-v1.schema.json
// Regenerate with: bun run trace-spans-contract:generate
//
// Canonical mothership OTel span names. Call sites should reference
// \`TraceSpan.<Identifier>\` (e.g. \`TraceSpan.CopilotVfsReadFile\`)
// rather than raw string literals, so the Go-side contract is the
// single source of truth and typos become compile errors.

export const TraceSpan = {
${constLines}
} as const;

export type TraceSpanKey = keyof typeof TraceSpan;
export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey];

/** Readonly sorted list of every canonical span name. */
export const TraceSpanValues: readonly TraceSpanValue[] = [
${arrayEntries}
] as const;
`
}
|
||||
|
||||
async function main() {
|
||||
const checkOnly = process.argv.includes('--check')
|
||||
const inputArg = process.argv.find((a) => a.startsWith('--input='))
|
||||
const inputPath = inputArg
|
||||
? resolve(ROOT, inputArg.slice('--input='.length))
|
||||
: DEFAULT_CONTRACT_PATH
|
||||
|
||||
const raw = await readFile(inputPath, 'utf8')
|
||||
const schema = JSON.parse(raw)
|
||||
const spanNames = extractSpanNames(schema)
|
||||
const rendered = render(spanNames)
|
||||
|
||||
if (checkOnly) {
|
||||
const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
|
||||
if (existing !== rendered) {
|
||||
throw new Error(
|
||||
'Generated trace spans contract is stale. Run: bun run trace-spans-contract:generate',
|
||||
)
|
||||
}
|
||||
console.log('Trace spans contract is up to date.')
|
||||
return
|
||||
}
|
||||
|
||||
await mkdir(dirname(OUTPUT_PATH), { recursive: true })
|
||||
await writeFile(OUTPUT_PATH, rendered, 'utf8')
|
||||
console.log(`Generated trace spans types -> ${OUTPUT_PATH}`)
|
||||
}
|
||||
|
||||
// Run the generator; report any failure (missing contract, stale
// --check result, bad schema) and exit non-zero so CI fails.
main().catch((err) => {
  console.error(err)
  process.exit(1)
})
|
||||
Reference in New Issue
Block a user