fix(wand): remove duplicate transfer encoding header meant to be set by nginx proxy (#1221)

This commit is contained in:
Waleed
2025-09-02 09:15:25 -07:00
committed by GitHub
parent 2c2c32c64b
commit 54d42b33eb

View File

@@ -49,7 +49,6 @@ interface RequestBody {
history?: ChatMessage[]
}
// Helper: safe stringify for error payloads that may include circular structures
function safeStringify(value: unknown): string {
try {
return JSON.stringify(value)
@@ -83,18 +82,14 @@ export async function POST(req: NextRequest) {
)
}
// Use provided system prompt or default
const finalSystemPrompt =
systemPrompt ||
'You are a helpful AI assistant. Generate content exactly as requested by the user.'
// Prepare messages for OpenAI API
const messages: ChatMessage[] = [{ role: 'system', content: finalSystemPrompt }]
// Add previous messages from history
messages.push(...history.filter((msg) => msg.role !== 'system'))
// Add the current user prompt
messages.push({ role: 'user', content: prompt })
logger.debug(
@@ -108,7 +103,6 @@ export async function POST(req: NextRequest) {
}
)
// For streaming responses
if (stream) {
try {
logger.debug(
@@ -119,7 +113,6 @@ export async function POST(req: NextRequest) {
`[${requestId}] About to create stream with model: ${useWandAzure ? wandModelName : 'gpt-4o'}`
)
// Use native fetch for streaming to avoid OpenAI SDK issues with Node.js runtime
const apiUrl = useWandAzure
? `${azureEndpoint}/openai/deployments/${wandModelName}/chat/completions?api-version=${azureApiVersion}`
: 'https://api.openai.com/v1/chat/completions'
@@ -161,7 +154,6 @@ export async function POST(req: NextRequest) {
logger.info(`[${requestId}] Stream response received, starting processing`)
// Create a TransformStream to process the SSE data
const encoder = new TextEncoder()
const decoder = new TextDecoder()
@@ -187,12 +179,10 @@ export async function POST(req: NextRequest) {
break
}
// Decode the chunk
buffer += decoder.decode(value, { stream: true })
// Process complete SSE messages
const lines = buffer.split('\n')
buffer = lines.pop() || '' // Keep incomplete line in buffer
buffer = lines.pop() || ''
for (const line of lines) {
if (line.startsWith('data: ')) {
@@ -217,25 +207,21 @@ export async function POST(req: NextRequest) {
logger.info(`[${requestId}] Received first content chunk`)
}
// Forward the content
controller.enqueue(
encoder.encode(`data: ${JSON.stringify({ chunk: content })}\n\n`)
)
}
// Log usage if present
if (parsed.usage) {
logger.info(
`[${requestId}] Received usage data: ${JSON.stringify(parsed.usage)}`
)
}
// Log progress periodically
if (chunkCount % 10 === 0) {
logger.debug(`[${requestId}] Processed ${chunkCount} chunks`)
}
} catch (parseError) {
// Skip invalid JSON lines
logger.debug(
`[${requestId}] Skipped non-JSON line: ${data.substring(0, 100)}`
)
@@ -252,7 +238,6 @@ export async function POST(req: NextRequest) {
stack: streamError?.stack,
})
// Send error to client
const errorData = `data: ${JSON.stringify({ error: 'Streaming failed', done: true })}\n\n`
controller.enqueue(encoder.encode(errorData))
controller.close()
@@ -262,14 +247,12 @@ export async function POST(req: NextRequest) {
},
})
// Return Response with proper headers for Node.js runtime
return new Response(readable, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache, no-transform',
Connection: 'keep-alive',
'X-Accel-Buffering': 'no', // Disable Nginx buffering
'Transfer-Encoding': 'chunked', // Important for Node.js runtime
'X-Accel-Buffering': 'no',
},
})
} catch (error: any) {
@@ -294,7 +277,6 @@ export async function POST(req: NextRequest) {
}
}
// For non-streaming responses
const completion = await client.chat.completions.create({
model: useWandAzure ? wandModelName : 'gpt-4o',
messages: messages,