Mirror of https://github.com/simstudioai/sim.git (synced 2026-04-06 03:00:16 -04:00)
fix(voice): added voice functionality back to chat client (#676)
* fix(voice): added voice functionality back to chat client
* add logic to support deployed chat in staging
@@ -328,124 +328,27 @@ export default function ChatClient({ subdomain }: { subdomain: string }) {
         throw new Error('Response body is missing')
       }
 
-      const messageIdMap = new Map<string, string>()
+      // Use the streaming hook with audio support
+      const shouldPlayAudio = isVoiceInput || isVoiceFirstMode
+      const audioHandler = shouldPlayAudio
+        ? createAudioStreamHandler(streamTextToAudio, DEFAULT_VOICE_SETTINGS.voiceId)
+        : undefined
 
-      // Get reader with proper cleanup
-      const reader = response.body.getReader()
-      const decoder = new TextDecoder()
-
-      const processStream = async () => {
-        let streamAborted = false
-
-        // Add cleanup handler for abort
-        const cleanup = () => {
-          streamAborted = true
-          try {
-            reader.releaseLock()
-          } catch (error) {
-            // Reader might already be released
-            logger.debug('Reader already released:', error)
-          }
-          setIsLoading(false)
-        }
-
-        // Listen for abort events
-        abortController.signal.addEventListener('abort', cleanup)
-
-        try {
-          while (!streamAborted) {
-            const { done, value } = await reader.read()
-
-            if (done) {
-              setIsLoading(false)
-              break
-            }
-
-            if (streamAborted) {
-              break
-            }
-
-            const chunk = decoder.decode(value, { stream: true })
-            const lines = chunk.split('\n\n')
-
-            for (const line of lines) {
-              if (line.startsWith('data: ')) {
-                try {
-                  const json = JSON.parse(line.substring(6))
-                  const { blockId, chunk: contentChunk, event: eventType } = json
-
-                  if (eventType === 'final' && json.data) {
-                    setIsLoading(false)
-
-                    // Process final execution result for field extraction
-                    const result = json.data
-                    const nonStreamingLogs =
-                      result.logs?.filter((log: any) => !messageIdMap.has(log.blockId)) || []
-
-                    // Chat field extraction will be handled by the backend using deployment outputConfigs
-
-                    return
-                  }
-
-                  if (blockId && contentChunk) {
-                    if (!messageIdMap.has(blockId)) {
-                      const newMessageId = crypto.randomUUID()
-                      messageIdMap.set(blockId, newMessageId)
-                      setMessages((prev) => [
-                        ...prev,
-                        {
-                          id: newMessageId,
-                          content: contentChunk,
-                          type: 'assistant',
-                          timestamp: new Date(),
-                          isStreaming: true,
-                        },
-                      ])
-                    } else {
-                      const messageId = messageIdMap.get(blockId)
-                      if (messageId) {
-                        setMessages((prev) =>
-                          prev.map((msg) =>
-                            msg.id === messageId
-                              ? { ...msg, content: msg.content + contentChunk }
-                              : msg
-                          )
-                        )
-                      }
-                    }
-                  } else if (blockId && eventType === 'end') {
-                    const messageId = messageIdMap.get(blockId)
-                    if (messageId) {
-                      setMessages((prev) =>
-                        prev.map((msg) =>
-                          msg.id === messageId ? { ...msg, isStreaming: false } : msg
-                        )
-                      )
-                    }
-                  }
-                } catch (parseError) {
-                  logger.error('Error parsing stream data:', parseError)
-                  // Continue processing other lines even if one fails
-                }
-              }
-            }
-          }
-        } catch (streamError: unknown) {
-          if (streamError instanceof Error && streamError.name === 'AbortError') {
-            logger.info('Stream processing aborted by user')
-            return
-          }
-
-          logger.error('Error processing stream:', streamError)
-          throw streamError
-        } finally {
-          // Ensure cleanup always happens
-          cleanup()
-          abortController.signal.removeEventListener('abort', cleanup)
-        }
-      }
-
-      await processStream()
+      await handleStreamedResponse(
+        response,
+        setMessages,
+        setIsLoading,
+        scrollToBottom,
+        userHasScrolled,
+        {
+          voiceSettings: {
+            isVoiceEnabled: shouldPlayAudio,
+            voiceId: DEFAULT_VOICE_SETTINGS.voiceId,
+            autoPlayResponses: shouldPlayAudio,
+          },
+          audioStreamHandler: audioHandler,
+        }
+      )
     } catch (error: any) {
       // Clear timeout in case of error
      clearTimeout(timeoutId)
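The client now delegates all of this to the `handleStreamedResponse` hook (next hunk). For reference, the SSE event shapes the parser expects can be read off the parsing code; a minimal TypeScript sketch, with type names invented here for illustration since the server-side definitions are not part of this diff:

```ts
// Shapes inferred from the client-side parsing in this commit; the type names
// are illustrative (assumption), not the repo's real definitions.
interface StreamChunkEvent {
  blockId: string // workflow block that produced this chunk
  chunk: string // incremental text for that block
}

interface StreamEndEvent {
  blockId: string
  event: 'end' // marks the block's stream as finished
}

interface StreamFinalEvent {
  event: 'final'
  data: {
    logs?: Array<{ blockId: string; [key: string]: unknown }>
  }
}

type StreamEvent = StreamChunkEvent | StreamEndEvent | StreamFinalEvent

// Each SSE frame arrives as a `data: <json>` line, so a parser reduces to:
function parseStreamLine(line: string): StreamEvent | null {
  if (!line.startsWith('data: ')) return null
  try {
    return JSON.parse(line.substring(6)) as StreamEvent
  } catch {
    return null
  }
}
```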
@@ -75,262 +75,149 @@ export function useChatStreaming() {
     userHasScrolled?: boolean,
     streamingOptions?: StreamingOptions
   ) => {
-    const messageId = crypto.randomUUID()
-
-    // Set streaming state before adding the assistant message
+    // Set streaming state
     setIsStreamingResponse(true)
-
-    // Reset refs
-    accumulatedTextRef.current = ''
-    lastStreamedPositionRef.current = 0
-    lastDisplayedPositionRef.current = 0
-    audioStreamingActiveRef.current = false
     abortControllerRef.current = new AbortController()
 
     // Check if we should stream audio
-    const shouldStreamAudio =
+    const shouldPlayAudio =
       streamingOptions?.voiceSettings?.isVoiceEnabled &&
       streamingOptions?.voiceSettings?.autoPlayResponses &&
       streamingOptions?.audioStreamHandler
 
-    // Get voice-first mode settings
-    const voiceFirstMode = streamingOptions?.voiceSettings?.voiceFirstMode
-    const textStreamingMode = streamingOptions?.voiceSettings?.textStreamingInVoiceMode || 'normal'
-    const conversationMode = streamingOptions?.voiceSettings?.conversationMode
+    const reader = response.body?.getReader()
+    if (!reader) {
+      setIsLoading(false)
+      setIsStreamingResponse(false)
+      return
+    }
 
-    // In voice-first mode with hidden text, don't show text at all
-    const shouldShowText = !voiceFirstMode || textStreamingMode !== 'hidden'
+    const decoder = new TextDecoder()
+    let accumulatedText = ''
+    let lastAudioPosition = 0
 
     // Add placeholder message
+    const messageId = crypto.randomUUID()
     setMessages((prev) => [
       ...prev,
       {
         id: messageId,
-        content: shouldShowText ? '' : '🎵 Generating audio response...',
+        content: '',
         type: 'assistant',
         timestamp: new Date(),
         isStreaming: true,
-        isVoiceOnly: voiceFirstMode && textStreamingMode === 'hidden',
       },
     ])
 
     // Stop showing loading indicator once streaming begins
     setIsLoading(false)
 
-    // Start audio if in voice mode
-    if (shouldStreamAudio) {
-      streamingOptions.onAudioStart?.()
-      audioStreamingActiveRef.current = true
-    }
-
-    // Helper function to update displayed text based on mode
-    const updateDisplayedText = (fullText: string, audioPosition?: number) => {
-      let displayText = fullText
-
-      if (voiceFirstMode && textStreamingMode === 'synced') {
-        // Only show text up to where audio has been streamed
-        displayText = fullText.substring(0, audioPosition || lastStreamedPositionRef.current)
-      } else if (voiceFirstMode && textStreamingMode === 'hidden') {
-        // Don't update text content, keep voice indicator
-        return
-      }
-
-      setMessages((prev) =>
-        prev.map((msg) => {
-          if (msg.id === messageId) {
-            return {
-              ...msg,
-              content: displayText,
-            }
-          }
-          return msg
-        })
-      )
-    }
-
-    // Helper function to clean up after streaming ends (success or error)
-    const cleanupStreaming = (messageContent?: string, appendContent = false) => {
-      // Reset streaming state and controller
-      setIsStreamingResponse(false)
-      abortControllerRef.current = null
-      accumulatedTextRef.current = ''
-      lastStreamedPositionRef.current = 0
-      lastDisplayedPositionRef.current = 0
-      audioStreamingActiveRef.current = false
-
-      // Update message content and remove isStreaming flag
-      setMessages((prev) =>
-        prev.map((msg) => {
-          if (msg.id === messageId) {
-            return {
-              ...msg,
-              content: appendContent
-                ? msg.content + (messageContent || '')
-                : messageContent || msg.content,
-              isStreaming: false,
-              isVoiceOnly: false,
-            }
-          }
-          return msg
-        })
-      )
-
-      // Only scroll to bottom if user hasn't manually scrolled
-      if (!userHasScrolled) {
-        setTimeout(() => {
-          scrollToBottom()
-        }, 300)
-      }
-
-      // End audio streaming
-      if (shouldStreamAudio) {
-        streamingOptions.onAudioEnd?.()
-      }
-    }
-
-    // Check if response body exists and is a ReadableStream
-    if (!response.body) {
-      cleanupStreaming("Error: Couldn't receive streaming response from server.")
-      return
-    }
-
-    const reader = response.body.getReader()
-    if (reader) {
-      const decoder = new TextDecoder()
-      let done = false
-
-      try {
-        while (!done) {
-          // Check if aborted before awaiting reader.read()
-          if (abortControllerRef.current === null) {
-            break
-          }
-
-          const { value, done: readerDone } = await reader.read()
-          done = readerDone
-
-          if (value) {
-            const chunk = decoder.decode(value, { stream: true })
-            if (chunk) {
-              // Accumulate text
-              accumulatedTextRef.current += chunk
-
-              // Update the message with the accumulated text based on mode
-              if (shouldShowText) {
-                updateDisplayedText(accumulatedTextRef.current)
-              }
-
-              // Stream audio in real-time for meaningful sentences
-              if (
-                shouldStreamAudio &&
-                streamingOptions.audioStreamHandler &&
-                audioStreamingActiveRef.current
-              ) {
-                const newText = accumulatedTextRef.current.substring(
-                  lastStreamedPositionRef.current
-                )
-
-                // Use sentence-based streaming for natural audio flow
-                const sentenceEndings = ['. ', '! ', '? ', '.\n', '!\n', '?\n', '.', '!', '?']
-                let sentenceEnd = -1
-
-                // Find the first complete sentence
-                for (const ending of sentenceEndings) {
-                  const index = newText.indexOf(ending)
-                  if (index > 0) {
-                    // Make sure we include the punctuation
-                    sentenceEnd = index + ending.length
-                    break
-                  }
-                }
-
-                // If we found a complete sentence, stream it
-                if (sentenceEnd > 0) {
-                  const sentence = newText.substring(0, sentenceEnd).trim()
-                  if (sentence && sentence.length >= 3) {
-                    // Only send meaningful sentences
-                    try {
-                      // Stream this sentence to audio
-                      await streamingOptions.audioStreamHandler(sentence)
-                      lastStreamedPositionRef.current += sentenceEnd
-
-                      // Update displayed text in synced mode
-                      if (voiceFirstMode && textStreamingMode === 'synced') {
-                        updateDisplayedText(
-                          accumulatedTextRef.current,
-                          lastStreamedPositionRef.current
-                        )
-                      }
-                    } catch (error) {
-                      logger.error('Error streaming audio sentence:', error)
-                      // Don't stop on individual sentence errors, but log them
-                      if (error instanceof Error && error.message.includes('401')) {
-                        logger.warn('TTS authentication error, stopping audio streaming')
-                        audioStreamingActiveRef.current = false
-                      }
-                    }
-                  }
-                } else if (newText.length > 200 && done) {
-                  // If streaming has ended and we have a long incomplete sentence, stream it anyway
-                  const incompleteSentence = newText.trim()
-                  if (incompleteSentence && incompleteSentence.length >= 10) {
-                    try {
-                      await streamingOptions.audioStreamHandler(incompleteSentence)
-                      lastStreamedPositionRef.current += newText.length
-
-                      if (voiceFirstMode && textStreamingMode === 'synced') {
-                        updateDisplayedText(
-                          accumulatedTextRef.current,
-                          lastStreamedPositionRef.current
-                        )
-                      }
-                    } catch (error) {
-                      logger.error('Error streaming incomplete sentence:', error)
-                    }
-                  }
-                }
-              }
-            }
-          }
-        }
-
-        // Handle any remaining text for audio streaming when streaming completes
-        if (
-          shouldStreamAudio &&
-          streamingOptions.audioStreamHandler &&
-          audioStreamingActiveRef.current
-        ) {
-          const remainingText = accumulatedTextRef.current
-            .substring(lastStreamedPositionRef.current)
-            .trim()
-          if (remainingText && remainingText.length >= 3) {
-            try {
-              await streamingOptions.audioStreamHandler(remainingText)
-
-              // Final update for synced mode
-              if (voiceFirstMode && textStreamingMode === 'synced') {
-                updateDisplayedText(accumulatedTextRef.current, accumulatedTextRef.current.length)
-              }
-            } catch (error) {
-              logger.error('Error streaming final remaining text:', error)
-            }
-          }
-        }
-      } catch (error) {
-        // Show error to user in the message
-        const errorMessage =
-          error instanceof Error ? error.message : 'Unknown error during streaming'
-        logger.error('Error reading stream:', error)
-        cleanupStreaming(`\n\n_Error: ${errorMessage}_`, true)
-        return // Skip the finally block's cleanupStreaming call
-      } finally {
-        // Don't call cleanupStreaming here if we already called it in the catch block
-        if (abortControllerRef.current !== null) {
-          cleanupStreaming()
-        }
-      }
-    } else {
-      cleanupStreaming("Error: Couldn't process streaming response.")
-    }
+    try {
+      while (true) {
+        // Check if aborted
+        if (abortControllerRef.current === null) {
+          break
+        }
+
+        const { done, value } = await reader.read()
+
+        if (done) {
+          // Stream any remaining text for TTS
+          if (
+            shouldPlayAudio &&
+            streamingOptions?.audioStreamHandler &&
+            accumulatedText.length > lastAudioPosition
+          ) {
+            const remainingText = accumulatedText.substring(lastAudioPosition).trim()
+            if (remainingText) {
+              try {
+                await streamingOptions.audioStreamHandler(remainingText)
+              } catch (error) {
+                logger.error('TTS error for remaining text:', error)
+              }
+            }
+          }
+          break
+        }
+
+        const chunk = decoder.decode(value, { stream: true })
+        const lines = chunk.split('\n\n')
+
+        for (const line of lines) {
+          if (line.startsWith('data: ')) {
+            try {
+              const json = JSON.parse(line.substring(6))
+              const { blockId, chunk: contentChunk, event: eventType } = json
+
+              if (eventType === 'final' && json.data) {
+                setMessages((prev) =>
+                  prev.map((msg) => (msg.id === messageId ? { ...msg, isStreaming: false } : msg))
+                )
+                return
+              }
+
+              if (blockId && contentChunk) {
+                accumulatedText += contentChunk
+                setMessages((prev) =>
+                  prev.map((msg) =>
+                    msg.id === messageId ? { ...msg, content: accumulatedText } : msg
+                  )
+                )
+
+                // Real-time TTS for voice mode
+                if (shouldPlayAudio && streamingOptions?.audioStreamHandler) {
+                  const newText = accumulatedText.substring(lastAudioPosition)
+                  const sentenceEndings = ['. ', '! ', '? ', '.\n', '!\n', '?\n', '.', '!', '?']
+                  let sentenceEnd = -1
+
+                  for (const ending of sentenceEndings) {
+                    const index = newText.indexOf(ending)
+                    if (index > 0) {
+                      sentenceEnd = index + ending.length
+                      break
+                    }
+                  }
+
+                  if (sentenceEnd > 0) {
+                    const sentence = newText.substring(0, sentenceEnd).trim()
+                    if (sentence && sentence.length >= 3) {
+                      try {
+                        await streamingOptions.audioStreamHandler(sentence)
+                        lastAudioPosition += sentenceEnd
+                      } catch (error) {
+                        logger.error('TTS error:', error)
+                      }
+                    }
+                  }
+                }
+              } else if (blockId && eventType === 'end') {
+                setMessages((prev) =>
+                  prev.map((msg) => (msg.id === messageId ? { ...msg, isStreaming: false } : msg))
+                )
+              }
+            } catch (parseError) {
+              logger.error('Error parsing stream data:', parseError)
+            }
+          }
+        }
+      }
+    } catch (error) {
+      logger.error('Error processing stream:', error)
+      setMessages((prev) =>
+        prev.map((msg) => (msg.id === messageId ? { ...msg, isStreaming: false } : msg))
+      )
+    } finally {
+      setIsStreamingResponse(false)
+      abortControllerRef.current = null
+
+      // Only scroll to bottom if user hasn't manually scrolled
+      if (!userHasScrolled) {
+        setTimeout(() => {
+          scrollToBottom()
+        }, 300)
+      }
+
+      // End audio streaming
+      if (shouldPlayAudio) {
+        streamingOptions?.onAudioEnd?.()
+      }
+    }
   }
 
   return {
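The sentence-boundary scan that drives incremental TTS appears twice in the hook; isolated as a pure helper it is easier to see what it does. A minimal sketch: the helper name and standalone form are illustrative, while the endings list and the 3-character minimum mirror the diff:

```ts
// Standalone restatement of the sentence scan above. The helper name and
// signature are illustrative (assumption); the logic mirrors the diff.
const SENTENCE_ENDINGS = ['. ', '! ', '? ', '.\n', '!\n', '?\n', '.', '!', '?']

function nextSentence(buffer: string): { sentence: string; consumed: number } | null {
  let sentenceEnd = -1
  for (const ending of SENTENCE_ENDINGS) {
    const index = buffer.indexOf(ending)
    if (index > 0) {
      sentenceEnd = index + ending.length
      break
    }
  }
  if (sentenceEnd <= 0) return null

  const sentence = buffer.substring(0, sentenceEnd).trim()
  if (sentence.length < 3) return null // skip fragments not worth synthesizing

  return { sentence, consumed: sentenceEnd }
}

// Usage against the accumulated stream, as in the hook:
//   const hit = nextSentence(accumulatedText.substring(lastAudioPosition))
//   if (hit) { await audioStreamHandler(hit.sentence); lastAudioPosition += hit.consumed }
```

Because the endings list is scanned in order, a punctuation mark followed by a space or newline is preferred over a bare one, which makes early splits inside constructs like decimal numbers less likely.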
@@ -27,16 +27,46 @@ export async function middleware(request: NextRequest) {
   const url = request.nextUrl
   const hostname = request.headers.get('host') || ''
 
-  // Extract subdomain
-  const isCustomDomain =
-    hostname !== BASE_DOMAIN &&
-    !hostname.startsWith('www.') &&
-    hostname.includes(isDevelopment ? 'localhost' : 'simstudio.ai')
+  // Extract subdomain - handle nested subdomains for any domain
+  const isCustomDomain = (() => {
+    // Standard check for non-base domains
+    if (hostname === BASE_DOMAIN || hostname.startsWith('www.')) {
+      return false
+    }
+
+    // Extract root domain from BASE_DOMAIN (e.g., "simstudio.ai" from "staging.simstudio.ai")
+    const baseParts = BASE_DOMAIN.split('.')
+    const rootDomain = isDevelopment
+      ? 'localhost'
+      : baseParts.length >= 2
+        ? baseParts.slice(-2).join('.') // Last 2 parts: ["simstudio", "ai"] -> "simstudio.ai"
+        : BASE_DOMAIN
+
+    // Check if hostname is under the same root domain
+    if (!hostname.includes(rootDomain)) {
+      return false
+    }
+
+    // For nested subdomain environments: handle cases like myapp.staging.example.com
+    const hostParts = hostname.split('.')
+    const basePartCount = BASE_DOMAIN.split('.').length
+
+    // If hostname has more parts than base domain, it's a nested subdomain
+    if (hostParts.length > basePartCount) {
+      return true
+    }
+
+    // For single-level subdomains: regular subdomain logic
+    return hostname !== BASE_DOMAIN
+  })()
 
   const subdomain = isCustomDomain ? hostname.split('.')[0] : null
 
   // Handle chat subdomains
   if (subdomain && isCustomDomain) {
-    if (url.pathname.startsWith('/api/chat/')) {
+    if (url.pathname.startsWith('/api/chat/') || url.pathname.startsWith('/api/proxy/')) {
       return NextResponse.next()
     }
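This is what enables deployed chat in staging: with a BASE_DOMAIN like staging.simstudio.ai, a chat host such as myapp.staging.simstudio.ai has more labels than the base domain and is treated as a subdomain. A minimal sketch of the same check as a pure function, with example hostnames that are illustrative rather than taken from the repo:

```ts
// Pure-function restatement of the middleware check above, for illustration.
// The example BASE_DOMAIN values and hostnames are assumptions.
function isCustomDomain(hostname: string, baseDomain: string, isDevelopment = false): boolean {
  if (hostname === baseDomain || hostname.startsWith('www.')) return false

  const baseParts = baseDomain.split('.')
  const rootDomain = isDevelopment
    ? 'localhost'
    : baseParts.length >= 2
      ? baseParts.slice(-2).join('.')
      : baseDomain

  if (!hostname.includes(rootDomain)) return false

  // More labels than the base domain means a nested subdomain
  if (hostname.split('.').length > baseParts.length) return true

  // Single-level subdomain of the base domain
  return hostname !== baseDomain
}

// Illustrative examples:
// isCustomDomain('myapp.simstudio.ai', 'simstudio.ai')                  -> true
// isCustomDomain('myapp.staging.simstudio.ai', 'staging.simstudio.ai') -> true (nested, staging)
// isCustomDomain('staging.simstudio.ai', 'staging.simstudio.ai')       -> false (base itself)
// isCustomDomain('www.simstudio.ai', 'simstudio.ai')                   -> false
```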