fix(home): voice input text persistence bugs (#3737)

* fix(home): voice input text persistence bugs

* fix(home): gate setIsListening on startRecognition success

* fix(home): handle startRecognition failure in restartRecognition

* fix(home): reset speech prefix on submit while mic is active
This commit is contained in:
Waleed
2026-03-24 09:55:13 -07:00
committed by GitHub
parent a783b9d4ce
commit 83eb3ed211

View File

@@ -106,6 +106,7 @@ const SEND_BUTTON_ACTIVE =
const SEND_BUTTON_DISABLED = 'bg-[var(--c-808080)] dark:bg-[var(--c-808080)]'
const MAX_CHAT_TEXTAREA_HEIGHT = 200
const SPEECH_RECOGNITION_LANG = 'en-US'
const DROP_OVERLAY_ICONS = [
PdfIcon,
@@ -267,6 +268,7 @@ export function UserInput({
const [isListening, setIsListening] = useState(false)
const recognitionRef = useRef<SpeechRecognitionInstance | null>(null)
const prefixRef = useRef('')
const valueRef = useRef(value)
useEffect(() => {
return () => {
@@ -274,6 +276,10 @@ export function UserInput({
}
}, [])
useEffect(() => {
valueRef.current = value
}, [value])
const textareaRef = mentionMenu.textareaRef
const wasSendingRef = useRef(false)
const atInsertPosRef = useRef<number | null>(null)
@@ -390,6 +396,84 @@ export function UserInput({
[textareaRef]
)
const startRecognition = useCallback((): boolean => {
const w = window as WindowWithSpeech
const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition
if (!SpeechRecognitionAPI) return false
const recognition = new SpeechRecognitionAPI()
recognition.continuous = true
recognition.interimResults = true
recognition.lang = SPEECH_RECOGNITION_LANG
recognition.onresult = (event: SpeechRecognitionEvent) => {
let transcript = ''
for (let i = 0; i < event.results.length; i++) {
transcript += event.results[i][0].transcript
}
const prefix = prefixRef.current
const newVal = prefix ? `${prefix} ${transcript}` : transcript
setValue(newVal)
valueRef.current = newVal
}
recognition.onend = () => {
if (recognitionRef.current === recognition) {
prefixRef.current = valueRef.current
try {
recognition.start()
} catch {
recognitionRef.current = null
setIsListening(false)
}
}
}
recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
if (recognitionRef.current !== recognition) return
if (e.error === 'aborted' || e.error === 'not-allowed') {
recognitionRef.current = null
setIsListening(false)
}
}
recognitionRef.current = recognition
try {
recognition.start()
return true
} catch {
recognitionRef.current = null
return false
}
}, [])
const restartRecognition = useCallback(
(newPrefix: string) => {
if (!recognitionRef.current) return
prefixRef.current = newPrefix
recognitionRef.current.abort()
recognitionRef.current = null
if (!startRecognition()) {
setIsListening(false)
}
},
[startRecognition]
)
const toggleListening = useCallback(() => {
if (isListening) {
recognitionRef.current?.stop()
recognitionRef.current = null
setIsListening(false)
return
}
prefixRef.current = value
if (startRecognition()) {
setIsListening(true)
}
}, [isListening, value, startRecognition])
const handleSubmit = useCallback(() => {
const fileAttachmentsForApi: FileAttachmentForApi[] = files.attachedFiles
.filter((f) => !f.uploading && f.key)
@@ -407,13 +491,14 @@ export function UserInput({
contextManagement.selectedContexts.length > 0 ? contextManagement.selectedContexts : undefined
)
setValue('')
restartRecognition('')
files.clearAttachedFiles()
contextManagement.clearContexts()
if (textareaRef.current) {
textareaRef.current.style.height = 'auto'
}
}, [onSubmit, files, value, contextManagement, textareaRef])
}, [onSubmit, files, value, contextManagement, textareaRef, restartRecognition])
const handleKeyDown = useCallback(
(e: React.KeyboardEvent<HTMLTextAreaElement>) => {
@@ -488,27 +573,33 @@ export function UserInput({
[handleSubmit, mentionTokensWithContext, value, textareaRef]
)
const handleInputChange = useCallback((e: React.ChangeEvent<HTMLTextAreaElement>) => {
const newValue = e.target.value
const caret = e.target.selectionStart ?? newValue.length
const handleInputChange = useCallback(
(e: React.ChangeEvent<HTMLTextAreaElement>) => {
const newValue = e.target.value
const caret = e.target.selectionStart ?? newValue.length
if (
caret > 0 &&
newValue.charAt(caret - 1) === '@' &&
(caret === 1 || /\s/.test(newValue.charAt(caret - 2)))
) {
const before = newValue.slice(0, caret - 1)
const after = newValue.slice(caret)
setValue(`${before}${after}`)
atInsertPosRef.current = caret - 1
setPlusMenuOpen(true)
setPlusMenuSearch('')
setPlusMenuActiveIndex(0)
return
}
if (
caret > 0 &&
newValue.charAt(caret - 1) === '@' &&
(caret === 1 || /\s/.test(newValue.charAt(caret - 2)))
) {
const before = newValue.slice(0, caret - 1)
const after = newValue.slice(caret)
const adjusted = `${before}${after}`
setValue(adjusted)
atInsertPosRef.current = caret - 1
setPlusMenuOpen(true)
setPlusMenuSearch('')
setPlusMenuActiveIndex(0)
restartRecognition(adjusted)
return
}
setValue(newValue)
}, [])
setValue(newValue)
restartRecognition(newValue)
},
[restartRecognition]
)
const handleSelectAdjust = useCallback(() => {
const textarea = textareaRef.current
@@ -536,56 +627,6 @@ export function UserInput({
[isInitialView]
)
const toggleListening = useCallback(() => {
if (isListening) {
recognitionRef.current?.stop()
recognitionRef.current = null
setIsListening(false)
return
}
const w = window as WindowWithSpeech
const SpeechRecognitionAPI = w.SpeechRecognition || w.webkitSpeechRecognition
if (!SpeechRecognitionAPI) return
prefixRef.current = value
const recognition = new SpeechRecognitionAPI()
recognition.continuous = true
recognition.interimResults = true
recognition.lang = 'en-US'
recognition.onresult = (event: SpeechRecognitionEvent) => {
let transcript = ''
for (let i = 0; i < event.results.length; i++) {
transcript += event.results[i][0].transcript
}
const prefix = prefixRef.current
setValue(prefix ? `${prefix} ${transcript}` : transcript)
}
recognition.onend = () => {
if (recognitionRef.current === recognition) {
try {
recognition.start()
} catch {
recognitionRef.current = null
setIsListening(false)
}
}
}
recognition.onerror = (e: SpeechRecognitionErrorEvent) => {
if (e.error === 'aborted' || e.error === 'not-allowed') {
recognitionRef.current = null
setIsListening(false)
}
}
recognitionRef.current = recognition
recognition.start()
setIsListening(true)
}, [isListening, value])
const renderOverlayContent = useCallback(() => {
const contexts = contextManagement.selectedContexts