mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-02-12 15:05:10 -05:00
## CHANGES - Remove redundant byte conversion from audio data extraction - Extract audio data as string before converting once - Simplify audio data processing in chat handler - Fix potential data corruption in audio output
128 lines
4.1 KiB
Go
128 lines
4.1 KiB
Go
package cli
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/danielmiessler/fabric/internal/core"
|
|
"github.com/danielmiessler/fabric/internal/domain"
|
|
"github.com/danielmiessler/fabric/internal/plugins/db/fsdb"
|
|
)
|
|
|
|
// handleChatProcessing handles the main chat processing logic
|
|
func handleChatProcessing(currentFlags *Flags, registry *core.PluginRegistry, messageTools string) (err error) {
|
|
if messageTools != "" {
|
|
currentFlags.AppendMessage(messageTools)
|
|
}
|
|
|
|
var chatter *core.Chatter
|
|
if chatter, err = registry.GetChatter(currentFlags.Model, currentFlags.ModelContextLength,
|
|
currentFlags.Strategy, currentFlags.Stream, currentFlags.DryRun); err != nil {
|
|
return
|
|
}
|
|
|
|
var session *fsdb.Session
|
|
var chatReq *domain.ChatRequest
|
|
if chatReq, err = currentFlags.BuildChatRequest(strings.Join(os.Args[1:], " ")); err != nil {
|
|
return
|
|
}
|
|
|
|
if chatReq.Language == "" {
|
|
chatReq.Language = registry.Language.DefaultLanguage.Value
|
|
}
|
|
var chatOptions *domain.ChatOptions
|
|
if chatOptions, err = currentFlags.BuildChatOptions(); err != nil {
|
|
return
|
|
}
|
|
|
|
// Check if user is requesting audio output or using a TTS model
|
|
isAudioOutput := currentFlags.Output != "" && IsAudioFormat(currentFlags.Output)
|
|
isTTSModel := isTTSModel(currentFlags.Model)
|
|
|
|
if isTTSModel && !isAudioOutput {
|
|
err = fmt.Errorf("TTS model '%s' requires audio output. Please specify an audio output file with -o flag (e.g., -o output.wav)", currentFlags.Model)
|
|
return
|
|
}
|
|
|
|
if isAudioOutput && !isTTSModel {
|
|
err = fmt.Errorf("audio output file '%s' specified but model '%s' is not a TTS model. Please use a TTS model like gemini-2.5-flash-preview-tts", currentFlags.Output, currentFlags.Model)
|
|
return
|
|
}
|
|
|
|
// For TTS models, check if output file already exists BEFORE processing
|
|
if isTTSModel && isAudioOutput {
|
|
outputFile := currentFlags.Output
|
|
// Add .wav extension if not provided
|
|
if filepath.Ext(outputFile) == "" {
|
|
outputFile += ".wav"
|
|
}
|
|
if _, err = os.Stat(outputFile); err == nil {
|
|
err = fmt.Errorf("file %s already exists. Please choose a different filename or remove the existing file", outputFile)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Set audio options in chat config
|
|
chatOptions.AudioOutput = isAudioOutput
|
|
if isAudioOutput {
|
|
chatOptions.AudioFormat = "wav" // Default to WAV format
|
|
}
|
|
|
|
if session, err = chatter.Send(chatReq, chatOptions); err != nil {
|
|
return
|
|
}
|
|
|
|
result := session.GetLastMessage().Content
|
|
|
|
if !currentFlags.Stream || currentFlags.SuppressThink {
|
|
// For TTS models with audio output, show a user-friendly message instead of raw data
|
|
if isTTSModel && isAudioOutput && strings.HasPrefix(result, "FABRIC_AUDIO_DATA:") {
|
|
fmt.Printf("TTS audio generated successfully and saved to: %s\n", currentFlags.Output)
|
|
} else {
|
|
// print the result if it was not streamed already or suppress-think disabled streaming output
|
|
fmt.Println(result)
|
|
}
|
|
}
|
|
|
|
// if the copy flag is set, copy the message to the clipboard
|
|
if currentFlags.Copy {
|
|
if err = CopyToClipboard(result); err != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
// if the output flag is set, create an output file
|
|
if currentFlags.Output != "" {
|
|
if currentFlags.OutputSession {
|
|
sessionAsString := session.String()
|
|
err = CreateOutputFile(sessionAsString, currentFlags.Output)
|
|
} else {
|
|
// For TTS models, we need to handle audio output differently
|
|
if isTTSModel && isAudioOutput {
|
|
// Check if result contains actual audio data
|
|
if strings.HasPrefix(result, "FABRIC_AUDIO_DATA:") {
|
|
// Extract the binary audio data
|
|
audioData := result[len("FABRIC_AUDIO_DATA:"):]
|
|
err = CreateAudioOutputFile([]byte(audioData), currentFlags.Output)
|
|
} else {
|
|
// Fallback for any error messages or unexpected responses
|
|
err = CreateOutputFile(result, currentFlags.Output)
|
|
}
|
|
} else {
|
|
err = CreateOutputFile(result, currentFlags.Output)
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// isTTSModel checks if the model is a text-to-speech model
|
|
func isTTSModel(modelName string) bool {
|
|
lowerModel := strings.ToLower(modelName)
|
|
return strings.Contains(lowerModel, "tts") ||
|
|
strings.Contains(lowerModel, "preview-tts") ||
|
|
strings.Contains(lowerModel, "text-to-speech")
|
|
}
|