diff --git a/.vscode/settings.json b/.vscode/settings.json index 64e5010a..ac137d59 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,23 +1,32 @@ { "cSpell.words": [ + "Achird", "addextension", "adduser", "AIML", "anthropics", + "Aoede", "atotto", + "Autonoe", "badfile", "Behrens", "blindspots", "Bombal", + "Callirhoe", + "Callirrhoe", "Cerebras", + "compadd", + "compdef", "compinit", "creatordate", + "curcontext", "custompatterns", "danielmiessler", "davidanson", "Debugf", "dedup", "deepseek", + "Despina", "direnv", "dryrun", "dsrp", @@ -25,6 +34,7 @@ "Eisler", "elif", "envrc", + "Erinome", "Errorf", "eugeis", "Eugen", @@ -66,6 +76,7 @@ "Kore", "ksylvan", "Langdock", + "Laomedeia", "ldflags", "libexec", "listcontexts", @@ -89,6 +100,7 @@ "openaiapi", "opencode", "openrouter", + "Orus", "otiai", "pdflatex", "pipx", @@ -97,11 +109,14 @@ "presencepenalty", "printcontext", "printsession", + "Pulcherrima", "pycache", "pyperclip", "readystream", "restapi", "rmextension", + "Sadachbia", + "Sadaltager", "samber", "sashabaranov", "sdist", @@ -112,6 +127,7 @@ "Streamlit", "stretchr", "subchunk", + "Sulafat", "talkpanel", "Telos", "testpattern", diff --git a/README.md b/README.md index 30b0bb2e..65dd7941 100644 --- a/README.md +++ b/README.md @@ -548,6 +548,9 @@ Application Options: --think-start-tag= Start tag for thinking sections (default: ) --think-end-tag= End tag for thinking sections (default: ) --disable-responses-api Disable OpenAI Responses API (default: false) + --voice= TTS voice name for supported models (e.g., Kore, Charon, Puck) + (default: Kore) + --list-gemini-voices List all available Gemini TTS voices Help Options: -h, --help Show this help message diff --git a/completions/_fabric b/completions/_fabric index 7659bb05..e92fec7e 100644 --- a/completions/_fabric +++ b/completions/_fabric @@ -14,16 +14,19 @@ _fabric_models() { models=(${(f)"$(fabric --listmodels --shell-complete-list 2>/dev/null)"}) compadd -X "Models:" ${models} } + 
_fabric_contexts() { local -a contexts contexts=(${(f)"$(fabric --listcontexts --shell-complete-list 2>/dev/null)"}) compadd -X "Contexts:" ${contexts} } + _fabric_sessions() { local -a sessions sessions=(${(f)"$(fabric --listsessions --shell-complete-list 2>/dev/null)"}) compadd -X "Sessions:" ${sessions} } + _fabric_strategies() { local -a strategies strategies=(${(f)"$(fabric --liststrategies --shell-complete-list 2>/dev/null)"}) @@ -34,14 +37,12 @@ _fabric_extensions() { local -a extensions extensions=(${(f)"$(fabric --listextensions --shell-complete-list 2>/dev/null)"}) compadd -X "Extensions:" ${extensions} - '(-L --listmodels)'{-L,--listmodels}'[List all available models]:list models:_fabric_models' \ - '(-x --listcontexts)'{-x,--listcontexts}'[List all contexts]:list contexts:_fabric_contexts' \ - '(-X --listsessions)'{-X,--listsessions}'[List all sessions]:list sessions:_fabric_sessions' \ - '(--listextensions)--listextensions[List all registered extensions]' \ - '(--liststrategies)--liststrategies[List all strategies]:list strategies:_fabric_strategies' \ - '(--listvendors)--listvendors[List all vendors]' \ - vendors=(${(f)"$(fabric --listvendors 2>/dev/null)"}) - compadd -X "Vendors:" ${vendors} +} + +_fabric_gemini_voices() { + local -a voices + voices=(${(f)"$(fabric --list-gemini-voices --shell-complete-list 2>/dev/null)"}) + compadd -X "Gemini TTS Voices:" ${voices} } _fabric() { @@ -109,6 +110,8 @@ _fabric() { '(--strategy)--strategy[Choose a strategy from the available strategies]:strategy:_fabric_strategies' \ '(--liststrategies)--liststrategies[List all strategies]' \ '(--listvendors)--listvendors[List all vendors]' \ + '(--voice)--voice[TTS voice name for supported models]:voice:_fabric_gemini_voices' \ + '(--list-gemini-voices)--list-gemini-voices[List all available Gemini TTS voices]' \ '(--shell-complete-list)--shell-complete-list[Output raw list without headers/formatting (for shell completion)]' \ 
'(--suppress-think)--suppress-think[Suppress text enclosed in thinking tags]' \ '(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: )]:start tag:' \ @@ -119,4 +122,3 @@ _fabric() { } _fabric "$@" - diff --git a/completions/fabric.bash b/completions/fabric.bash index ca47e804..1acbc8db 100644 --- a/completions/fabric.bash +++ b/completions/fabric.bash @@ -13,7 +13,7 @@ _fabric() { _get_comp_words_by_ref -n : cur prev words cword # Define all possible options/flags - local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" + local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u 
--scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" # Helper function for dynamic completions _fabric_get_list() { @@ -62,6 +62,10 @@ _fabric() { COMPREPLY=($(compgen -W "$(_fabric_get_list --liststrategies)" -- "${cur}")) return 0 ;; + --voice) + COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}")) + return 0 + ;; # Options requiring file/directory paths -a | --attachment | -o | --output | --config | --addextension | --image-file) _filedir diff --git a/completions/fabric.fish b/completions/fabric.fish index 7e464ba7..24ae5afc 100755 --- a/completions/fabric.fish +++ b/completions/fabric.fish @@ -31,6 +31,10 @@ function __fabric_get_extensions fabric --listextensions --shell-complete-list 2>/dev/null end +function __fabric_get_gemini_voices + fabric --list-gemini-voices --shell-complete-list 2>/dev/null +end + # Main completion function complete -c fabric -f @@ -71,6 +75,7 @@ complete -c fabric -l rmextension -d "Remove a registered extension by name" -a complete -c fabric -l strategy -d "Choose a strategy from the available strategies" -a "(__fabric_get_strategies)" complete -c fabric -l think-start-tag -d "Start tag for thinking sections (default: )" complete -c fabric -l think-end-tag -d "End tag for thinking sections (default: )" +complete -c fabric -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)" # Boolean flags (no arguments) complete -c fabric -s S -l setup -d "Run setup for all reconfigurable parts 
of fabric" @@ -99,6 +104,7 @@ complete -c fabric -l version -d "Print current version" complete -c fabric -l listextensions -d "List all registered extensions" complete -c fabric -l liststrategies -d "List all strategies" complete -c fabric -l listvendors -d "List all vendors" +complete -c fabric -l list-gemini-voices -d "List all available Gemini TTS voices" complete -c fabric -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)" complete -c fabric -l suppress-think -d "Suppress text enclosed in thinking tags" complete -c fabric -l disable-responses-api -d "Disable OpenAI Responses API (default: false)" diff --git a/docs/Gemini-TTS.md b/docs/Gemini-TTS.md new file mode 100644 index 00000000..bb5de2eb --- /dev/null +++ b/docs/Gemini-TTS.md @@ -0,0 +1,155 @@ +# Gemini Text-to-Speech (TTS) Guide + +Fabric supports Google Gemini's text-to-speech (TTS) capabilities, allowing you to convert text into high-quality audio using various AI-generated voices. 
+ +## Overview + +The Gemini TTS feature in Fabric allows you to: + +- Convert text input into audio using Google's Gemini TTS models +- Choose from 30+ different AI voices with varying characteristics +- Generate high-quality WAV audio files +- Integrate TTS generation into your existing Fabric workflows + +## Usage + +### Basic TTS Generation + +To generate audio from text using TTS: + +```bash +# Basic TTS with default voice (Kore) +echo "Hello, this is a test of Gemini TTS" | fabric -m gemini-2.0-flash-tts -o output.wav + +# Using a specific voice +echo "Hello, this is a test with the Charon voice" | fabric -m gemini-2.0-flash-tts --voice Charon -o output.wav + +# Using TTS with a pattern +fabric -p summarize --voice Puck -m gemini-2.0-flash-tts -o summary.wav < document.txt +``` + +### Voice Selection + +Use the `--voice` flag to specify which voice to use for TTS generation: + +```bash +fabric -m gemini-2.0-flash-tts --voice Zephyr -o output.wav "Your text here" +``` + +If no voice is specified, the default voice "Kore" will be used. 
+ +## Available Voices + +Gemini TTS supports 30+ different voices, each with unique characteristics: + +### Popular Voices + +- **Kore** - Firm and confident (default) +- **Charon** - Informative and clear +- **Puck** - Upbeat and energetic +- **Zephyr** - Bright and cheerful +- **Leda** - Youthful and energetic +- **Aoede** - Breezy and natural + +### Complete Voice List + +- Kore, Charon, Puck, Fenrir, Aoede, Leda, Orus, Zephyr +- Autonoe, Callirrhoe, Despina, Erinome, Gacrux, Laomedeia +- Pulcherrima, Sulafat, Vindemiatrix, Achernar, Achird +- Algenib, Algieba, Alnilam, Enceladus, Iapetus, Rasalgethi +- Sadachbia, Zubenelgenubi, Vega, Capella, Lyra, Sadaltager, Schedar, Umbriel + +### Listing Available Voices + +To see all available voices with descriptions: + +```bash +# List all voices with characteristics +fabric --list-gemini-voices + +# List voice names only (for shell completion) +fabric --list-gemini-voices --shell-complete-list +``` + +## Rate Limits + +Google Gemini TTS has usage quotas that vary by plan: + +### Free Tier + +- **15 requests per day** per project per TTS model +- Quota resets daily +- Applies to all TTS models (e.g., `gemini-2.5-flash-preview-tts`) + +### Rate Limit Errors + +If you exceed your quota, you'll see an error like: + +```text +Error 429: You exceeded your current quota, please check your plan and billing details +``` + +**Solutions:** + +- Wait for daily quota reset (typically at midnight UTC) +- Upgrade to a paid plan for higher limits +- Use TTS generation strategically for important content + +For current rate limits and pricing, visit: <https://ai.google.dev/gemini-api/docs/rate-limits> + +## Configuration + +### Command Line Options + +- `--voice <voice_name>` - Specify the TTS voice to use +- `-o <filename>` - Output audio file (required for TTS models) +- `-m <model>` - Specify a TTS-capable model (e.g., `gemini-2.0-flash-tts`) + +### YAML Configuration + +You can also set a default voice in your Fabric configuration file (`~/.config/fabric/config.yaml`): + +```yaml +voice: "Charon" # Set your preferred default voice +``` + +## 
Requirements + +- Valid Google Gemini API key configured in Fabric +- TTS-capable Gemini model (models containing "tts" in the name) +- Audio output must be specified with `-o filename.wav` + +## Troubleshooting + +### Common Issues + +#### Error: "TTS model requires audio output" + +- Solution: Always specify an output file with `-o filename.wav` when using TTS models + +#### Error: "Invalid voice 'X'" + +- Solution: Check that the voice name is spelled correctly and matches one of the supported voices listed above + +#### Error: "TTS generation failed" + +- Solution: Verify your Gemini API key is valid and you have sufficient quota + +### Getting Help + +For additional help with TTS features: + +```bash +fabric --help +``` + +## Technical Details + +- **Audio Format**: WAV files with 24kHz sample rate, 16-bit depth, mono channel +- **Language Support**: Automatic language detection for 24+ languages +- **Model Requirements**: Models must contain "tts", "preview-tts", or "text-to-speech" in the name +- **Voice Selection**: Uses Google's PrebuiltVoiceConfig system for consistent voice quality + +--- + +For more information about Fabric, visit the [main documentation](../README.md). diff --git a/docs/voices/README.md b/docs/voices/README.md new file mode 100644 index 00000000..b6626243 --- /dev/null +++ b/docs/voices/README.md @@ -0,0 +1,36 @@ +# Voice Samples + +This directory contains sample audio files demonstrating different Gemini TTS voices. 
+ +## Sample Files + +Each voice sample says "The quick brown fox jumped over the lazy dog" to demonstrate the voice characteristics: + +- **Kore.wav** - Firm and confident (default voice) +- **Charon.wav** - Informative and clear +- **Vega.wav** - Smooth and pleasant +- **Capella.wav** - Warm and welcoming +- **Achird.wav** - Friendly and approachable +- **Lyra.wav** - Melodic and expressive + +## Generating Samples + +To generate these samples, use the following commands: + +```bash +# Generate each voice sample +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Kore -o docs/voices/Kore.wav +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o docs/voices/Charon.wav +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Vega -o docs/voices/Vega.wav +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Capella -o docs/voices/Capella.wav +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Achird -o docs/voices/Achird.wav +echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Lyra -o docs/voices/Lyra.wav +``` + +## Audio Format + +- **Format**: WAV (uncompressed) +- **Sample Rate**: 24kHz +- **Bit Depth**: 16-bit +- **Channels**: Mono +- **Approximate Size**: ~500KB per sample diff --git a/internal/cli/flags.go b/internal/cli/flags.go index 9735bab5..76719d72 100644 --- a/internal/cli/flags.go +++ b/internal/cli/flags.go @@ -87,6 +87,8 @@ type Flags struct { ThinkStartTag string `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:""` ThinkEndTag string `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:""` DisableResponsesAPI bool `long:"disable-responses-api" 
yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"` + Voice string `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"` + ListGeminiVoices bool `long:"list-gemini-voices" description:"List all available Gemini TTS voices"` } var debug = false @@ -441,6 +443,7 @@ func (o *Flags) BuildChatOptions() (ret *domain.ChatOptions, err error) { SuppressThink: o.SuppressThink, ThinkStartTag: startTag, ThinkEndTag: endTag, + Voice: o.Voice, } return } diff --git a/internal/cli/listing.go b/internal/cli/listing.go index ce279f5b..2bf923f1 100644 --- a/internal/cli/listing.go +++ b/internal/cli/listing.go @@ -1,11 +1,13 @@ package cli import ( + "fmt" "os" "strconv" "github.com/danielmiessler/fabric/internal/core" "github.com/danielmiessler/fabric/internal/plugins/ai" + "github.com/danielmiessler/fabric/internal/plugins/ai/gemini" "github.com/danielmiessler/fabric/internal/plugins/db/fsdb" ) @@ -58,5 +60,11 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor return true, err } + if currentFlags.ListGeminiVoices { + voicesList := gemini.ListGeminiVoices(currentFlags.ShellCompleteOutput) + fmt.Print(voicesList) + return true, nil + } + return false, nil } diff --git a/internal/domain/domain.go b/internal/domain/domain.go index b37b794e..c5e82ea4 100644 --- a/internal/domain/domain.go +++ b/internal/domain/domain.go @@ -38,6 +38,7 @@ type ChatOptions struct { ThinkEndTag string AudioOutput bool AudioFormat string + Voice string } // NormalizeMessages remove empty messages and ensure messages order user-assist-user diff --git a/internal/plugins/ai/gemini/gemini.go b/internal/plugins/ai/gemini/gemini.go index ab298036..6fd7808e 100644 --- a/internal/plugins/ai/gemini/gemini.go +++ b/internal/plugins/ai/gemini/gemini.go @@ -194,6 +194,12 @@ func (o *Client) generateTTSAudio(ctx context.Context, msgs []*chat.ChatCompleti return "", err } + // Validate 
voice name before making API call + if opts.Voice != "" && !IsValidGeminiVoice(opts.Voice) { + validVoices := GetGeminiVoiceNames() + return "", fmt.Errorf("invalid voice '%s'. Valid voices are: %v", opts.Voice, validVoices) + } + client, err := o.createGenaiClient(ctx) if err != nil { return "", err @@ -211,12 +217,17 @@ func (o *Client) performTTSGeneration(ctx context.Context, client *genai.Client, }} // Configure for TTS generation + voiceName := opts.Voice + if voiceName == "" { + voiceName = "Kore" // Default voice if none specified + } + config := &genai.GenerateContentConfig{ ResponseModalities: []string{"AUDIO"}, SpeechConfig: &genai.SpeechConfig{ VoiceConfig: &genai.VoiceConfig{ PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{ - VoiceName: "Kore", // Default voice + VoiceName: voiceName, }, }, }, diff --git a/internal/plugins/ai/gemini/voices.go b/internal/plugins/ai/gemini/voices.go new file mode 100644 index 00000000..339ca83c --- /dev/null +++ b/internal/plugins/ai/gemini/voices.go @@ -0,0 +1,218 @@ +package gemini + +import ( + "fmt" + "sort" +) + +// GeminiVoice represents a Gemini TTS voice with its characteristics +type GeminiVoice struct { + Name string + Description string + Characteristics []string +} + +// GetGeminiVoices returns the current list of supported Gemini TTS voices +// This list is maintained based on official Google Gemini documentation +// https://ai.google.dev/gemini-api/docs/speech-generation +func GetGeminiVoices() []GeminiVoice { + return []GeminiVoice{ + // Firm voices + {Name: "Kore", Description: "Firm and confident", Characteristics: []string{"firm", "confident", "default"}}, + {Name: "Orus", Description: "Firm and decisive", Characteristics: []string{"firm", "decisive"}}, + {Name: "Alnilam", Description: "Firm and strong", Characteristics: []string{"firm", "strong"}}, + + // Upbeat voices + {Name: "Puck", Description: "Upbeat and energetic", Characteristics: []string{"upbeat", "energetic"}}, + {Name: "Laomedeia", 
Description: "Upbeat and lively", Characteristics: []string{"upbeat", "lively"}}, + + // Bright voices + {Name: "Zephyr", Description: "Bright and cheerful", Characteristics: []string{"bright", "cheerful"}}, + {Name: "Autonoe", Description: "Bright and optimistic", Characteristics: []string{"bright", "optimistic"}}, + + // Informative voices + {Name: "Charon", Description: "Informative and clear", Characteristics: []string{"informative", "clear"}}, + {Name: "Rasalgethi", Description: "Informative and professional", Characteristics: []string{"informative", "professional"}}, + + // Natural voices + {Name: "Aoede", Description: "Breezy and natural", Characteristics: []string{"breezy", "natural"}}, + {Name: "Leda", Description: "Youthful and energetic", Characteristics: []string{"youthful", "energetic"}}, + + // Gentle voices + {Name: "Vindemiatrix", Description: "Gentle and kind", Characteristics: []string{"gentle", "kind"}}, + {Name: "Achernar", Description: "Soft and gentle", Characteristics: []string{"soft", "gentle"}}, + {Name: "Enceladus", Description: "Breathy and soft", Characteristics: []string{"breathy", "soft"}}, + + // Warm voices + {Name: "Sulafat", Description: "Warm and welcoming", Characteristics: []string{"warm", "welcoming"}}, + {Name: "Capella", Description: "Warm and approachable", Characteristics: []string{"warm", "approachable"}}, + + // Clear voices + {Name: "Iapetus", Description: "Clear and articulate", Characteristics: []string{"clear", "articulate"}}, + {Name: "Erinome", Description: "Clear and precise", Characteristics: []string{"clear", "precise"}}, + + // Pleasant voices + {Name: "Algieba", Description: "Smooth and pleasant", Characteristics: []string{"smooth", "pleasant"}}, + {Name: "Vega", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}}, + + // Textured voices + {Name: "Algenib", Description: "Gravelly texture", Characteristics: []string{"gravelly", "textured"}}, + + // Relaxed voices + {Name: 
"Callirrhoe", Description: "Easy-going and relaxed", Characteristics: []string{"relaxed", "easy-going"}}, + {Name: "Despina", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}}, + + // Mature voices + {Name: "Gacrux", Description: "Mature and experienced", Characteristics: []string{"mature", "experienced"}}, + + // Expressive voices + {Name: "Pulcherrima", Description: "Forward and expressive", Characteristics: []string{"forward", "expressive"}}, + {Name: "Lyra", Description: "Melodic and expressive", Characteristics: []string{"melodic", "expressive"}}, + + // Dynamic voices + {Name: "Fenrir", Description: "Excitable and dynamic", Characteristics: []string{"excitable", "dynamic"}}, + {Name: "Sadachbia", Description: "Lively and animated", Characteristics: []string{"lively", "animated"}}, + + // Friendly voices + {Name: "Achird", Description: "Friendly and approachable", Characteristics: []string{"friendly", "approachable"}}, + + // Casual voices + {Name: "Zubenelgenubi", Description: "Casual and conversational", Characteristics: []string{"casual", "conversational"}}, + + // Additional voices from latest API + {Name: "Sadaltager", Description: "Additional voice option", Characteristics: []string{"additional"}}, + {Name: "Schedar", Description: "Additional voice option", Characteristics: []string{"additional"}}, + {Name: "Umbriel", Description: "Additional voice option", Characteristics: []string{"additional"}}, + } +} + +// GetGeminiVoiceNames returns just the voice names in alphabetical order +func GetGeminiVoiceNames() []string { + voices := GetGeminiVoices() + names := make([]string, len(voices)) + for i, voice := range voices { + names[i] = voice.Name + } + sort.Strings(names) + return names +} + +// IsValidGeminiVoice checks if a voice name is valid +func IsValidGeminiVoice(voiceName string) bool { + if voiceName == "" { + return true // Empty voice is valid (will use default) + } + + for _, voice := range GetGeminiVoices() { + 
if voice.Name == voiceName { + return true + } + } + return false +} + +// GetGeminiVoiceByName returns a specific voice by name +func GetGeminiVoiceByName(name string) (*GeminiVoice, error) { + for _, voice := range GetGeminiVoices() { + if voice.Name == name { + return &voice, nil + } + } + return nil, fmt.Errorf("voice '%s' not found", name) +} + +// ListGeminiVoices formats the voice list for display +func ListGeminiVoices(shellCompleteMode bool) string { + if shellCompleteMode { + // For shell completion, just return voice names + names := GetGeminiVoiceNames() + result := "" + for _, name := range names { + result += name + "\n" + } + return result + } + + // For human-readable output + voices := GetGeminiVoices() + result := "Available Gemini Text-to-Speech voices:\n\n" + + // Group by characteristics for better readability + groups := map[string][]GeminiVoice{ + "Firm & Confident": {}, + "Bright & Cheerful": {}, + "Warm & Welcoming": {}, + "Clear & Professional": {}, + "Natural & Expressive": {}, + "Other Voices": {}, + } + + for _, voice := range voices { + placed := false + for _, char := range voice.Characteristics { + switch char { + case "firm", "confident", "decisive", "strong": + if !placed { + groups["Firm & Confident"] = append(groups["Firm & Confident"], voice) + placed = true + } + case "bright", "cheerful", "upbeat", "energetic", "lively": + if !placed { + groups["Bright & Cheerful"] = append(groups["Bright & Cheerful"], voice) + placed = true + } + case "warm", "welcoming", "friendly", "approachable": + if !placed { + groups["Warm & Welcoming"] = append(groups["Warm & Welcoming"], voice) + placed = true + } + case "clear", "informative", "professional", "articulate": + if !placed { + groups["Clear & Professional"] = append(groups["Clear & Professional"], voice) + placed = true + } + case "natural", "expressive", "melodic", "breezy": + if !placed { + groups["Natural & Expressive"] = append(groups["Natural & Expressive"], voice) + placed = true + 
} + } + } + if !placed { + groups["Other Voices"] = append(groups["Other Voices"], voice) + } + } + + // Output grouped voices + for groupName, groupVoices := range groups { + if len(groupVoices) > 0 { + result += fmt.Sprintf("%s:\n", groupName) + for _, voice := range groupVoices { + defaultStr := "" + if voice.Name == "Kore" { + defaultStr = " (default)" + } + result += fmt.Sprintf(" %-15s - %s%s\n", voice.Name, voice.Description, defaultStr) + } + result += "\n" + } + } + + result += "Use --voice to select a specific voice.\n" + result += "Example: fabric --voice Charon -m gemini-2.0-flash-tts -o output.wav \"Hello world\"\n" + + return result +} + +// NOTE: This implementation maintains a curated list based on official Google documentation. +// In the future, if Google provides a dynamic voice discovery API, this can be updated +// to make API calls for real-time voice discovery. +// +// The current approach ensures: +// 1. Fast response times (no API calls needed) +// 2. Reliable voice information with descriptions +// 3. Easy maintenance when new voices are added +// 4. Offline functionality +// +// To update voices: Monitor Google's Gemini TTS documentation at: +// https://ai.google.dev/gemini-api/docs/speech-generation