diff --git a/.vscode/settings.json b/.vscode/settings.json
index 64e5010a..ac137d59 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,23 +1,32 @@
{
"cSpell.words": [
+ "Achird",
"addextension",
"adduser",
"AIML",
"anthropics",
+ "Aoede",
"atotto",
+ "Autonoe",
"badfile",
"Behrens",
"blindspots",
"Bombal",
+ "Callirhoe",
+ "Callirrhoe",
"Cerebras",
+ "compadd",
+ "compdef",
"compinit",
"creatordate",
+ "curcontext",
"custompatterns",
"danielmiessler",
"davidanson",
"Debugf",
"dedup",
"deepseek",
+ "Despina",
"direnv",
"dryrun",
"dsrp",
@@ -25,6 +34,7 @@
"Eisler",
"elif",
"envrc",
+ "Erinome",
"Errorf",
"eugeis",
"Eugen",
@@ -66,6 +76,7 @@
"Kore",
"ksylvan",
"Langdock",
+ "Laomedeia",
"ldflags",
"libexec",
"listcontexts",
@@ -89,6 +100,7 @@
"openaiapi",
"opencode",
"openrouter",
+ "Orus",
"otiai",
"pdflatex",
"pipx",
@@ -97,11 +109,14 @@
"presencepenalty",
"printcontext",
"printsession",
+ "Pulcherrima",
"pycache",
"pyperclip",
"readystream",
"restapi",
"rmextension",
+ "Sadachbia",
+ "Sadaltager",
"samber",
"sashabaranov",
"sdist",
@@ -112,6 +127,7 @@
"Streamlit",
"stretchr",
"subchunk",
+ "Sulafat",
"talkpanel",
"Telos",
"testpattern",
diff --git a/README.md b/README.md
index 30b0bb2e..65dd7941 100644
--- a/README.md
+++ b/README.md
@@ -548,6 +548,9 @@ Application Options:
--think-start-tag= Start tag for thinking sections (default: )
--think-end-tag= End tag for thinking sections (default: )
--disable-responses-api Disable OpenAI Responses API (default: false)
+ --voice= TTS voice name for supported models (e.g., Kore, Charon, Puck)
+ (default: Kore)
+ --list-gemini-voices List all available Gemini TTS voices
Help Options:
-h, --help Show this help message
diff --git a/completions/_fabric b/completions/_fabric
index 7659bb05..e92fec7e 100644
--- a/completions/_fabric
+++ b/completions/_fabric
@@ -14,16 +14,19 @@ _fabric_models() {
models=(${(f)"$(fabric --listmodels --shell-complete-list 2>/dev/null)"})
compadd -X "Models:" ${models}
}
+
_fabric_contexts() {
local -a contexts
contexts=(${(f)"$(fabric --listcontexts --shell-complete-list 2>/dev/null)"})
compadd -X "Contexts:" ${contexts}
}
+
_fabric_sessions() {
local -a sessions
sessions=(${(f)"$(fabric --listsessions --shell-complete-list 2>/dev/null)"})
compadd -X "Sessions:" ${sessions}
}
+
_fabric_strategies() {
local -a strategies
strategies=(${(f)"$(fabric --liststrategies --shell-complete-list 2>/dev/null)"})
@@ -34,14 +37,12 @@ _fabric_extensions() {
local -a extensions
extensions=(${(f)"$(fabric --listextensions --shell-complete-list 2>/dev/null)"})
compadd -X "Extensions:" ${extensions}
- '(-L --listmodels)'{-L,--listmodels}'[List all available models]:list models:_fabric_models' \
- '(-x --listcontexts)'{-x,--listcontexts}'[List all contexts]:list contexts:_fabric_contexts' \
- '(-X --listsessions)'{-X,--listsessions}'[List all sessions]:list sessions:_fabric_sessions' \
- '(--listextensions)--listextensions[List all registered extensions]' \
- '(--liststrategies)--liststrategies[List all strategies]:list strategies:_fabric_strategies' \
- '(--listvendors)--listvendors[List all vendors]' \
- vendors=(${(f)"$(fabric --listvendors 2>/dev/null)"})
- compadd -X "Vendors:" ${vendors}
+}
+
+_fabric_gemini_voices() {
+ local -a voices
+ voices=(${(f)"$(fabric --list-gemini-voices --shell-complete-list 2>/dev/null)"})
+ compadd -X "Gemini TTS Voices:" ${voices}
}
_fabric() {
@@ -109,6 +110,8 @@ _fabric() {
'(--strategy)--strategy[Choose a strategy from the available strategies]:strategy:_fabric_strategies' \
'(--liststrategies)--liststrategies[List all strategies]' \
'(--listvendors)--listvendors[List all vendors]' \
+ '(--voice)--voice[TTS voice name for supported models]:voice:_fabric_gemini_voices' \
+ '(--list-gemini-voices)--list-gemini-voices[List all available Gemini TTS voices]' \
'(--shell-complete-list)--shell-complete-list[Output raw list without headers/formatting (for shell completion)]' \
'(--suppress-think)--suppress-think[Suppress text enclosed in thinking tags]' \
'(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: )]:start tag:' \
@@ -119,4 +122,3 @@ _fabric() {
}
_fabric "$@"
-
diff --git a/completions/fabric.bash b/completions/fabric.bash
index ca47e804..1acbc8db 100644
--- a/completions/fabric.bash
+++ b/completions/fabric.bash
@@ -13,7 +13,7 @@ _fabric() {
_get_comp_words_by_ref -n : cur prev words cword
# Define all possible options/flags
- local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
+ local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
# Helper function for dynamic completions
_fabric_get_list() {
@@ -62,6 +62,10 @@ _fabric() {
COMPREPLY=($(compgen -W "$(_fabric_get_list --liststrategies)" -- "${cur}"))
return 0
;;
+ --voice)
+ COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}"))
+ return 0
+ ;;
# Options requiring file/directory paths
-a | --attachment | -o | --output | --config | --addextension | --image-file)
_filedir
diff --git a/completions/fabric.fish b/completions/fabric.fish
index 7e464ba7..24ae5afc 100755
--- a/completions/fabric.fish
+++ b/completions/fabric.fish
@@ -31,6 +31,10 @@ function __fabric_get_extensions
fabric --listextensions --shell-complete-list 2>/dev/null
end
+function __fabric_get_gemini_voices
+ fabric --list-gemini-voices --shell-complete-list 2>/dev/null
+end
+
# Main completion function
complete -c fabric -f
@@ -71,6 +75,7 @@ complete -c fabric -l rmextension -d "Remove a registered extension by name" -a
complete -c fabric -l strategy -d "Choose a strategy from the available strategies" -a "(__fabric_get_strategies)"
complete -c fabric -l think-start-tag -d "Start tag for thinking sections (default: )"
complete -c fabric -l think-end-tag -d "End tag for thinking sections (default: )"
+complete -c fabric -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)"
# Boolean flags (no arguments)
complete -c fabric -s S -l setup -d "Run setup for all reconfigurable parts of fabric"
@@ -99,6 +104,7 @@ complete -c fabric -l version -d "Print current version"
complete -c fabric -l listextensions -d "List all registered extensions"
complete -c fabric -l liststrategies -d "List all strategies"
complete -c fabric -l listvendors -d "List all vendors"
+complete -c fabric -l list-gemini-voices -d "List all available Gemini TTS voices"
complete -c fabric -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)"
complete -c fabric -l suppress-think -d "Suppress text enclosed in thinking tags"
complete -c fabric -l disable-responses-api -d "Disable OpenAI Responses API (default: false)"
diff --git a/docs/Gemini-TTS.md b/docs/Gemini-TTS.md
new file mode 100644
index 00000000..bb5de2eb
--- /dev/null
+++ b/docs/Gemini-TTS.md
@@ -0,0 +1,155 @@
+# Gemini Text-to-Speech (TTS) Guide
+
+Fabric supports Google Gemini's text-to-speech (TTS) capabilities, allowing you to convert text into high-quality audio using various AI-generated voices.
+
+## Overview
+
+The Gemini TTS feature in Fabric allows you to:
+
+- Convert text input into audio using Google's Gemini TTS models
+- Choose from 30+ different AI voices with varying characteristics
+- Generate high-quality WAV audio files
+- Integrate TTS generation into your existing Fabric workflows
+
+## Usage
+
+### Basic TTS Generation
+
+To generate audio from text using TTS:
+
+```bash
+# Basic TTS with default voice (Kore)
+echo "Hello, this is a test of Gemini TTS" | fabric -m gemini-2.0-flash-tts -o output.wav
+
+# Using a specific voice
+echo "Hello, this is a test with the Charon voice" | fabric -m gemini-2.0-flash-tts --voice Charon -o output.wav
+
+# Using TTS with a pattern
+fabric -p summarize --voice Puck -m gemini-2.0-flash-tts -o summary.wav < document.txt
+```
+
+### Voice Selection
+
+Use the `--voice` flag to specify which voice to use for TTS generation:
+
+```bash
+fabric -m gemini-2.0-flash-tts --voice Zephyr -o output.wav "Your text here"
+```
+
+If no voice is specified, the default voice "Kore" will be used.
+
+## Available Voices
+
+Gemini TTS supports 30+ different voices, each with unique characteristics:
+
+### Popular Voices
+
+- **Kore** - Firm and confident (default)
+- **Charon** - Informative and clear
+- **Puck** - Upbeat and energetic
+- **Zephyr** - Bright and cheerful
+- **Leda** - Youthful and energetic
+- **Aoede** - Breezy and natural
+
+### Complete Voice List
+
+- Kore, Charon, Puck, Fenrir, Aoede, Leda, Orus, Zephyr
+- Autonoe, Callirrhoe, Despina, Erinome, Gacrux, Laomedeia
+- Pulcherrima, Sulafat, Vindemiatrix, Achernar, Achird
+- Algenib, Algieba, Alnilam, Enceladus, Iapetus, Rasalgethi
+- Sadachbia, Zubenelgenubi, Vega, Capella, Lyra, Sadaltager, Schedar, Umbriel
+
+### Listing Available Voices
+
+To see all available voices with descriptions:
+
+```bash
+# List all voices with characteristics
+fabric --list-gemini-voices
+
+# List voice names only (for shell completion)
+fabric --list-gemini-voices --shell-complete-list
+```
+
+## Rate Limits
+
+Google Gemini TTS has usage quotas that vary by plan:
+
+### Free Tier
+
+- **15 requests per day** per project per TTS model
+- Quota resets daily
+- Applies to all TTS models (e.g., `gemini-2.5-flash-preview-tts`)
+
+### Rate Limit Errors
+
+If you exceed your quota, you'll see an error like:
+
+```text
+Error 429: You exceeded your current quota, please check your plan and billing details
+```
+
+**Solutions:**
+
+- Wait for daily quota reset (typically at midnight Pacific Time)
+- Upgrade to a paid plan for higher limits
+- Use TTS generation strategically for important content
+
+For current rate limits and pricing, visit: <https://ai.google.dev/gemini-api/docs/rate-limits>
+
+## Configuration
+
+### Command Line Options
+
+- `--voice <voice_name>` - Specify the TTS voice to use
+- `-o <file>` - Output audio file (required for TTS models)
+- `-m <model>` - Specify a TTS-capable model (e.g., `gemini-2.0-flash-tts`)
+
+### YAML Configuration
+
+You can also set a default voice in your Fabric configuration file (`~/.config/fabric/config.yaml`):
+
+```yaml
+voice: "Charon" # Set your preferred default voice
+```
+
+## Requirements
+
+- Valid Google Gemini API key configured in Fabric
+- TTS-capable Gemini model (models containing "tts" in the name)
+- Audio output must be specified with `-o filename.wav`
+
+## Troubleshooting
+
+### Common Issues
+
+#### Error: "TTS model requires audio output"
+
+- Solution: Always specify an output file with `-o filename.wav` when using TTS models
+
+#### Error: "Invalid voice 'X'"
+
+- Solution: Check that the voice name is spelled correctly and matches one of the supported voices listed above
+
+#### Error: "TTS generation failed"
+
+- Solution: Verify your Gemini API key is valid and you have sufficient quota
+
+### Getting Help
+
+For additional help with TTS features:
+
+```bash
+fabric --help
+```
+
+## Technical Details
+
+- **Audio Format**: WAV files with 24kHz sample rate, 16-bit depth, mono channel
+- **Language Support**: Automatic language detection for 24+ languages
+- **Model Requirements**: Models must contain "tts", "preview-tts", or "text-to-speech" in the name
+- **Voice Selection**: Uses Google's PrebuiltVoiceConfig system for consistent voice quality
+
+---
+
+For more information about Fabric, visit the [main documentation](../README.md).
diff --git a/docs/voices/README.md b/docs/voices/README.md
new file mode 100644
index 00000000..b6626243
--- /dev/null
+++ b/docs/voices/README.md
@@ -0,0 +1,36 @@
+# Voice Samples
+
+This directory contains sample audio files demonstrating different Gemini TTS voices.
+
+## Sample Files
+
+Each voice sample says "The quick brown fox jumped over the lazy dog" to demonstrate the voice characteristics:
+
+- **Kore.wav** - Firm and confident (default voice)
+- **Charon.wav** - Informative and clear
+- **Vega.wav** - Smooth and flowing
+- **Capella.wav** - Warm and approachable
+- **Achird.wav** - Friendly and approachable
+- **Lyra.wav** - Melodic and expressive
+
+## Generating Samples
+
+To generate these samples, use the following commands:
+
+```bash
+# Generate each voice sample
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Kore -o docs/voices/Kore.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o docs/voices/Charon.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Vega -o docs/voices/Vega.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Capella -o docs/voices/Capella.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Achird -o docs/voices/Achird.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Lyra -o docs/voices/Lyra.wav
+```
+
+## Audio Format
+
+- **Format**: WAV (uncompressed)
+- **Sample Rate**: 24kHz
+- **Bit Depth**: 16-bit
+- **Channels**: Mono
+- **Approximate Size**: ~500KB per sample
diff --git a/internal/cli/flags.go b/internal/cli/flags.go
index 9735bab5..76719d72 100644
--- a/internal/cli/flags.go
+++ b/internal/cli/flags.go
@@ -87,6 +87,8 @@ type Flags struct {
ThinkStartTag string `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:""`
ThinkEndTag string `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:""`
DisableResponsesAPI bool `long:"disable-responses-api" yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"`
+ Voice string `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"`
+ ListGeminiVoices bool `long:"list-gemini-voices" description:"List all available Gemini TTS voices"`
}
var debug = false
@@ -441,6 +443,7 @@ func (o *Flags) BuildChatOptions() (ret *domain.ChatOptions, err error) {
SuppressThink: o.SuppressThink,
ThinkStartTag: startTag,
ThinkEndTag: endTag,
+ Voice: o.Voice,
}
return
}
diff --git a/internal/cli/listing.go b/internal/cli/listing.go
index ce279f5b..2bf923f1 100644
--- a/internal/cli/listing.go
+++ b/internal/cli/listing.go
@@ -1,11 +1,13 @@
package cli
import (
+ "fmt"
"os"
"strconv"
"github.com/danielmiessler/fabric/internal/core"
"github.com/danielmiessler/fabric/internal/plugins/ai"
+ "github.com/danielmiessler/fabric/internal/plugins/ai/gemini"
"github.com/danielmiessler/fabric/internal/plugins/db/fsdb"
)
@@ -58,5 +60,11 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
return true, err
}
+ if currentFlags.ListGeminiVoices {
+ voicesList := gemini.ListGeminiVoices(currentFlags.ShellCompleteOutput)
+ fmt.Print(voicesList)
+ return true, nil
+ }
+
return false, nil
}
diff --git a/internal/domain/domain.go b/internal/domain/domain.go
index b37b794e..c5e82ea4 100644
--- a/internal/domain/domain.go
+++ b/internal/domain/domain.go
@@ -38,6 +38,7 @@ type ChatOptions struct {
ThinkEndTag string
AudioOutput bool
AudioFormat string
+ Voice string
}
// NormalizeMessages remove empty messages and ensure messages order user-assist-user
diff --git a/internal/plugins/ai/gemini/gemini.go b/internal/plugins/ai/gemini/gemini.go
index ab298036..6fd7808e 100644
--- a/internal/plugins/ai/gemini/gemini.go
+++ b/internal/plugins/ai/gemini/gemini.go
@@ -194,6 +194,12 @@ func (o *Client) generateTTSAudio(ctx context.Context, msgs []*chat.ChatCompleti
return "", err
}
+ // Validate voice name before making API call
+ if opts.Voice != "" && !IsValidGeminiVoice(opts.Voice) {
+ validVoices := GetGeminiVoiceNames()
+ return "", fmt.Errorf("invalid voice '%s'. Valid voices are: %v", opts.Voice, validVoices)
+ }
+
client, err := o.createGenaiClient(ctx)
if err != nil {
return "", err
@@ -211,12 +217,17 @@ func (o *Client) performTTSGeneration(ctx context.Context, client *genai.Client,
}}
// Configure for TTS generation
+ voiceName := opts.Voice
+ if voiceName == "" {
+ voiceName = "Kore" // Default voice if none specified
+ }
+
config := &genai.GenerateContentConfig{
ResponseModalities: []string{"AUDIO"},
SpeechConfig: &genai.SpeechConfig{
VoiceConfig: &genai.VoiceConfig{
PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{
- VoiceName: "Kore", // Default voice
+ VoiceName: voiceName,
},
},
},
diff --git a/internal/plugins/ai/gemini/voices.go b/internal/plugins/ai/gemini/voices.go
new file mode 100644
index 00000000..339ca83c
--- /dev/null
+++ b/internal/plugins/ai/gemini/voices.go
@@ -0,0 +1,218 @@
+package gemini
+
+import (
+ "fmt"
+ "sort"
+)
+
+// GeminiVoice represents a Gemini TTS voice with its characteristics
+type GeminiVoice struct {
+ Name string // exact voice name; IsValidGeminiVoice compares it case-sensitively
+ Description string // short human-readable summary printed by ListGeminiVoices
+ Characteristics []string // lowercase tags; ListGeminiVoices uses them to pick a display group
+}
+
+// GetGeminiVoices returns the curated Gemini TTS voice list (a fresh slice on every call).
+// It is maintained by hand from https://ai.google.dev/gemini-api/docs/speech-generation
+// NOTE(review): Vega, Capella and Lyra are not in that page's voice table — confirm the API accepts them.
+func GetGeminiVoices() []GeminiVoice {
+	return []GeminiVoice{
+		// Firm voices
+		{Name: "Kore", Description: "Firm and confident", Characteristics: []string{"firm", "confident", "default"}},
+		{Name: "Orus", Description: "Firm and decisive", Characteristics: []string{"firm", "decisive"}},
+		{Name: "Alnilam", Description: "Firm and strong", Characteristics: []string{"firm", "strong"}},
+
+		// Upbeat voices
+		{Name: "Puck", Description: "Upbeat and energetic", Characteristics: []string{"upbeat", "energetic"}},
+		{Name: "Laomedeia", Description: "Upbeat and lively", Characteristics: []string{"upbeat", "lively"}},
+
+		// Bright voices
+		{Name: "Zephyr", Description: "Bright and cheerful", Characteristics: []string{"bright", "cheerful"}},
+		{Name: "Autonoe", Description: "Bright and optimistic", Characteristics: []string{"bright", "optimistic"}},
+
+		// Informative voices
+		{Name: "Charon", Description: "Informative and clear", Characteristics: []string{"informative", "clear"}},
+		{Name: "Rasalgethi", Description: "Informative and professional", Characteristics: []string{"informative", "professional"}},
+
+		// Natural voices
+		{Name: "Aoede", Description: "Breezy and natural", Characteristics: []string{"breezy", "natural"}},
+		{Name: "Leda", Description: "Youthful and energetic", Characteristics: []string{"youthful", "energetic"}},
+
+		// Gentle voices
+		{Name: "Vindemiatrix", Description: "Gentle and kind", Characteristics: []string{"gentle", "kind"}},
+		{Name: "Achernar", Description: "Soft and gentle", Characteristics: []string{"soft", "gentle"}},
+		{Name: "Enceladus", Description: "Breathy and soft", Characteristics: []string{"breathy", "soft"}},
+
+		// Warm voices
+		{Name: "Sulafat", Description: "Warm and welcoming", Characteristics: []string{"warm", "welcoming"}},
+		{Name: "Capella", Description: "Warm and approachable", Characteristics: []string{"warm", "approachable"}},
+
+		// Clear voices
+		{Name: "Iapetus", Description: "Clear and articulate", Characteristics: []string{"clear", "articulate"}},
+		{Name: "Erinome", Description: "Clear and precise", Characteristics: []string{"clear", "precise"}},
+
+		// Pleasant voices
+		{Name: "Algieba", Description: "Smooth and pleasant", Characteristics: []string{"smooth", "pleasant"}},
+		{Name: "Vega", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}},
+
+		// Textured voices
+		{Name: "Algenib", Description: "Gravelly texture", Characteristics: []string{"gravelly", "textured"}},
+
+		// Relaxed voices
+		{Name: "Callirrhoe", Description: "Easy-going and relaxed", Characteristics: []string{"relaxed", "easy-going"}},
+		{Name: "Despina", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}},
+
+		// Mature voices
+		{Name: "Gacrux", Description: "Mature and experienced", Characteristics: []string{"mature", "experienced"}},
+
+		// Expressive voices
+		{Name: "Pulcherrima", Description: "Forward and expressive", Characteristics: []string{"forward", "expressive"}},
+		{Name: "Lyra", Description: "Melodic and expressive", Characteristics: []string{"melodic", "expressive"}},
+
+		// Dynamic voices
+		{Name: "Fenrir", Description: "Excitable and dynamic", Characteristics: []string{"excitable", "dynamic"}},
+		{Name: "Sadachbia", Description: "Lively and animated", Characteristics: []string{"lively", "animated"}},
+
+		// Friendly voices
+		{Name: "Achird", Description: "Friendly and approachable", Characteristics: []string{"friendly", "approachable"}},
+
+		// Casual voices
+		{Name: "Zubenelgenubi", Description: "Casual and conversational", Characteristics: []string{"casual", "conversational"}},
+
+		// Remaining voices, described with the style keyword from the voice table
+		{Name: "Sadaltager", Description: "Knowledgeable", Characteristics: []string{"knowledgeable"}},
+		{Name: "Schedar", Description: "Even", Characteristics: []string{"even"}},
+		{Name: "Umbriel", Description: "Easy-going", Characteristics: []string{"easy-going"}},
+	}
+}
+
+// GetGeminiVoiceNames returns the name of every known voice, sorted with sort.Strings.
+func GetGeminiVoiceNames() []string {
+	catalog := GetGeminiVoices()
+	sorted := make([]string, 0, len(catalog))
+	for idx := range catalog {
+		sorted = append(sorted, catalog[idx].Name)
+	}
+	sort.Strings(sorted)
+	return sorted
+}
+
+// IsValidGeminiVoice reports whether voiceName is empty (meaning "use the default") or exactly matches a known voice.
+func IsValidGeminiVoice(voiceName string) bool {
+	if len(voiceName) == 0 {
+		return true
+	}
+	catalog := GetGeminiVoices()
+	for idx := range catalog {
+		if catalog[idx].Name == voiceName {
+			return true
+		}
+	}
+	return false
+}
+
+// GetGeminiVoiceByName looks a voice up by exact name; an unknown name yields a nil voice and an error.
+func GetGeminiVoiceByName(name string) (*GeminiVoice, error) {
+	for catalog, idx := GetGeminiVoices(), 0; idx < len(catalog); idx++ {
+		if found := catalog[idx]; found.Name == name {
+			return &found, nil
+		}
+	}
+	return nil, fmt.Errorf("voice '%s' not found", name)
+}
+
+// ListGeminiVoices formats the voice list for display.
+//
+// When shellCompleteMode is true the result is the sorted voice names, one per
+// line, with no headers. Otherwise the result is a human-readable listing that
+// groups voices by their first recognised characteristic, marks the default
+// voice, and ends with a usage hint. Groups are always emitted in the same
+// order so the output is stable between runs.
+func ListGeminiVoices(shellCompleteMode bool) string {
+	if shellCompleteMode {
+		// For shell completion, just return voice names
+		result := ""
+		for _, name := range GetGeminiVoiceNames() {
+			result += name + "\n"
+		}
+		return result
+	}
+
+	// Display order of the groups. A slice is used because Go does not define map
+	// iteration order, so ranging over the map would shuffle sections between runs.
+	groupOrder := []string{
+		"Firm & Confident",
+		"Bright & Cheerful",
+		"Warm & Welcoming",
+		"Clear & Professional",
+		"Natural & Expressive",
+		"Other Voices",
+	}
+
+	// groupFor maps one characteristic tag to its display group ("" if unrecognised).
+	groupFor := func(char string) string {
+		switch char {
+		case "firm", "confident", "decisive", "strong":
+			return "Firm & Confident"
+		case "bright", "cheerful", "upbeat", "energetic", "lively":
+			return "Bright & Cheerful"
+		case "warm", "welcoming", "friendly", "approachable":
+			return "Warm & Welcoming"
+		case "clear", "informative", "professional", "articulate":
+			return "Clear & Professional"
+		case "natural", "expressive", "melodic", "breezy":
+			return "Natural & Expressive"
+		}
+		return ""
+	}
+
+	// A voice joins the group of its first recognised characteristic.
+	groups := make(map[string][]GeminiVoice, len(groupOrder))
+	for _, voice := range GetGeminiVoices() {
+		// Voices with no recognised characteristic stay in "Other Voices".
+		group := "Other Voices"
+		for _, char := range voice.Characteristics {
+			if g := groupFor(char); g != "" {
+				group = g
+				break
+			}
+		}
+		groups[group] = append(groups[group], voice)
+	}
+
+	// Render the groups in the fixed order, skipping any that ended up empty.
+	result := "Available Gemini Text-to-Speech voices:\n\n"
+	for _, groupName := range groupOrder {
+		groupVoices := groups[groupName]
+		if len(groupVoices) == 0 {
+			continue
+		}
+		result += fmt.Sprintf("%s:\n", groupName)
+		for _, voice := range groupVoices {
+			defaultStr := ""
+			if voice.Name == "Kore" {
+				defaultStr = " (default)"
+			}
+			result += fmt.Sprintf(" %-15s - %s%s\n", voice.Name, voice.Description, defaultStr)
+		}
+		result += "\n"
+	}
+
+	result += "Use --voice to select a specific voice.\n"
+	result += "Example: fabric --voice Charon -m gemini-2.0-flash-tts -o output.wav \"Hello world\"\n"
+
+	return result
+}
+
+// NOTE: This implementation maintains a curated list based on official Google documentation.
+// In the future, if Google provides a dynamic voice discovery API, this can be updated
+// to make API calls for real-time voice discovery.
+//
+// The current approach ensures:
+// 1. Fast response times (no API calls needed)
+// 2. Reliable voice information with descriptions
+// 3. Easy maintenance when new voices are added
+// 4. Offline functionality
+//
+// To update voices: Monitor Google's Gemini TTS documentation at:
+// https://ai.google.dev/gemini-api/docs/speech-generation