feat: add Gemini TTS voice selection and listing functionality

## CHANGES - Add `--voice` flag for TTS voice selection - Add `--list-gemini-voices` command for voice discovery - Implement voice validation for Gemini TTS models - Update shell completions for voice options - Add comprehensive Gemini TTS documentation - Create voice samples directory structure - Extend spell checker dictionary with voice names
2026-01-07 21:44:02 -05:00 · 2025-07-26 15:11:30 -07:00
parent eab335873e
commit 614b1322d5
12 changed files with 474 additions and 11 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,23 +1,32 @@
 {
 	"cSpell.words": [
+		"Achird",
 		"addextension",
 		"adduser",
 		"AIML",
 		"anthropics",
+		"Aoede",
 		"atotto",
+		"Autonoe",
 		"badfile",
 		"Behrens",
 		"blindspots",
 		"Bombal",
+		"Callirhoe",
+		"Callirrhoe",
 		"Cerebras",
+		"compadd",
+		"compdef",
 		"compinit",
 		"creatordate",
+		"curcontext",
 		"custompatterns",
 		"danielmiessler",
 		"davidanson",
 		"Debugf",
 		"dedup",
 		"deepseek",
+		"Despina",
 		"direnv",
 		"dryrun",
 		"dsrp",
@@ -25,6 +34,7 @@
 		"Eisler",
 		"elif",
 		"envrc",
+		"Erinome",
 		"Errorf",
 		"eugeis",
 		"Eugen",
@@ -66,6 +76,7 @@
 		"Kore",
 		"ksylvan",
 		"Langdock",
+		"Laomedeia",
 		"ldflags",
 		"libexec",
 		"listcontexts",
@@ -89,6 +100,7 @@
 		"openaiapi",
 		"opencode",
 		"openrouter",
+		"Orus",
 		"otiai",
 		"pdflatex",
 		"pipx",
@@ -97,11 +109,14 @@
 		"presencepenalty",
 		"printcontext",
 		"printsession",
+		"Pulcherrima",
 		"pycache",
 		"pyperclip",
 		"readystream",
 		"restapi",
 		"rmextension",
+		"Sadachbia",
+		"Sadaltager",
 		"samber",
 		"sashabaranov",
 		"sdist",
@@ -112,6 +127,7 @@
 		"Streamlit",
 		"stretchr",
 		"subchunk",
+		"Sulafat",
 		"talkpanel",
 		"Telos",
 		"testpattern",
--- a/README.md
+++ b/README.md
@@ -548,6 +548,9 @@ Application Options:
      --think-start-tag=            Start tag for thinking sections (default: <think>)
      --think-end-tag=              End tag for thinking sections (default: </think>)
      --disable-responses-api       Disable OpenAI Responses API (default: false)
+      --voice=                      TTS voice name for supported models (e.g., Kore, Charon, Puck)
+                                    (default: Kore)
+      --list-gemini-voices          List all available Gemini TTS voices

 Help Options:
  -h, --help                        Show this help message
--- a/completions/_fabric
+++ b/completions/_fabric
@@ -14,16 +14,19 @@ _fabric_models() {
  models=(${(f)"$(fabric --listmodels --shell-complete-list 2>/dev/null)"})
  compadd -X "Models:" ${models}
 }
+
 _fabric_contexts() {
  local -a contexts
  contexts=(${(f)"$(fabric --listcontexts --shell-complete-list 2>/dev/null)"})
  compadd -X "Contexts:" ${contexts}
 }
+
 _fabric_sessions() {
  local -a sessions
  sessions=(${(f)"$(fabric --listsessions --shell-complete-list 2>/dev/null)"})
  compadd -X "Sessions:" ${sessions}
 }
+
 _fabric_strategies() {
  local -a strategies
  strategies=(${(f)"$(fabric --liststrategies --shell-complete-list 2>/dev/null)"})
@@ -34,14 +37,12 @@ _fabric_extensions() {
  local -a extensions
  extensions=(${(f)"$(fabric --listextensions --shell-complete-list 2>/dev/null)"})
  compadd -X "Extensions:" ${extensions}
-    '(-L --listmodels)'{-L,--listmodels}'[List all available models]:list models:_fabric_models' \
-    '(-x --listcontexts)'{-x,--listcontexts}'[List all contexts]:list contexts:_fabric_contexts' \
-    '(-X --listsessions)'{-X,--listsessions}'[List all sessions]:list sessions:_fabric_sessions' \
-    '(--listextensions)--listextensions[List all registered extensions]' \
-    '(--liststrategies)--liststrategies[List all strategies]:list strategies:_fabric_strategies' \
-    '(--listvendors)--listvendors[List all vendors]' \
-  vendors=(${(f)"$(fabric --listvendors 2>/dev/null)"})
-  compadd -X "Vendors:" ${vendors}
+}
+
+_fabric_gemini_voices() {
+  local -a voices
+  voices=(${(f)"$(fabric --list-gemini-voices --shell-complete-list 2>/dev/null)"})
+  compadd -X "Gemini TTS Voices:" ${voices}
 }

 _fabric() {
@@ -109,6 +110,8 @@ _fabric() {
    '(--strategy)--strategy[Choose a strategy from the available strategies]:strategy:_fabric_strategies' \
    '(--liststrategies)--liststrategies[List all strategies]' \
    '(--listvendors)--listvendors[List all vendors]' \
+    '(--voice)--voice[TTS voice name for supported models]:voice:_fabric_gemini_voices' \
+    '(--list-gemini-voices)--list-gemini-voices[List all available Gemini TTS voices]' \
    '(--shell-complete-list)--shell-complete-list[Output raw list without headers/formatting (for shell completion)]' \
    '(--suppress-think)--suppress-think[Suppress text enclosed in thinking tags]' \
    '(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: <think>)]:start tag:' \
@@ -119,4 +122,3 @@ _fabric() {
 }

 _fabric "$@"
-
--- a/completions/fabric.bash
+++ b/completions/fabric.bash
@@ -13,7 +13,7 @@ _fabric() {
  _get_comp_words_by_ref -n : cur prev words cword

  # Define all possible options/flags
-  local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
+  local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"

  # Helper function for dynamic completions
  _fabric_get_list() {
@@ -62,6 +62,10 @@ _fabric() {
    COMPREPLY=($(compgen -W "$(_fabric_get_list --liststrategies)" -- "${cur}"))
    return 0
    ;;
+  --voice)
+    COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}"))
+    return 0
+    ;;
  # Options requiring file/directory paths
  -a | --attachment | -o | --output | --config | --addextension | --image-file)
    _filedir
--- a/completions/fabric.fish
+++ b/completions/fabric.fish
@@ -31,6 +31,10 @@ function __fabric_get_extensions
 	fabric --listextensions --shell-complete-list 2>/dev/null
 end

+function __fabric_get_gemini_voices
+	fabric --list-gemini-voices --shell-complete-list 2>/dev/null
+end
+
 # Main completion function
 complete -c fabric -f

@@ -71,6 +75,7 @@ complete -c fabric -l rmextension -d "Remove a registered extension by name" -a
 complete -c fabric -l strategy -d "Choose a strategy from the available strategies" -a "(__fabric_get_strategies)"
 complete -c fabric -l think-start-tag -d "Start tag for thinking sections (default: <think>)"
 complete -c fabric -l think-end-tag -d "End tag for thinking sections (default: </think>)"
+complete -c fabric -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)"

 # Boolean flags (no arguments)
 complete -c fabric -s S -l setup -d "Run setup for all reconfigurable parts of fabric"
@@ -99,6 +104,7 @@ complete -c fabric -l version -d "Print current version"
 complete -c fabric -l listextensions -d "List all registered extensions"
 complete -c fabric -l liststrategies -d "List all strategies"
 complete -c fabric -l listvendors -d "List all vendors"
+complete -c fabric -l list-gemini-voices -d "List all available Gemini TTS voices"
 complete -c fabric -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)"
 complete -c fabric -l suppress-think -d "Suppress text enclosed in thinking tags"
 complete -c fabric -l disable-responses-api -d "Disable OpenAI Responses API (default: false)"
--- a/docs/Gemini-TTS.md
+++ b/docs/Gemini-TTS.md
@@ -0,0 +1,155 @@
+# Gemini Text-to-Speech (TTS) Guide
+
+Fabric supports Google Gemini's text-to-speech (TTS) capabilities, allowing you to convert text into high-quality audio using various AI-generated voices.
+
+## Overview
+
+The Gemini TTS feature in Fabric allows you to:
+
+- Convert text input into audio using Google's Gemini TTS models
+- Choose from 30+ different AI voices with varying characteristics
+- Generate high-quality WAV audio files
+- Integrate TTS generation into your existing Fabric workflows
+
+## Usage
+
+### Basic TTS Generation
+
+To generate audio from text using TTS:
+
+```bash
+# Basic TTS with default voice (Kore)
+echo "Hello, this is a test of Gemini TTS" | fabric -m gemini-2.0-flash-tts -o output.wav
+
+# Using a specific voice
+echo "Hello, this is a test with the Charon voice" | fabric -m gemini-2.0-flash-tts --voice Charon -o output.wav
+
+# Using TTS with a pattern
+fabric -p summarize --voice Puck -m gemini-2.0-flash-tts -o summary.wav < document.txt
+```
+
+### Voice Selection
+
+Use the `--voice` flag to specify which voice to use for TTS generation:
+
+```bash
+fabric -m gemini-2.0-flash-tts --voice Zephyr -o output.wav "Your text here"
+```
+
+If no voice is specified, the default voice "Kore" will be used.
+
+## Available Voices
+
+Gemini TTS supports 30+ different voices, each with unique characteristics:
+
+### Popular Voices
+
+- **Kore** - Firm and confident (default)
+- **Charon** - Informative and clear
+- **Puck** - Upbeat and energetic
+- **Zephyr** - Bright and cheerful
+- **Leda** - Youthful and energetic
+- **Aoede** - Breezy and natural
+
+### Complete Voice List
+
+- Kore, Charon, Puck, Fenrir, Aoede, Leda, Orus, Zephyr
+- Autonoe, Callirhoe, Despina, Erinome, Gacrux, Laomedeia
+- Pulcherrima, Sulafat, Vindemiatrix, Achernar, Achird
+- Algenib, Algieba, Alnilam, Enceladus, Iapetus, Rasalgethi
+- Sadachbia, Zubenelgenubi, Vega, Capella, Lyra
+
+### Listing Available Voices
+
+To see all available voices with descriptions:
+
+```bash
+# List all voices with characteristics
+fabric --list-gemini-voices
+
+# List voice names only (for shell completion)
+fabric --list-gemini-voices --shell-complete-list
+```
+
+## Rate Limits
+
+Google Gemini TTS has usage quotas that vary by plan:
+
+### Free Tier
+
+- **15 requests per day** per project per TTS model
+- Quota resets daily
+- Applies to all TTS models (e.g., `gemini-2.5-flash-preview-tts`)
+
+### Rate Limit Errors
+
+If you exceed your quota, you'll see an error like:
+
+```text
+Error 429: You exceeded your current quota, please check your plan and billing details
+```
+
+**Solutions:**
+
+- Wait for daily quota reset (typically at midnight UTC)
+- Upgrade to a paid plan for higher limits
+- Use TTS generation strategically for important content
+
+For current rate limits and pricing, visit: <https://ai.google.dev/gemini-api/docs/rate-limits>
+
+## Configuration
+
+### Command Line Options
+
+- `--voice <voice_name>` - Specify the TTS voice to use
+- `-o <filename.wav>` - Output audio file (required for TTS models)
+- `-m <tts_model>` - Specify a TTS-capable model (e.g., `gemini-2.0-flash-tts`)
+
+### YAML Configuration
+
+You can also set a default voice in your Fabric configuration file (`~/.config/fabric/config.yaml`):
+
+```yaml
+voice: "Charon"  # Set your preferred default voice
+```
+
+## Requirements
+
+- Valid Google Gemini API key configured in Fabric
+- TTS-capable Gemini model (models containing "tts" in the name)
+- Audio output must be specified with `-o filename.wav`
+
+## Troubleshooting
+
+### Common Issues
+
+#### Error: "TTS model requires audio output"
+
+- Solution: Always specify an output file with `-o filename.wav` when using TTS models
+
+#### Error: "Invalid voice 'X'"
+
+- Solution: Check that the voice name is spelled correctly and matches one of the supported voices listed above
+
+#### Error: "TTS generation failed"
+
+- Solution: Verify your Gemini API key is valid and you have sufficient quota
+
+### Getting Help
+
+For additional help with TTS features:
+
+```bash
+fabric --help
+```
+
+## Technical Details
+
+- **Audio Format**: WAV files with 24kHz sample rate, 16-bit depth, mono channel
+- **Language Support**: Automatic language detection for 24+ languages
+- **Model Requirements**: Models must contain "tts", "preview-tts", or "text-to-speech" in the name
+- **Voice Selection**: Uses Google's PrebuiltVoiceConfig system for consistent voice quality
+
+---
+
+For more information about Fabric, visit the [main documentation](../README.md).
--- a/docs/voices/README.md
+++ b/docs/voices/README.md
@@ -0,0 +1,36 @@
+# Voice Samples
+
+This directory contains sample audio files demonstrating different Gemini TTS voices.
+
+## Sample Files
+
+Each voice sample says "The quick brown fox jumped over the lazy dog" to demonstrate the voice characteristics:
+
+- **Kore.wav** - Firm and confident (default voice)
+- **Charon.wav** - Informative and clear
+- **Vega.wav** - Smooth and pleasant
+- **Capella.wav** - Warm and welcoming
+- **Achird.wav** - Friendly and approachable
+- **Lyra.wav** - Melodic and expressive
+
+## Generating Samples
+
+To generate these samples, use the following commands:
+
+```bash
+# Generate each voice sample
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Kore -o docs/voices/Kore.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o docs/voices/Charon.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Vega -o docs/voices/Vega.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Capella -o docs/voices/Capella.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Achird -o docs/voices/Achird.wav
+echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Lyra -o docs/voices/Lyra.wav
+```
+
+## Audio Format
+
+- **Format**: WAV (uncompressed)
+- **Sample Rate**: 24kHz
+- **Bit Depth**: 16-bit
+- **Channels**: Mono
+- **Approximate Size**: ~500KB per sample
--- a/internal/cli/flags.go
+++ b/internal/cli/flags.go
@@ -87,6 +87,8 @@ type Flags struct {
 	ThinkStartTag                   string            `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:"<think>"`
 	ThinkEndTag                     string            `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:"</think>"`
 	DisableResponsesAPI             bool              `long:"disable-responses-api" yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"`
+	Voice                           string            `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"`
+	ListGeminiVoices                bool              `long:"list-gemini-voices" description:"List all available Gemini TTS voices"`
 }

 var debug = false
@@ -441,6 +443,7 @@ func (o *Flags) BuildChatOptions() (ret *domain.ChatOptions, err error) {
 		SuppressThink:      o.SuppressThink,
 		ThinkStartTag:      startTag,
 		ThinkEndTag:        endTag,
+		Voice:              o.Voice,
 	}
 	return
 }
--- a/internal/cli/listing.go
+++ b/internal/cli/listing.go
@@ -1,11 +1,13 @@
 package cli

 import (
+	"fmt"
 	"os"
 	"strconv"

 	"github.com/danielmiessler/fabric/internal/core"
 	"github.com/danielmiessler/fabric/internal/plugins/ai"
+	"github.com/danielmiessler/fabric/internal/plugins/ai/gemini"
 	"github.com/danielmiessler/fabric/internal/plugins/db/fsdb"
 )

@@ -58,5 +60,11 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
 		return true, err
 	}

+	if currentFlags.ListGeminiVoices {
+		voicesList := gemini.ListGeminiVoices(currentFlags.ShellCompleteOutput)
+		fmt.Print(voicesList)
+		return true, nil
+	}
+
 	return false, nil
 }
--- a/internal/domain/domain.go
+++ b/internal/domain/domain.go
@@ -38,6 +38,7 @@ type ChatOptions struct {
 	ThinkEndTag        string
 	AudioOutput        bool
 	AudioFormat        string
+	Voice              string
 }

 // NormalizeMessages remove empty messages and ensure messages order user-assist-user
--- a/internal/plugins/ai/gemini/gemini.go
+++ b/internal/plugins/ai/gemini/gemini.go
@@ -194,6 +194,12 @@ func (o *Client) generateTTSAudio(ctx context.Context, msgs []*chat.ChatCompleti
 		return "", err
 	}

+	// Validate voice name before making API call
+	if opts.Voice != "" && !IsValidGeminiVoice(opts.Voice) {
+		validVoices := GetGeminiVoiceNames()
+		return "", fmt.Errorf("invalid voice '%s'. Valid voices are: %v", opts.Voice, validVoices)
+	}
+
 	client, err := o.createGenaiClient(ctx)
 	if err != nil {
 		return "", err
@@ -211,12 +217,17 @@ func (o *Client) performTTSGeneration(ctx context.Context, client *genai.Client,
 	}}

 	// Configure for TTS generation
+	voiceName := opts.Voice
+	if voiceName == "" {
+		voiceName = "Kore" // Default voice if none specified
+	}
+
 	config := &genai.GenerateContentConfig{
 		ResponseModalities: []string{"AUDIO"},
 		SpeechConfig: &genai.SpeechConfig{
 			VoiceConfig: &genai.VoiceConfig{
 				PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{
-					VoiceName: "Kore", // Default voice
+					VoiceName: voiceName,
 				},
 			},
 		},
--- a/internal/plugins/ai/gemini/voices.go
+++ b/internal/plugins/ai/gemini/voices.go
@@ -0,0 +1,218 @@
+package gemini
+
+import (
+	"fmt"
+	"sort"
+)
+
+// GeminiVoice represents a Gemini TTS voice with its characteristics
+type GeminiVoice struct {
+	Name            string
+	Description     string
+	Characteristics []string
+}
+
+// GetGeminiVoices returns the current list of supported Gemini TTS voices
+// This list is maintained based on official Google Gemini documentation
+// https://ai.google.dev/gemini-api/docs/speech-generation
+func GetGeminiVoices() []GeminiVoice {
+	return []GeminiVoice{
+		// Firm voices
+		{Name: "Kore", Description: "Firm and confident", Characteristics: []string{"firm", "confident", "default"}},
+		{Name: "Orus", Description: "Firm and decisive", Characteristics: []string{"firm", "decisive"}},
+		{Name: "Alnilam", Description: "Firm and strong", Characteristics: []string{"firm", "strong"}},
+
+		// Upbeat voices
+		{Name: "Puck", Description: "Upbeat and energetic", Characteristics: []string{"upbeat", "energetic"}},
+		{Name: "Laomedeia", Description: "Upbeat and lively", Characteristics: []string{"upbeat", "lively"}},
+
+		// Bright voices
+		{Name: "Zephyr", Description: "Bright and cheerful", Characteristics: []string{"bright", "cheerful"}},
+		{Name: "Autonoe", Description: "Bright and optimistic", Characteristics: []string{"bright", "optimistic"}},
+
+		// Informative voices
+		{Name: "Charon", Description: "Informative and clear", Characteristics: []string{"informative", "clear"}},
+		{Name: "Rasalgethi", Description: "Informative and professional", Characteristics: []string{"informative", "professional"}},
+
+		// Natural voices
+		{Name: "Aoede", Description: "Breezy and natural", Characteristics: []string{"breezy", "natural"}},
+		{Name: "Leda", Description: "Youthful and energetic", Characteristics: []string{"youthful", "energetic"}},
+
+		// Gentle voices
+		{Name: "Vindemiatrix", Description: "Gentle and kind", Characteristics: []string{"gentle", "kind"}},
+		{Name: "Achernar", Description: "Soft and gentle", Characteristics: []string{"soft", "gentle"}},
+		{Name: "Enceladus", Description: "Breathy and soft", Characteristics: []string{"breathy", "soft"}},
+
+		// Warm voices
+		{Name: "Sulafat", Description: "Warm and welcoming", Characteristics: []string{"warm", "welcoming"}},
+		{Name: "Capella", Description: "Warm and approachable", Characteristics: []string{"warm", "approachable"}},
+
+		// Clear voices
+		{Name: "Iapetus", Description: "Clear and articulate", Characteristics: []string{"clear", "articulate"}},
+		{Name: "Erinome", Description: "Clear and precise", Characteristics: []string{"clear", "precise"}},
+
+		// Pleasant voices
+		{Name: "Algieba", Description: "Smooth and pleasant", Characteristics: []string{"smooth", "pleasant"}},
+		{Name: "Vega", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}},
+
+		// Textured voices
+		{Name: "Algenib", Description: "Gravelly texture", Characteristics: []string{"gravelly", "textured"}},
+
+		// Relaxed voices
+		{Name: "Callirrhoe", Description: "Easy-going and relaxed", Characteristics: []string{"relaxed", "easy-going"}},
+		{Name: "Despina", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}},
+
+		// Mature voices
+		{Name: "Gacrux", Description: "Mature and experienced", Characteristics: []string{"mature", "experienced"}},
+
+		// Expressive voices
+		{Name: "Pulcherrima", Description: "Forward and expressive", Characteristics: []string{"forward", "expressive"}},
+		{Name: "Lyra", Description: "Melodic and expressive", Characteristics: []string{"melodic", "expressive"}},
+
+		// Dynamic voices
+		{Name: "Fenrir", Description: "Excitable and dynamic", Characteristics: []string{"excitable", "dynamic"}},
+		{Name: "Sadachbia", Description: "Lively and animated", Characteristics: []string{"lively", "animated"}},
+
+		// Friendly voices
+		{Name: "Achird", Description: "Friendly and approachable", Characteristics: []string{"friendly", "approachable"}},
+
+		// Casual voices
+		{Name: "Zubenelgenubi", Description: "Casual and conversational", Characteristics: []string{"casual", "conversational"}},
+
+		// Additional voices from latest API
+		{Name: "Sadaltager", Description: "Additional voice option", Characteristics: []string{"additional"}},
+		{Name: "Schedar", Description: "Additional voice option", Characteristics: []string{"additional"}},
+		{Name: "Umbriel", Description: "Additional voice option", Characteristics: []string{"additional"}},
+	}
+}
+
+// GetGeminiVoiceNames returns just the voice names in alphabetical order
+func GetGeminiVoiceNames() []string {
+	voices := GetGeminiVoices()
+	names := make([]string, len(voices))
+	for i, voice := range voices {
+		names[i] = voice.Name
+	}
+	sort.Strings(names)
+	return names
+}
+
+// IsValidGeminiVoice checks if a voice name is valid
+func IsValidGeminiVoice(voiceName string) bool {
+	if voiceName == "" {
+		return true // Empty voice is valid (will use default)
+	}
+
+	for _, voice := range GetGeminiVoices() {
+		if voice.Name == voiceName {
+			return true
+		}
+	}
+	return false
+}
+
+// GetGeminiVoiceByName returns a specific voice by name
+func GetGeminiVoiceByName(name string) (*GeminiVoice, error) {
+	for _, voice := range GetGeminiVoices() {
+		if voice.Name == name {
+			return &voice, nil
+		}
+	}
+	return nil, fmt.Errorf("voice '%s' not found", name)
+}
+
+// ListGeminiVoices formats the voice list for display
+func ListGeminiVoices(shellCompleteMode bool) string {
+	if shellCompleteMode {
+		// For shell completion, just return voice names
+		names := GetGeminiVoiceNames()
+		result := ""
+		for _, name := range names {
+			result += name + "\n"
+		}
+		return result
+	}
+
+	// For human-readable output
+	voices := GetGeminiVoices()
+	result := "Available Gemini Text-to-Speech voices:\n\n"
+
+	// Group by characteristics for better readability
+	groups := map[string][]GeminiVoice{
+		"Firm & Confident":     {},
+		"Bright & Cheerful":    {},
+		"Warm & Welcoming":     {},
+		"Clear & Professional": {},
+		"Natural & Expressive": {},
+		"Other Voices":         {},
+	}
+
+	for _, voice := range voices {
+		placed := false
+		for _, char := range voice.Characteristics {
+			switch char {
+			case "firm", "confident", "decisive", "strong":
+				if !placed {
+					groups["Firm & Confident"] = append(groups["Firm & Confident"], voice)
+					placed = true
+				}
+			case "bright", "cheerful", "upbeat", "energetic", "lively":
+				if !placed {
+					groups["Bright & Cheerful"] = append(groups["Bright & Cheerful"], voice)
+					placed = true
+				}
+			case "warm", "welcoming", "friendly", "approachable":
+				if !placed {
+					groups["Warm & Welcoming"] = append(groups["Warm & Welcoming"], voice)
+					placed = true
+				}
+			case "clear", "informative", "professional", "articulate":
+				if !placed {
+					groups["Clear & Professional"] = append(groups["Clear & Professional"], voice)
+					placed = true
+				}
+			case "natural", "expressive", "melodic", "breezy":
+				if !placed {
+					groups["Natural & Expressive"] = append(groups["Natural & Expressive"], voice)
+					placed = true
+				}
+			}
+		}
+		if !placed {
+			groups["Other Voices"] = append(groups["Other Voices"], voice)
+		}
+	}
+
+	// Output grouped voices
+	for groupName, groupVoices := range groups {
+		if len(groupVoices) > 0 {
+			result += fmt.Sprintf("%s:\n", groupName)
+			for _, voice := range groupVoices {
+				defaultStr := ""
+				if voice.Name == "Kore" {
+					defaultStr = " (default)"
+				}
+				result += fmt.Sprintf("  %-15s - %s%s\n", voice.Name, voice.Description, defaultStr)
+			}
+			result += "\n"
+		}
+	}
+
+	result += "Use --voice <voice_name> to select a specific voice.\n"
+	result += "Example: fabric --voice Charon -m gemini-2.0-flash-tts -o output.wav \"Hello world\"\n"
+
+	return result
+}
+
+// NOTE: This implementation maintains a curated list based on official Google documentation.
+// In the future, if Google provides a dynamic voice discovery API, this can be updated
+// to make API calls for real-time voice discovery.
+//
+// The current approach ensures:
+// 1. Fast response times (no API calls needed)
+// 2. Reliable voice information with descriptions
+// 3. Easy maintenance when new voices are added
+// 4. Offline functionality
+//
+// To update voices: Monitor Google's Gemini TTS documentation at:
+// https://ai.google.dev/gemini-api/docs/speech-generation