feat: add Gemini TTS voice selection and listing functionality

## CHANGES

- Add `--voice` flag for TTS voice selection
- Add `--list-gemini-voices` command for voice discovery
- Implement voice validation for Gemini TTS models
- Update shell completions for voice options
- Add comprehensive Gemini TTS documentation
- Create voice samples directory structure
- Extend spell checker dictionary with voice names
This commit is contained in:
Kayvan Sylvan
2025-07-26 15:11:30 -07:00
parent eab335873e
commit 614b1322d5
12 changed files with 474 additions and 11 deletions

16
.vscode/settings.json vendored
View File

@@ -1,23 +1,32 @@
{
"cSpell.words": [
"Achird",
"addextension",
"adduser",
"AIML",
"anthropics",
"Aoede",
"atotto",
"Autonoe",
"badfile",
"Behrens",
"blindspots",
"Bombal",
"Callirhoe",
"Callirrhoe",
"Cerebras",
"compadd",
"compdef",
"compinit",
"creatordate",
"curcontext",
"custompatterns",
"danielmiessler",
"davidanson",
"Debugf",
"dedup",
"deepseek",
"Despina",
"direnv",
"dryrun",
"dsrp",
@@ -25,6 +34,7 @@
"Eisler",
"elif",
"envrc",
"Erinome",
"Errorf",
"eugeis",
"Eugen",
@@ -66,6 +76,7 @@
"Kore",
"ksylvan",
"Langdock",
"Laomedeia",
"ldflags",
"libexec",
"listcontexts",
@@ -89,6 +100,7 @@
"openaiapi",
"opencode",
"openrouter",
"Orus",
"otiai",
"pdflatex",
"pipx",
@@ -97,11 +109,14 @@
"presencepenalty",
"printcontext",
"printsession",
"Pulcherrima",
"pycache",
"pyperclip",
"readystream",
"restapi",
"rmextension",
"Sadachbia",
"Sadaltager",
"samber",
"sashabaranov",
"sdist",
@@ -112,6 +127,7 @@
"Streamlit",
"stretchr",
"subchunk",
"Sulafat",
"talkpanel",
"Telos",
"testpattern",

View File

@@ -548,6 +548,9 @@ Application Options:
--think-start-tag= Start tag for thinking sections (default: <think>)
--think-end-tag= End tag for thinking sections (default: </think>)
--disable-responses-api Disable OpenAI Responses API (default: false)
--voice= TTS voice name for supported models (e.g., Kore, Charon, Puck)
(default: Kore)
--list-gemini-voices List all available Gemini TTS voices
Help Options:
-h, --help Show this help message

View File

@@ -14,16 +14,19 @@ _fabric_models() {
models=(${(f)"$(fabric --listmodels --shell-complete-list 2>/dev/null)"})
compadd -X "Models:" ${models}
}
# Completion helper: offer context names as completion candidates.
# Runs `fabric --listcontexts --shell-complete-list` with stderr discarded,
# splits the output into one array element per line (the `(f)` expansion
# flag), and hands the elements to compadd under the "Contexts:" explanation.
_fabric_contexts() {
local -a contexts
contexts=(${(f)"$(fabric --listcontexts --shell-complete-list 2>/dev/null)"})
compadd -X "Contexts:" ${contexts}
}
# Completion helper: offer session names as completion candidates.
# Runs `fabric --listsessions --shell-complete-list` with stderr discarded,
# splits the output into one array element per line (the `(f)` expansion
# flag), and hands the elements to compadd under the "Sessions:" explanation.
_fabric_sessions() {
local -a sessions
sessions=(${(f)"$(fabric --listsessions --shell-complete-list 2>/dev/null)"})
compadd -X "Sessions:" ${sessions}
}
_fabric_strategies() {
local -a strategies
strategies=(${(f)"$(fabric --liststrategies --shell-complete-list 2>/dev/null)"})
@@ -34,14 +37,12 @@ _fabric_extensions() {
local -a extensions
extensions=(${(f)"$(fabric --listextensions --shell-complete-list 2>/dev/null)"})
compadd -X "Extensions:" ${extensions}
'(-L --listmodels)'{-L,--listmodels}'[List all available models]:list models:_fabric_models' \
'(-x --listcontexts)'{-x,--listcontexts}'[List all contexts]:list contexts:_fabric_contexts' \
'(-X --listsessions)'{-X,--listsessions}'[List all sessions]:list sessions:_fabric_sessions' \
'(--listextensions)--listextensions[List all registered extensions]' \
'(--liststrategies)--liststrategies[List all strategies]:list strategies:_fabric_strategies' \
'(--listvendors)--listvendors[List all vendors]' \
vendors=(${(f)"$(fabric --listvendors 2>/dev/null)"})
compadd -X "Vendors:" ${vendors}
}
# Completion helper: offer Gemini TTS voice names as completion candidates.
# Runs `fabric --list-gemini-voices --shell-complete-list` with stderr
# discarded, splits the output into one array element per line (the `(f)`
# expansion flag), and hands the elements to compadd under the
# "Gemini TTS Voices:" explanation.
_fabric_gemini_voices() {
local -a voices
voices=(${(f)"$(fabric --list-gemini-voices --shell-complete-list 2>/dev/null)"})
compadd -X "Gemini TTS Voices:" ${voices}
}
_fabric() {
@@ -109,6 +110,8 @@ _fabric() {
'(--strategy)--strategy[Choose a strategy from the available strategies]:strategy:_fabric_strategies' \
'(--liststrategies)--liststrategies[List all strategies]' \
'(--listvendors)--listvendors[List all vendors]' \
'(--voice)--voice[TTS voice name for supported models]:voice:_fabric_gemini_voices' \
'(--list-gemini-voices)--list-gemini-voices[List all available Gemini TTS voices]' \
'(--shell-complete-list)--shell-complete-list[Output raw list without headers/formatting (for shell completion)]' \
'(--suppress-think)--suppress-think[Suppress text enclosed in thinking tags]' \
'(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: <think>)]:start tag:' \
@@ -119,4 +122,3 @@ _fabric() {
}
_fabric "$@"

View File

@@ -13,7 +13,7 @@ _fabric() {
_get_comp_words_by_ref -n : cur prev words cword
# Define all possible options/flags
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
# Helper function for dynamic completions
_fabric_get_list() {
@@ -62,6 +62,10 @@ _fabric() {
COMPREPLY=($(compgen -W "$(_fabric_get_list --liststrategies)" -- "${cur}"))
return 0
;;
--voice)
COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}"))
return 0
;;
# Options requiring file/directory paths
-a | --attachment | -o | --output | --config | --addextension | --image-file)
_filedir

View File

@@ -31,6 +31,10 @@ function __fabric_get_extensions
fabric --listextensions --shell-complete-list 2>/dev/null
end
# Print the output of `fabric --list-gemini-voices --shell-complete-list`,
# discarding anything fabric writes to stderr. Used as the dynamic argument
# source for the `--voice` completion.
function __fabric_get_gemini_voices
fabric --list-gemini-voices --shell-complete-list 2>/dev/null
end
# Main completion function
complete -c fabric -f
@@ -71,6 +75,7 @@ complete -c fabric -l rmextension -d "Remove a registered extension by name" -a
complete -c fabric -l strategy -d "Choose a strategy from the available strategies" -a "(__fabric_get_strategies)"
complete -c fabric -l think-start-tag -d "Start tag for thinking sections (default: <think>)"
complete -c fabric -l think-end-tag -d "End tag for thinking sections (default: </think>)"
complete -c fabric -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)"
# Boolean flags (no arguments)
complete -c fabric -s S -l setup -d "Run setup for all reconfigurable parts of fabric"
@@ -99,6 +104,7 @@ complete -c fabric -l version -d "Print current version"
complete -c fabric -l listextensions -d "List all registered extensions"
complete -c fabric -l liststrategies -d "List all strategies"
complete -c fabric -l listvendors -d "List all vendors"
complete -c fabric -l list-gemini-voices -d "List all available Gemini TTS voices"
complete -c fabric -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)"
complete -c fabric -l suppress-think -d "Suppress text enclosed in thinking tags"
complete -c fabric -l disable-responses-api -d "Disable OpenAI Responses API (default: false)"

155
docs/Gemini-TTS.md Normal file
View File

@@ -0,0 +1,155 @@
# Gemini Text-to-Speech (TTS) Guide
Fabric supports Google Gemini's text-to-speech (TTS) capabilities, allowing you to convert text into high-quality audio using various AI-generated voices.
## Overview
The Gemini TTS feature in Fabric allows you to:
- Convert text input into audio using Google's Gemini TTS models
- Choose from 30+ different AI voices with varying characteristics
- Generate high-quality WAV audio files
- Integrate TTS generation into your existing Fabric workflows
## Usage
### Basic TTS Generation
To generate audio from text using TTS:
```bash
# Basic TTS with default voice (Kore)
echo "Hello, this is a test of Gemini TTS" | fabric -m gemini-2.0-flash-tts -o output.wav
# Using a specific voice
echo "Hello, this is a test with the Charon voice" | fabric -m gemini-2.0-flash-tts --voice Charon -o output.wav
# Using TTS with a pattern
fabric -p summarize --voice Puck -m gemini-2.0-flash-tts -o summary.wav < document.txt
```
### Voice Selection
Use the `--voice` flag to specify which voice to use for TTS generation:
```bash
fabric -m gemini-2.0-flash-tts --voice Zephyr -o output.wav "Your text here"
```
If no voice is specified, the default voice "Kore" will be used.
## Available Voices
Gemini TTS supports 30+ different voices, each with unique characteristics:
### Popular Voices
- **Kore** - Firm and confident (default)
- **Charon** - Informative and clear
- **Puck** - Upbeat and energetic
- **Zephyr** - Bright and cheerful
- **Leda** - Youthful and energetic
- **Aoede** - Breezy and natural
### Complete Voice List
- Kore, Charon, Puck, Fenrir, Aoede, Leda, Orus, Zephyr
- Autonoe, Callirrhoe, Despina, Erinome, Gacrux, Laomedeia
- Pulcherrima, Sulafat, Vindemiatrix, Achernar, Achird
- Algenib, Algieba, Alnilam, Enceladus, Iapetus, Rasalgethi
- Sadachbia, Zubenelgenubi, Vega, Capella, Lyra
- Sadaltager, Schedar, Umbriel
### Listing Available Voices
To see all available voices with descriptions:
```bash
# List all voices with characteristics
fabric --list-gemini-voices
# List voice names only (for shell completion)
fabric --list-gemini-voices --shell-complete-list
```
## Rate Limits
Google Gemini TTS has usage quotas that vary by plan:
### Free Tier
- **15 requests per day** per project per TTS model
- Quota resets daily
- Applies to all TTS models (e.g., `gemini-2.5-flash-preview-tts`)
### Rate Limit Errors
If you exceed your quota, you'll see an error like:
```text
Error 429: You exceeded your current quota, please check your plan and billing details
```
**Solutions:**
- Wait for daily quota reset (typically at midnight Pacific time)
- Upgrade to a paid plan for higher limits
- Use TTS generation strategically for important content
For current rate limits and pricing, visit: <https://ai.google.dev/gemini-api/docs/rate-limits>
## Configuration
### Command Line Options
- `--voice <voice_name>` - Specify the TTS voice to use
- `-o <filename.wav>` - Output audio file (required for TTS models)
- `-m <tts_model>` - Specify a TTS-capable model (e.g., `gemini-2.0-flash-tts`)
### YAML Configuration
You can also set a default voice in your Fabric configuration file (`~/.config/fabric/config.yaml`):
```yaml
voice: "Charon" # Set your preferred default voice
```
## Requirements
- Valid Google Gemini API key configured in Fabric
- TTS-capable Gemini model (models containing "tts" in the name)
- Audio output must be specified with `-o filename.wav`
## Troubleshooting
### Common Issues
#### Error: "TTS model requires audio output"
- Solution: Always specify an output file with `-o filename.wav` when using TTS models
#### Error: "Invalid voice 'X'"
- Solution: Check that the voice name is spelled correctly and matches one of the supported voices listed above
#### Error: "TTS generation failed"
- Solution: Verify your Gemini API key is valid and you have sufficient quota
### Getting Help
For additional help with TTS features:
```bash
fabric --help
```
## Technical Details
- **Audio Format**: WAV files with 24kHz sample rate, 16-bit depth, mono channel
- **Language Support**: Automatic language detection for 24+ languages
- **Model Requirements**: Models must contain "tts", "preview-tts", or "text-to-speech" in the name
- **Voice Selection**: Uses Google's PrebuiltVoiceConfig system for consistent voice quality
---
For more information about Fabric, visit the [main documentation](../README.md).

36
docs/voices/README.md Normal file
View File

@@ -0,0 +1,36 @@
# Voice Samples
This directory contains sample audio files demonstrating different Gemini TTS voices.
## Sample Files
Each voice sample says "The quick brown fox jumped over the lazy dog" to demonstrate the voice characteristics:
- **Kore.wav** - Firm and confident (default voice)
- **Charon.wav** - Informative and clear
- **Vega.wav** - Smooth and pleasant
- **Capella.wav** - Warm and welcoming
- **Achird.wav** - Friendly and approachable
- **Lyra.wav** - Melodic and expressive
## Generating Samples
To generate these samples, use the following commands:
```bash
# Generate each voice sample
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Kore -o docs/voices/Kore.wav
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o docs/voices/Charon.wav
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Vega -o docs/voices/Vega.wav
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Capella -o docs/voices/Capella.wav
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Achird -o docs/voices/Achird.wav
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Lyra -o docs/voices/Lyra.wav
```
## Audio Format
- **Format**: WAV (uncompressed)
- **Sample Rate**: 24kHz
- **Bit Depth**: 16-bit
- **Channels**: Mono
- **Approximate Size**: ~500KB per sample

View File

@@ -87,6 +87,8 @@ type Flags struct {
ThinkStartTag string `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:"<think>"`
ThinkEndTag string `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:"</think>"`
DisableResponsesAPI bool `long:"disable-responses-api" yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"`
Voice string `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"`
ListGeminiVoices bool `long:"list-gemini-voices" description:"List all available Gemini TTS voices"`
}
var debug = false
@@ -441,6 +443,7 @@ func (o *Flags) BuildChatOptions() (ret *domain.ChatOptions, err error) {
SuppressThink: o.SuppressThink,
ThinkStartTag: startTag,
ThinkEndTag: endTag,
Voice: o.Voice,
}
return
}

View File

@@ -1,11 +1,13 @@
package cli
import (
"fmt"
"os"
"strconv"
"github.com/danielmiessler/fabric/internal/core"
"github.com/danielmiessler/fabric/internal/plugins/ai"
"github.com/danielmiessler/fabric/internal/plugins/ai/gemini"
"github.com/danielmiessler/fabric/internal/plugins/db/fsdb"
)
@@ -58,5 +60,11 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
return true, err
}
if currentFlags.ListGeminiVoices {
voicesList := gemini.ListGeminiVoices(currentFlags.ShellCompleteOutput)
fmt.Print(voicesList)
return true, nil
}
return false, nil
}

View File

@@ -38,6 +38,7 @@ type ChatOptions struct {
ThinkEndTag string
AudioOutput bool
AudioFormat string
Voice string
}
// NormalizeMessages remove empty messages and ensure messages order user-assist-user

View File

@@ -194,6 +194,12 @@ func (o *Client) generateTTSAudio(ctx context.Context, msgs []*chat.ChatCompleti
return "", err
}
// Validate voice name before making API call
if opts.Voice != "" && !IsValidGeminiVoice(opts.Voice) {
validVoices := GetGeminiVoiceNames()
return "", fmt.Errorf("invalid voice '%s'. Valid voices are: %v", opts.Voice, validVoices)
}
client, err := o.createGenaiClient(ctx)
if err != nil {
return "", err
@@ -211,12 +217,17 @@ func (o *Client) performTTSGeneration(ctx context.Context, client *genai.Client,
}}
// Configure for TTS generation
voiceName := opts.Voice
if voiceName == "" {
voiceName = "Kore" // Default voice if none specified
}
config := &genai.GenerateContentConfig{
ResponseModalities: []string{"AUDIO"},
SpeechConfig: &genai.SpeechConfig{
VoiceConfig: &genai.VoiceConfig{
PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{
VoiceName: "Kore", // Default voice
VoiceName: voiceName,
},
},
},

View File

@@ -0,0 +1,218 @@
package gemini
import (
"fmt"
"sort"
)
// GeminiVoice describes a single prebuilt Gemini text-to-speech voice.
type GeminiVoice struct {
	// Name is the voice identifier that user-supplied voice names are
	// matched against; Description is the short summary shown in listings.
	Name, Description string
	// Characteristics holds lower-case trait keywords for the voice; the
	// human-readable listing uses them to pick a display group.
	Characteristics []string
}
// GetGeminiVoices returns the built-in catalog of Gemini TTS voices.
//
// The catalog is a hand-maintained table (no API call is made) and a fresh
// slice is built on every call, so callers may modify the result freely.
// Reference used to maintain the table:
// https://ai.google.dev/gemini-api/docs/speech-generation
//
// NOTE(review): Vega, Capella and Lyra do not seem to appear in the voice
// table of the page linked above — confirm they are accepted by the API.
func GetGeminiVoices() []GeminiVoice {
	// entry keeps each table row on one short line.
	entry := func(name, description string, characteristics ...string) GeminiVoice {
		return GeminiVoice{
			Name:            name,
			Description:     description,
			Characteristics: characteristics,
		}
	}

	catalog := []GeminiVoice{
		// Firm voices
		entry("Kore", "Firm and confident", "firm", "confident", "default"),
		entry("Orus", "Firm and decisive", "firm", "decisive"),
		entry("Alnilam", "Firm and strong", "firm", "strong"),

		// Upbeat voices
		entry("Puck", "Upbeat and energetic", "upbeat", "energetic"),
		entry("Laomedeia", "Upbeat and lively", "upbeat", "lively"),

		// Bright voices
		entry("Zephyr", "Bright and cheerful", "bright", "cheerful"),
		entry("Autonoe", "Bright and optimistic", "bright", "optimistic"),

		// Informative voices
		entry("Charon", "Informative and clear", "informative", "clear"),
		entry("Rasalgethi", "Informative and professional", "informative", "professional"),

		// Natural voices
		entry("Aoede", "Breezy and natural", "breezy", "natural"),
		entry("Leda", "Youthful and energetic", "youthful", "energetic"),

		// Gentle voices
		entry("Vindemiatrix", "Gentle and kind", "gentle", "kind"),
		entry("Achernar", "Soft and gentle", "soft", "gentle"),
		entry("Enceladus", "Breathy and soft", "breathy", "soft"),

		// Warm voices
		entry("Sulafat", "Warm and welcoming", "warm", "welcoming"),
		entry("Capella", "Warm and approachable", "warm", "approachable"),

		// Clear voices
		entry("Iapetus", "Clear and articulate", "clear", "articulate"),
		entry("Erinome", "Clear and precise", "clear", "precise"),

		// Pleasant voices
		entry("Algieba", "Smooth and pleasant", "smooth", "pleasant"),
		entry("Vega", "Smooth and flowing", "smooth", "flowing"),

		// Textured voices
		entry("Algenib", "Gravelly texture", "gravelly", "textured"),

		// Relaxed voices
		entry("Callirrhoe", "Easy-going and relaxed", "relaxed", "easy-going"),
		entry("Despina", "Smooth and flowing", "smooth", "flowing"),

		// Mature voices
		entry("Gacrux", "Mature and experienced", "mature", "experienced"),

		// Expressive voices
		entry("Pulcherrima", "Forward and expressive", "forward", "expressive"),
		entry("Lyra", "Melodic and expressive", "melodic", "expressive"),

		// Dynamic voices
		entry("Fenrir", "Excitable and dynamic", "excitable", "dynamic"),
		entry("Sadachbia", "Lively and animated", "lively", "animated"),

		// Friendly voices
		entry("Achird", "Friendly and approachable", "friendly", "approachable"),

		// Casual voices
		entry("Zubenelgenubi", "Casual and conversational", "casual", "conversational"),

		// Additional voices from latest API
		entry("Sadaltager", "Additional voice option", "additional"),
		entry("Schedar", "Additional voice option", "additional"),
		entry("Umbriel", "Additional voice option", "additional"),
	}
	return catalog
}
// GetGeminiVoiceNames collects the Name of every voice returned by
// GetGeminiVoices and returns them sorted in ascending string order.
func GetGeminiVoiceNames() []string {
	catalog := GetGeminiVoices()

	sorted := make([]string, 0, len(catalog))
	for idx := range catalog {
		sorted = append(sorted, catalog[idx].Name)
	}

	sort.Strings(sorted)
	return sorted
}
// IsValidGeminiVoice reports whether voiceName can be used as a TTS voice.
//
// The empty string is accepted (the caller is expected to fall back to the
// default voice). Any other value must match, exactly and case-sensitively,
// the Name of an entry returned by GetGeminiVoices.
func IsValidGeminiVoice(voiceName string) bool {
	// No voice requested: valid, a default is applied downstream.
	if len(voiceName) == 0 {
		return true
	}

	catalog := GetGeminiVoices()
	for idx := range catalog {
		if catalog[idx].Name == voiceName {
			return true
		}
	}
	return false
}
// GetGeminiVoiceByName looks up a voice by its exact (case-sensitive) name.
//
// On success it returns a pointer to a copy of the matching catalog entry
// and a nil error. If no entry has that name it returns a nil pointer and
// an error naming the requested voice.
func GetGeminiVoiceByName(name string) (*GeminiVoice, error) {
	catalog := GetGeminiVoices()
	for idx := range catalog {
		if catalog[idx].Name != name {
			continue
		}
		match := catalog[idx]
		return &match, nil
	}
	return nil, fmt.Errorf("voice '%s' not found", name)
}
// ListGeminiVoices formats the voice list for display
func ListGeminiVoices(shellCompleteMode bool) string {
if shellCompleteMode {
// For shell completion, just return voice names
names := GetGeminiVoiceNames()
result := ""
for _, name := range names {
result += name + "\n"
}
return result
}
// For human-readable output
voices := GetGeminiVoices()
result := "Available Gemini Text-to-Speech voices:\n\n"
// Group by characteristics for better readability
groups := map[string][]GeminiVoice{
"Firm & Confident": {},
"Bright & Cheerful": {},
"Warm & Welcoming": {},
"Clear & Professional": {},
"Natural & Expressive": {},
"Other Voices": {},
}
for _, voice := range voices {
placed := false
for _, char := range voice.Characteristics {
switch char {
case "firm", "confident", "decisive", "strong":
if !placed {
groups["Firm & Confident"] = append(groups["Firm & Confident"], voice)
placed = true
}
case "bright", "cheerful", "upbeat", "energetic", "lively":
if !placed {
groups["Bright & Cheerful"] = append(groups["Bright & Cheerful"], voice)
placed = true
}
case "warm", "welcoming", "friendly", "approachable":
if !placed {
groups["Warm & Welcoming"] = append(groups["Warm & Welcoming"], voice)
placed = true
}
case "clear", "informative", "professional", "articulate":
if !placed {
groups["Clear & Professional"] = append(groups["Clear & Professional"], voice)
placed = true
}
case "natural", "expressive", "melodic", "breezy":
if !placed {
groups["Natural & Expressive"] = append(groups["Natural & Expressive"], voice)
placed = true
}
}
}
if !placed {
groups["Other Voices"] = append(groups["Other Voices"], voice)
}
}
// Output grouped voices
for groupName, groupVoices := range groups {
if len(groupVoices) > 0 {
result += fmt.Sprintf("%s:\n", groupName)
for _, voice := range groupVoices {
defaultStr := ""
if voice.Name == "Kore" {
defaultStr = " (default)"
}
result += fmt.Sprintf(" %-15s - %s%s\n", voice.Name, voice.Description, defaultStr)
}
result += "\n"
}
}
result += "Use --voice <voice_name> to select a specific voice.\n"
result += "Example: fabric --voice Charon -m gemini-2.0-flash-tts -o output.wav \"Hello world\"\n"
return result
}
// NOTE: This implementation maintains a curated list based on official Google documentation.
// In the future, if Google provides a dynamic voice discovery API, this can be updated
// to make API calls for real-time voice discovery.
//
// The current approach ensures:
// 1. Fast response times (no API calls needed)
// 2. Reliable voice information with descriptions
// 3. Easy maintenance when new voices are added
// 4. Offline functionality
//
// To update voices: Monitor Google's Gemini TTS documentation at:
// https://ai.google.dev/gemini-api/docs/speech-generation