mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-01-11 15:28:07 -05:00
Compare commits
28 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
737e37f00e | ||
|
|
42bb72ab65 | ||
|
|
612ae4e3b5 | ||
|
|
27f9134912 | ||
|
|
c02718855d | ||
|
|
4f16222b31 | ||
|
|
8c27b34d0f | ||
|
|
0b71b54698 | ||
|
|
614b1322d5 | ||
|
|
eab335873e | ||
|
|
577dc9896d | ||
|
|
3a4bb4b9b2 | ||
|
|
c766915764 | ||
|
|
71c08648c6 | ||
|
|
95e2e6a5ac | ||
|
|
5cdf297d85 | ||
|
|
5d7137804a | ||
|
|
8b6b8fbd44 | ||
|
|
3e75aa260f | ||
|
|
92aca524a4 | ||
|
|
f70eff2e41 | ||
|
|
489c481acc | ||
|
|
3a1eaf375f | ||
|
|
52246dda28 | ||
|
|
3c200e2883 | ||
|
|
bda6505d5c | ||
|
|
a241c98837 | ||
|
|
12d7803044 |
29
.github/workflows/release.yml
vendored
29
.github/workflows/release.yml
vendored
@@ -93,19 +93,24 @@ jobs:
|
||||
name: fabric-windows-${{ matrix.arch }}.exe
|
||||
path: fabric-windows-${{ matrix.arch }}.exe
|
||||
|
||||
- name: Get latest tag
|
||||
if: matrix.os != 'windows-latest'
|
||||
id: get_latest_tag
|
||||
- name: Get version from source
|
||||
id: get_version
|
||||
shell: bash
|
||||
run: |
|
||||
latest_tag=$(git tag --sort=-creatordate | head -n 1)
|
||||
echo "latest_tag=$latest_tag" >> $GITHUB_ENV
|
||||
|
||||
- name: Get latest tag
|
||||
if: matrix.os == 'windows-latest'
|
||||
id: get_latest_tag_windows
|
||||
run: |
|
||||
$latest_tag = git tag --sort=-creatordate | Select-Object -First 1
|
||||
Add-Content -Path $env:GITHUB_ENV -Value "latest_tag=$latest_tag"
|
||||
if [ ! -f "nix/pkgs/fabric/version.nix" ]; then
|
||||
echo "Error: version.nix file not found"
|
||||
exit 1
|
||||
fi
|
||||
version=$(cat nix/pkgs/fabric/version.nix | tr -d '"' | tr -cd '0-9.')
|
||||
if [ -z "$version" ]; then
|
||||
echo "Error: version is empty"
|
||||
exit 1
|
||||
fi
|
||||
if ! echo "$version" | grep -E '^[0-9]+\.[0-9]+\.[0-9]+' > /dev/null; then
|
||||
echo "Error: Invalid version format: $version"
|
||||
exit 1
|
||||
fi
|
||||
echo "latest_tag=v$version" >> $GITHUB_ENV
|
||||
|
||||
- name: Create release if it doesn't exist
|
||||
shell: bash
|
||||
|
||||
@@ -89,6 +89,7 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
go run ./cmd/generate_changelog --process-prs ${{ env.new_tag }}
|
||||
go run ./cmd/generate_changelog --sync-db
|
||||
- name: Commit changes
|
||||
run: |
|
||||
# These files are modified by the version bump process
|
||||
|
||||
23
.vscode/settings.json
vendored
23
.vscode/settings.json
vendored
@@ -1,21 +1,32 @@
|
||||
{
|
||||
"cSpell.words": [
|
||||
"Achird",
|
||||
"addextension",
|
||||
"adduser",
|
||||
"AIML",
|
||||
"anthropics",
|
||||
"Aoede",
|
||||
"atotto",
|
||||
"Autonoe",
|
||||
"badfile",
|
||||
"Behrens",
|
||||
"blindspots",
|
||||
"Bombal",
|
||||
"Callirhoe",
|
||||
"Callirrhoe",
|
||||
"Cerebras",
|
||||
"compadd",
|
||||
"compdef",
|
||||
"compinit",
|
||||
"creatordate",
|
||||
"curcontext",
|
||||
"custompatterns",
|
||||
"danielmiessler",
|
||||
"davidanson",
|
||||
"Debugf",
|
||||
"dedup",
|
||||
"deepseek",
|
||||
"Despina",
|
||||
"direnv",
|
||||
"dryrun",
|
||||
"dsrp",
|
||||
@@ -23,12 +34,14 @@
|
||||
"Eisler",
|
||||
"elif",
|
||||
"envrc",
|
||||
"Erinome",
|
||||
"Errorf",
|
||||
"eugeis",
|
||||
"Eugen",
|
||||
"excalidraw",
|
||||
"exolab",
|
||||
"fabriclogo",
|
||||
"flac",
|
||||
"fpath",
|
||||
"frequencypenalty",
|
||||
"fsdb",
|
||||
@@ -60,8 +73,10 @@
|
||||
"jessevdk",
|
||||
"Jina",
|
||||
"joho",
|
||||
"Kore",
|
||||
"ksylvan",
|
||||
"Langdock",
|
||||
"Laomedeia",
|
||||
"ldflags",
|
||||
"libexec",
|
||||
"listcontexts",
|
||||
@@ -85,6 +100,7 @@
|
||||
"openaiapi",
|
||||
"opencode",
|
||||
"openrouter",
|
||||
"Orus",
|
||||
"otiai",
|
||||
"pdflatex",
|
||||
"pipx",
|
||||
@@ -93,11 +109,14 @@
|
||||
"presencepenalty",
|
||||
"printcontext",
|
||||
"printsession",
|
||||
"Pulcherrima",
|
||||
"pycache",
|
||||
"pyperclip",
|
||||
"readystream",
|
||||
"restapi",
|
||||
"rmextension",
|
||||
"Sadachbia",
|
||||
"Sadaltager",
|
||||
"samber",
|
||||
"sashabaranov",
|
||||
"sdist",
|
||||
@@ -107,14 +126,18 @@
|
||||
"storer",
|
||||
"Streamlit",
|
||||
"stretchr",
|
||||
"subchunk",
|
||||
"Sulafat",
|
||||
"talkpanel",
|
||||
"Telos",
|
||||
"testpattern",
|
||||
"testuser",
|
||||
"Thacker",
|
||||
"tidwall",
|
||||
"topp",
|
||||
"ttrc",
|
||||
"unalias",
|
||||
"unconfigured",
|
||||
"unmarshalling",
|
||||
"updatepatterns",
|
||||
"videoid",
|
||||
|
||||
50
CHANGELOG.md
50
CHANGELOG.md
@@ -1,5 +1,55 @@
|
||||
# Changelog
|
||||
|
||||
## v1.4.269 (2025-07-26)
|
||||
|
||||
### PR [#1653](https://github.com/danielmiessler/Fabric/pull/1653) by [ksylvan](https://github.com/ksylvan): docs: update Gemini TTS model references to gemini-2.5-flash-preview-tts
|
||||
|
||||
- Updated Gemini TTS model references from gemini-2.0-flash-tts to gemini-2.5-flash-preview-tts throughout documentation
|
||||
- Modified documentation examples to use the new gemini-2.5-flash-preview-tts model
|
||||
- Updated voice selection example commands in Gemini-TTS.md
|
||||
- Revised CLI help text example commands to reflect model changes
|
||||
- Updated changelog database binary file
|
||||
|
||||
## v1.4.268 (2025-07-26)
|
||||
|
||||
### PR [#1652](https://github.com/danielmiessler/Fabric/pull/1652) by [ksylvan](https://github.com/ksylvan): Implement Voice Selection for Gemini Text-to-Speech
|
||||
|
||||
- Feat: add Gemini TTS voice selection and listing functionality
|
||||
- Add `--voice` flag for TTS voice selection
|
||||
- Add `--list-gemini-voices` command for voice discovery
|
||||
- Implement voice validation for Gemini TTS models
|
||||
- Update shell completions for voice options
|
||||
|
||||
## v1.4.267 (2025-07-26)
|
||||
|
||||
### PR [#1650](https://github.com/danielmiessler/Fabric/pull/1650) by [ksylvan](https://github.com/ksylvan): Update Gemini Plugin to New SDK with TTS Support
|
||||
|
||||
- Update Gemini SDK to new genai library and add TTS audio output support
|
||||
- Replace deprecated generative-ai-go with google.golang.org/genai library
|
||||
- Add TTS model detection and audio output validation
|
||||
- Implement WAV file generation for TTS audio responses
|
||||
- Add audio format checking utilities in CLI output
|
||||
|
||||
## v1.4.266 (2025-07-25)
|
||||
|
||||
### PR [#1649](https://github.com/danielmiessler/Fabric/pull/1649) by [ksylvan](https://github.com/ksylvan): Fix Conditional API Initialization to Prevent Unnecessary Error Messages
|
||||
|
||||
- Prevent unconfigured API initialization and add Docker test suite
|
||||
- Add BEDROCK_AWS_REGION requirement for Bedrock initialization
|
||||
- Implement IsConfigured check for Ollama API URL
|
||||
- Create comprehensive Docker testing environment with 6 scenarios
|
||||
- Add interactive test runner with shell access
|
||||
|
||||
## v1.4.265 (2025-07-25)
|
||||
|
||||
### PR [#1647](https://github.com/danielmiessler/Fabric/pull/1647) by [ksylvan](https://github.com/ksylvan): Simplify Workflow with Single Version Retrieval Step
|
||||
|
||||
- Replace git tag lookup with version.nix file reading for release workflow
|
||||
- Remove OS-specific git tag retrieval steps and add unified version extraction from nix file
|
||||
- Include version format validation with regex check
|
||||
- Add error handling for missing version file
|
||||
- Consolidate cross-platform version logic into single step with bash shell for consistent version parsing
|
||||
|
||||
## v1.4.264 (2025-07-22)
|
||||
|
||||
### PR [#1642](https://github.com/danielmiessler/Fabric/pull/1642) by [ksylvan](https://github.com/ksylvan): Add --sync-db to `generate_changelog`, plus many fixes
|
||||
|
||||
@@ -548,6 +548,9 @@ Application Options:
|
||||
--think-start-tag= Start tag for thinking sections (default: <think>)
|
||||
--think-end-tag= End tag for thinking sections (default: </think>)
|
||||
--disable-responses-api Disable OpenAI Responses API (default: false)
|
||||
--voice= TTS voice name for supported models (e.g., Kore, Charon, Puck)
|
||||
(default: Kore)
|
||||
--list-gemini-voices List all available Gemini TTS voices
|
||||
|
||||
Help Options:
|
||||
-h, --help Show this help message
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
package main
|
||||
|
||||
var version = "v1.4.264"
|
||||
var version = "v1.4.269"
|
||||
|
||||
Binary file not shown.
@@ -140,7 +140,7 @@ func (g *Generator) CreateNewChangelogEntry(version string) error {
|
||||
continue // Continue to attempt processing other files
|
||||
}
|
||||
content.WriteString(string(data))
|
||||
content.WriteString("\n")
|
||||
// Note: No extra newline needed here as each incoming file already ends with a newline
|
||||
}
|
||||
|
||||
if len(processingErrors) > 0 {
|
||||
|
||||
@@ -14,16 +14,19 @@ _fabric_models() {
|
||||
models=(${(f)"$(fabric --listmodels --shell-complete-list 2>/dev/null)"})
|
||||
compadd -X "Models:" ${models}
|
||||
}
|
||||
|
||||
_fabric_contexts() {
|
||||
local -a contexts
|
||||
contexts=(${(f)"$(fabric --listcontexts --shell-complete-list 2>/dev/null)"})
|
||||
compadd -X "Contexts:" ${contexts}
|
||||
}
|
||||
|
||||
_fabric_sessions() {
|
||||
local -a sessions
|
||||
sessions=(${(f)"$(fabric --listsessions --shell-complete-list 2>/dev/null)"})
|
||||
compadd -X "Sessions:" ${sessions}
|
||||
}
|
||||
|
||||
_fabric_strategies() {
|
||||
local -a strategies
|
||||
strategies=(${(f)"$(fabric --liststrategies --shell-complete-list 2>/dev/null)"})
|
||||
@@ -34,14 +37,12 @@ _fabric_extensions() {
|
||||
local -a extensions
|
||||
extensions=(${(f)"$(fabric --listextensions --shell-complete-list 2>/dev/null)"})
|
||||
compadd -X "Extensions:" ${extensions}
|
||||
'(-L --listmodels)'{-L,--listmodels}'[List all available models]:list models:_fabric_models' \
|
||||
'(-x --listcontexts)'{-x,--listcontexts}'[List all contexts]:list contexts:_fabric_contexts' \
|
||||
'(-X --listsessions)'{-X,--listsessions}'[List all sessions]:list sessions:_fabric_sessions' \
|
||||
'(--listextensions)--listextensions[List all registered extensions]' \
|
||||
'(--liststrategies)--liststrategies[List all strategies]:list strategies:_fabric_strategies' \
|
||||
'(--listvendors)--listvendors[List all vendors]' \
|
||||
vendors=(${(f)"$(fabric --listvendors 2>/dev/null)"})
|
||||
compadd -X "Vendors:" ${vendors}
|
||||
}
|
||||
|
||||
_fabric_gemini_voices() {
|
||||
local -a voices
|
||||
voices=(${(f)"$(fabric --list-gemini-voices --shell-complete-list 2>/dev/null)"})
|
||||
compadd -X "Gemini TTS Voices:" ${voices}
|
||||
}
|
||||
|
||||
_fabric() {
|
||||
@@ -109,6 +110,8 @@ _fabric() {
|
||||
'(--strategy)--strategy[Choose a strategy from the available strategies]:strategy:_fabric_strategies' \
|
||||
'(--liststrategies)--liststrategies[List all strategies]' \
|
||||
'(--listvendors)--listvendors[List all vendors]' \
|
||||
'(--voice)--voice[TTS voice name for supported models]:voice:_fabric_gemini_voices' \
|
||||
'(--list-gemini-voices)--list-gemini-voices[List all available Gemini TTS voices]' \
|
||||
'(--shell-complete-list)--shell-complete-list[Output raw list without headers/formatting (for shell completion)]' \
|
||||
'(--suppress-think)--suppress-think[Suppress text enclosed in thinking tags]' \
|
||||
'(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: <think>)]:start tag:' \
|
||||
@@ -119,4 +122,3 @@ _fabric() {
|
||||
}
|
||||
|
||||
_fabric "$@"
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ _fabric() {
|
||||
_get_comp_words_by_ref -n : cur prev words cword
|
||||
|
||||
# Define all possible options/flags
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
|
||||
# Helper function for dynamic completions
|
||||
_fabric_get_list() {
|
||||
@@ -62,6 +62,10 @@ _fabric() {
|
||||
COMPREPLY=($(compgen -W "$(_fabric_get_list --liststrategies)" -- "${cur}"))
|
||||
return 0
|
||||
;;
|
||||
--voice)
|
||||
COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}"))
|
||||
return 0
|
||||
;;
|
||||
# Options requiring file/directory paths
|
||||
-a | --attachment | -o | --output | --config | --addextension | --image-file)
|
||||
_filedir
|
||||
|
||||
@@ -31,6 +31,10 @@ function __fabric_get_extensions
|
||||
fabric --listextensions --shell-complete-list 2>/dev/null
|
||||
end
|
||||
|
||||
function __fabric_get_gemini_voices
|
||||
fabric --list-gemini-voices --shell-complete-list 2>/dev/null
|
||||
end
|
||||
|
||||
# Main completion function
|
||||
complete -c fabric -f
|
||||
|
||||
@@ -71,6 +75,7 @@ complete -c fabric -l rmextension -d "Remove a registered extension by name" -a
|
||||
complete -c fabric -l strategy -d "Choose a strategy from the available strategies" -a "(__fabric_get_strategies)"
|
||||
complete -c fabric -l think-start-tag -d "Start tag for thinking sections (default: <think>)"
|
||||
complete -c fabric -l think-end-tag -d "End tag for thinking sections (default: </think>)"
|
||||
complete -c fabric -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)"
|
||||
|
||||
# Boolean flags (no arguments)
|
||||
complete -c fabric -s S -l setup -d "Run setup for all reconfigurable parts of fabric"
|
||||
@@ -99,6 +104,7 @@ complete -c fabric -l version -d "Print current version"
|
||||
complete -c fabric -l listextensions -d "List all registered extensions"
|
||||
complete -c fabric -l liststrategies -d "List all strategies"
|
||||
complete -c fabric -l listvendors -d "List all vendors"
|
||||
complete -c fabric -l list-gemini-voices -d "List all available Gemini TTS voices"
|
||||
complete -c fabric -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)"
|
||||
complete -c fabric -l suppress-think -d "Suppress text enclosed in thinking tags"
|
||||
complete -c fabric -l disable-responses-api -d "Disable OpenAI Responses API (default: false)"
|
||||
|
||||
155
docs/Gemini-TTS.md
Normal file
155
docs/Gemini-TTS.md
Normal file
@@ -0,0 +1,155 @@
|
||||
# Gemini Text-to-Speech (TTS) Guide
|
||||
|
||||
Fabric supports Google Gemini's text-to-speech (TTS) capabilities, allowing you to convert text into high-quality audio using various AI-generated voices.
|
||||
|
||||
## Overview
|
||||
|
||||
The Gemini TTS feature in Fabric allows you to:
|
||||
|
||||
- Convert text input into audio using Google's Gemini TTS models
|
||||
- Choose from 30+ different AI voices with varying characteristics
|
||||
- Generate high-quality WAV audio files
|
||||
- Integrate TTS generation into your existing Fabric workflows
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic TTS Generation
|
||||
|
||||
To generate audio from text using TTS:
|
||||
|
||||
```bash
|
||||
# Basic TTS with default voice (Kore)
|
||||
echo "Hello, this is a test of Gemini TTS" | fabric -m gemini-2.5-flash-preview-tts -o output.wav
|
||||
|
||||
# Using a specific voice
|
||||
echo "Hello, this is a test with the Charon voice" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o output.wav
|
||||
|
||||
# Using TTS with a pattern
|
||||
fabric -p summarize --voice Puck -m gemini-2.5-flash-preview-tts -o summary.wav < document.txt
|
||||
```
|
||||
|
||||
### Voice Selection
|
||||
|
||||
Use the `--voice` flag to specify which voice to use for TTS generation:
|
||||
|
||||
```bash
|
||||
fabric -m gemini-2.5-flash-preview-tts --voice Zephyr -o output.wav "Your text here"
|
||||
```
|
||||
|
||||
If no voice is specified, the default voice "Kore" will be used.
|
||||
|
||||
## Available Voices
|
||||
|
||||
Gemini TTS supports 30+ different voices, each with unique characteristics:
|
||||
|
||||
### Popular Voices
|
||||
|
||||
- **Kore** - Firm and confident (default)
|
||||
- **Charon** - Informative and clear
|
||||
- **Puck** - Upbeat and energetic
|
||||
- **Zephyr** - Bright and cheerful
|
||||
- **Leda** - Youthful and energetic
|
||||
- **Aoede** - Breezy and natural
|
||||
|
||||
### Complete Voice List
|
||||
|
||||
- Kore, Charon, Puck, Fenrir, Aoede, Leda, Orus, Zephyr
|
||||
- Autonoe, Callirhoe, Despina, Erinome, Gacrux, Laomedeia
|
||||
- Pulcherrima, Sulafat, Vindemiatrix, Achernar, Achird
|
||||
- Algenib, Algieba, Alnilam, Enceladus, Iapetus, Rasalgethi
|
||||
- Sadachbia, Zubenelgenubi, Vega, Capella, Lyra
|
||||
|
||||
### Listing Available Voices
|
||||
|
||||
To see all available voices with descriptions:
|
||||
|
||||
```bash
|
||||
# List all voices with characteristics
|
||||
fabric --list-gemini-voices
|
||||
|
||||
# List voice names only (for shell completion)
|
||||
fabric --list-gemini-voices --shell-complete-list
|
||||
```
|
||||
|
||||
## Rate Limits
|
||||
|
||||
Google Gemini TTS has usage quotas that vary by plan:
|
||||
|
||||
### Free Tier
|
||||
|
||||
- **15 requests per day** per project per TTS model
|
||||
- Quota resets daily
|
||||
- Applies to all TTS models (e.g., `gemini-2.5-flash-preview-tts`)
|
||||
|
||||
### Rate Limit Errors
|
||||
|
||||
If you exceed your quota, you'll see an error like:
|
||||
|
||||
```text
|
||||
Error 429: You exceeded your current quota, please check your plan and billing details
|
||||
```
|
||||
|
||||
**Solutions:**
|
||||
|
||||
- Wait for daily quota reset (typically at midnight UTC)
|
||||
- Upgrade to a paid plan for higher limits
|
||||
- Use TTS generation strategically for important content
|
||||
|
||||
For current rate limits and pricing, visit: <https://ai.google.dev/gemini-api/docs/rate-limits>
|
||||
|
||||
## Configuration
|
||||
|
||||
### Command Line Options
|
||||
|
||||
- `--voice <voice_name>` - Specify the TTS voice to use
|
||||
- `-o <filename.wav>` - Output audio file (required for TTS models)
|
||||
- `-m <tts_model>` - Specify a TTS-capable model (e.g., `gemini-2.5-flash-preview-tts`)
|
||||
|
||||
### YAML Configuration
|
||||
|
||||
You can also set a default voice in your Fabric configuration file (`~/.config/fabric/config.yaml`):
|
||||
|
||||
```yaml
|
||||
voice: "Charon" # Set your preferred default voice
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- Valid Google Gemini API key configured in Fabric
|
||||
- TTS-capable Gemini model (models containing "tts" in the name)
|
||||
- Audio output must be specified with `-o filename.wav`
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
#### Error: "TTS model requires audio output"
|
||||
|
||||
- Solution: Always specify an output file with `-o filename.wav` when using TTS models
|
||||
|
||||
#### Error: "Invalid voice 'X'"
|
||||
|
||||
- Solution: Check that the voice name is spelled correctly and matches one of the supported voices listed above
|
||||
|
||||
#### Error: "TTS generation failed"
|
||||
|
||||
- Solution: Verify your Gemini API key is valid and you have sufficient quota
|
||||
|
||||
### Getting Help
|
||||
|
||||
For additional help with TTS features:
|
||||
|
||||
```bash
|
||||
fabric --help
|
||||
```
|
||||
|
||||
## Technical Details
|
||||
|
||||
- **Audio Format**: WAV files with 24kHz sample rate, 16-bit depth, mono channel
|
||||
- **Language Support**: Automatic language detection for 24+ languages
|
||||
- **Model Requirements**: Models must contain "tts", "preview-tts", or "text-to-speech" in the name
|
||||
- **Voice Selection**: Uses Google's PrebuiltVoiceConfig system for consistent voice quality
|
||||
|
||||
---
|
||||
|
||||
For more information about Fabric, visit the [main documentation](../README.md).
|
||||
36
docs/voices/README.md
Normal file
36
docs/voices/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Voice Samples
|
||||
|
||||
This directory contains sample audio files demonstrating different Gemini TTS voices.
|
||||
|
||||
## Sample Files
|
||||
|
||||
Each voice sample says "The quick brown fox jumped over the lazy dog" to demonstrate the voice characteristics:
|
||||
|
||||
- **Kore.wav** - Firm and confident (default voice)
|
||||
- **Charon.wav** - Informative and clear
|
||||
- **Vega.wav** - Smooth and pleasant
|
||||
- **Capella.wav** - Warm and welcoming
|
||||
- **Achird.wav** - Friendly and approachable
|
||||
- **Lyra.wav** - Melodic and expressive
|
||||
|
||||
## Generating Samples
|
||||
|
||||
To generate these samples, use the following commands:
|
||||
|
||||
```bash
|
||||
# Generate each voice sample
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Kore -o docs/voices/Kore.wav
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Charon -o docs/voices/Charon.wav
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Vega -o docs/voices/Vega.wav
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Capella -o docs/voices/Capella.wav
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Achird -o docs/voices/Achird.wav
|
||||
echo "The quick brown fox jumped over the lazy dog" | fabric -m gemini-2.5-flash-preview-tts --voice Lyra -o docs/voices/Lyra.wav
|
||||
```
|
||||
|
||||
## Audio Format
|
||||
|
||||
- **Format**: WAV (uncompressed)
|
||||
- **Sample Rate**: 24kHz
|
||||
- **Bit Depth**: 16-bit
|
||||
- **Channels**: Mono
|
||||
- **Approximate Size**: ~500KB per sample
|
||||
11
go.mod
11
go.mod
@@ -15,7 +15,6 @@ require (
|
||||
github.com/gin-gonic/gin v1.10.1
|
||||
github.com/go-git/go-git/v5 v5.16.2
|
||||
github.com/go-shiori/go-readability v0.0.0-20250217085726-9f5bf5ca7612
|
||||
github.com/google/generative-ai-go v0.20.1
|
||||
github.com/google/go-github/v66 v66.0.0
|
||||
github.com/hasura/go-graphql-client v0.14.4
|
||||
github.com/jessevdk/go-flags v1.6.1
|
||||
@@ -35,13 +34,16 @@ require (
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.121.2 // indirect
|
||||
cloud.google.com/go/ai v0.12.1 // indirect
|
||||
cloud.google.com/go/auth v0.16.2 // indirect
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.7.0 // indirect
|
||||
cloud.google.com/go/longrunning v0.6.7 // indirect
|
||||
dario.cat/mergo v1.0.2 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/ProtonMail/go-crypto v1.3.0 // indirect
|
||||
@@ -109,7 +111,6 @@ require (
|
||||
github.com/ugorji/go/codec v1.2.14 // indirect
|
||||
github.com/xanzy/ssh-agent v0.3.3 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
||||
go.opentelemetry.io/otel v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.36.0 // indirect
|
||||
@@ -120,7 +121,7 @@ require (
|
||||
golang.org/x/net v0.41.0 // indirect
|
||||
golang.org/x/sync v0.16.0 // indirect
|
||||
golang.org/x/sys v0.34.0 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
google.golang.org/genai v1.17.0
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/grpc v1.73.0 // indirect
|
||||
|
||||
14
go.sum
14
go.sum
@@ -1,15 +1,11 @@
|
||||
cloud.google.com/go v0.121.2 h1:v2qQpN6Dx9x2NmwrqlesOt3Ys4ol5/lFZ6Mg1B7OJCg=
|
||||
cloud.google.com/go v0.121.2/go.mod h1:nRFlrHq39MNVWu+zESP2PosMWA0ryJw8KUBZ2iZpxbw=
|
||||
cloud.google.com/go/ai v0.12.1 h1:m1n/VjUuHS+pEO/2R4/VbuuEIkgk0w67fDQvFaMngM0=
|
||||
cloud.google.com/go/ai v0.12.1/go.mod h1:5vIPNe1ZQsVZqCliXIPL4QnhObQQY4d9hAGHdVc4iw4=
|
||||
cloud.google.com/go/auth v0.16.2 h1:QvBAGFPLrDeoiNjyfVunhQ10HKNYuOwZ5noee0M5df4=
|
||||
cloud.google.com/go/auth v0.16.2/go.mod h1:sRBas2Y1fB1vZTdurouM0AzuYQBMZinrUYL8EufhtEA=
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc=
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c=
|
||||
cloud.google.com/go/compute/metadata v0.7.0 h1:PBWF+iiAerVNe8UCHxdOt6eHLVc3ydFeOCw78U8ytSU=
|
||||
cloud.google.com/go/compute/metadata v0.7.0/go.mod h1:j5MvL9PprKL39t166CoB1uVHfQMs4tFQZZcKwksXUjo=
|
||||
cloud.google.com/go/longrunning v0.6.7 h1:IGtfDWHhQCgCjwQjV9iiLnUta9LBCo8R9QmAFsS/PrE=
|
||||
cloud.google.com/go/longrunning v0.6.7/go.mod h1:EAFV3IZAKmM56TyiE6VAP3VoTzhZzySwI/YI1s/nRsY=
|
||||
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8=
|
||||
dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA=
|
||||
github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY=
|
||||
@@ -126,8 +122,6 @@ github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8J
|
||||
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/generative-ai-go v0.20.1 h1:6dEIujpgN2V0PgLhr6c/M1ynRdc7ARtiIDPFzj45uNQ=
|
||||
github.com/google/generative-ai-go v0.20.1/go.mod h1:TjOnZJmZKzarWbjUJgy+r3Ee7HGBRVLhOIgupnwR4Bg=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
@@ -145,6 +139,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.6 h1:GW/XbdyBFQ8Qe+YAmFU
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.6/go.mod h1:MkHOF77EYAE7qfSuSS9PU6g4Nt4e11cnsDUowfwewLA=
|
||||
github.com/googleapis/gax-go/v2 v2.14.2 h1:eBLnkZ9635krYIPD+ag1USrOAI0Nr0QYF3+/3GqO0k0=
|
||||
github.com/googleapis/gax-go/v2 v2.14.2/go.mod h1:ON64QhlJkhVtSqp4v1uaK92VyZ2gmvDQsweuyLV+8+w=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/hasura/go-graphql-client v0.14.4 h1:bYU7/+V50T2YBGdNQXt6l4f2cMZPECPUd8cyCR+ixtw=
|
||||
github.com/hasura/go-graphql-client v0.14.4/go.mod h1:jfSZtBER3or+88Q9vFhWHiFMPppfYILRyl+0zsgPIIw=
|
||||
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||
@@ -249,8 +245,6 @@ github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI
|
||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 h1:q4XOmH/0opmeuJtPsbFNivyl7bCt7yRBbeEm2sC/XtQ=
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0/go.mod h1:snMWehoOh2wsEwnvvwtDyFCxVeDAODenXHtn5vzrKjo=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
|
||||
go.opentelemetry.io/otel v1.36.0 h1:UumtzIklRBY6cI/lllNZlALOF5nNIzJVb16APdvgTXg=
|
||||
@@ -345,8 +339,6 @@ golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
|
||||
golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
|
||||
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
|
||||
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
|
||||
golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
@@ -357,6 +349,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/api v0.236.0 h1:CAiEiDVtO4D/Qja2IA9VzlFrgPnK3XVMmRoJZlSWbc0=
|
||||
google.golang.org/api v0.236.0/go.mod h1:X1WF9CU2oTc+Jml1tiIxGmWFK/UZezdqEu09gcxZAj4=
|
||||
google.golang.org/genai v1.17.0 h1:lXYSnWShPYjxTouxRj0zF8RsNmSF+SKo7SQ7dM35NlI=
|
||||
google.golang.org/genai v1.17.0/go.mod h1:QPj5NGJw+3wEOHg+PrsWwJKvG6UC84ex5FR7qAYsN/M=
|
||||
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 h1:1tXaIXCracvtsRxSBsYDiSBN0cuJvM7QYW+MrpIRY78=
|
||||
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2/go.mod h1:49MsLSx0oWMOZqcpB3uL8ZOkAh1+TndpJ8ONoCBWiZk=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY=
|
||||
|
||||
@@ -3,6 +3,7 @@ package cli
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/core"
|
||||
@@ -35,6 +36,40 @@ func handleChatProcessing(currentFlags *Flags, registry *core.PluginRegistry, me
|
||||
if chatOptions, err = currentFlags.BuildChatOptions(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if user is requesting audio output or using a TTS model
|
||||
isAudioOutput := currentFlags.Output != "" && IsAudioFormat(currentFlags.Output)
|
||||
isTTSModel := isTTSModel(currentFlags.Model)
|
||||
|
||||
if isTTSModel && !isAudioOutput {
|
||||
err = fmt.Errorf("TTS model '%s' requires audio output. Please specify an audio output file with -o flag (e.g., -o output.wav)", currentFlags.Model)
|
||||
return
|
||||
}
|
||||
|
||||
if isAudioOutput && !isTTSModel {
|
||||
err = fmt.Errorf("audio output file '%s' specified but model '%s' is not a TTS model. Please use a TTS model like gemini-2.5-flash-preview-tts", currentFlags.Output, currentFlags.Model)
|
||||
return
|
||||
}
|
||||
|
||||
// For TTS models, check if output file already exists BEFORE processing
|
||||
if isTTSModel && isAudioOutput {
|
||||
outputFile := currentFlags.Output
|
||||
// Add .wav extension if not provided
|
||||
if filepath.Ext(outputFile) == "" {
|
||||
outputFile += ".wav"
|
||||
}
|
||||
if _, err = os.Stat(outputFile); err == nil {
|
||||
err = fmt.Errorf("file %s already exists. Please choose a different filename or remove the existing file", outputFile)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Set audio options in chat config
|
||||
chatOptions.AudioOutput = isAudioOutput
|
||||
if isAudioOutput {
|
||||
chatOptions.AudioFormat = "wav" // Default to WAV format
|
||||
}
|
||||
|
||||
if session, err = chatter.Send(chatReq, chatOptions); err != nil {
|
||||
return
|
||||
}
|
||||
@@ -42,8 +77,13 @@ func handleChatProcessing(currentFlags *Flags, registry *core.PluginRegistry, me
|
||||
result := session.GetLastMessage().Content
|
||||
|
||||
if !currentFlags.Stream || currentFlags.SuppressThink {
|
||||
// print the result if it was not streamed already or suppress-think disabled streaming output
|
||||
fmt.Println(result)
|
||||
// For TTS models with audio output, show a user-friendly message instead of raw data
|
||||
if isTTSModel && isAudioOutput && strings.HasPrefix(result, "FABRIC_AUDIO_DATA:") {
|
||||
fmt.Printf("TTS audio generated successfully and saved to: %s\n", currentFlags.Output)
|
||||
} else {
|
||||
// print the result if it was not streamed already or suppress-think disabled streaming output
|
||||
fmt.Println(result)
|
||||
}
|
||||
}
|
||||
|
||||
// if the copy flag is set, copy the message to the clipboard
|
||||
@@ -59,8 +99,29 @@ func handleChatProcessing(currentFlags *Flags, registry *core.PluginRegistry, me
|
||||
sessionAsString := session.String()
|
||||
err = CreateOutputFile(sessionAsString, currentFlags.Output)
|
||||
} else {
|
||||
err = CreateOutputFile(result, currentFlags.Output)
|
||||
// For TTS models, we need to handle audio output differently
|
||||
if isTTSModel && isAudioOutput {
|
||||
// Check if result contains actual audio data
|
||||
if strings.HasPrefix(result, "FABRIC_AUDIO_DATA:") {
|
||||
// Extract the binary audio data
|
||||
audioData := result[len("FABRIC_AUDIO_DATA:"):]
|
||||
err = CreateAudioOutputFile([]byte(audioData), currentFlags.Output)
|
||||
} else {
|
||||
// Fallback for any error messages or unexpected responses
|
||||
err = CreateOutputFile(result, currentFlags.Output)
|
||||
}
|
||||
} else {
|
||||
err = CreateOutputFile(result, currentFlags.Output)
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// isTTSModel reports whether modelName looks like a text-to-speech model.
//
// The check is a case-insensitive substring match on "tts" or
// "text-to-speech". Names ending in "preview-tts" need no separate test
// because they already contain "tts".
func isTTSModel(modelName string) bool {
	lowerModel := strings.ToLower(modelName)
	return strings.Contains(lowerModel, "tts") ||
		strings.Contains(lowerModel, "text-to-speech")
}
|
||||
|
||||
@@ -87,6 +87,8 @@ type Flags struct {
|
||||
ThinkStartTag string `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:"<think>"`
|
||||
ThinkEndTag string `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:"</think>"`
|
||||
DisableResponsesAPI bool `long:"disable-responses-api" yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"`
|
||||
Voice string `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"`
|
||||
ListGeminiVoices bool `long:"list-gemini-voices" description:"List all available Gemini TTS voices"`
|
||||
}
|
||||
|
||||
var debug = false
|
||||
@@ -441,6 +443,7 @@ func (o *Flags) BuildChatOptions() (ret *domain.ChatOptions, err error) {
|
||||
SuppressThink: o.SuppressThink,
|
||||
ThinkStartTag: startTag,
|
||||
ThinkEndTag: endTag,
|
||||
Voice: o.Voice,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/core"
|
||||
"github.com/danielmiessler/fabric/internal/plugins/ai"
|
||||
"github.com/danielmiessler/fabric/internal/plugins/ai/gemini"
|
||||
"github.com/danielmiessler/fabric/internal/plugins/db/fsdb"
|
||||
)
|
||||
|
||||
@@ -58,5 +60,11 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
|
||||
return true, err
|
||||
}
|
||||
|
||||
if currentFlags.ListGeminiVoices {
|
||||
voicesList := gemini.ListGeminiVoices(currentFlags.ShellCompleteOutput)
|
||||
fmt.Print(voicesList)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ package cli
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/atotto/clipboard"
|
||||
)
|
||||
@@ -28,3 +30,37 @@ func CreateOutputFile(message string, fileName string) (err error) {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// CreateAudioOutputFile creates a binary file for audio data
|
||||
func CreateAudioOutputFile(audioData []byte, fileName string) (err error) {
|
||||
// If no extension is provided, default to .wav
|
||||
if filepath.Ext(fileName) == "" {
|
||||
fileName += ".wav"
|
||||
}
|
||||
|
||||
// File existence check is now done in the CLI layer before TTS generation
|
||||
var file *os.File
|
||||
if file, err = os.Create(fileName); err != nil {
|
||||
err = fmt.Errorf("error creating audio file: %v", err)
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
if _, err = file.Write(audioData); err != nil {
|
||||
err = fmt.Errorf("error writing audio data to file: %v", err)
|
||||
}
|
||||
// No redundant output message here - the CLI layer handles success messaging
|
||||
return
|
||||
}
|
||||
|
||||
// IsAudioFormat reports whether fileName carries one of the recognised audio
// extensions (.wav, .mp3, .m4a, .aac, .ogg, .flac). The comparison ignores
// the case of the extension.
func IsAudioFormat(fileName string) bool {
	switch strings.ToLower(filepath.Ext(fileName)) {
	case ".wav", ".mp3", ".m4a", ".aac", ".ogg", ".flac":
		return true
	default:
		return false
	}
}
|
||||
|
||||
@@ -37,12 +37,16 @@ import (
|
||||
"github.com/danielmiessler/fabric/internal/util"
|
||||
)
|
||||
|
||||
// hasAWSCredentials checks if any AWS credentials are present either in the
|
||||
// environment variables or in the default/shared credentials file. It doesn't
|
||||
// attempt to verify the validity of the credentials, but simply ensures that a
|
||||
// potential authentication source exists so we can safely initialize the
|
||||
// Bedrock client without causing the AWS SDK to search for credentials.
|
||||
// hasAWSCredentials checks if Bedrock is properly configured by ensuring both
|
||||
// AWS credentials and BEDROCK_AWS_REGION are present. This prevents the Bedrock
|
||||
// client from being initialized when AWS credentials exist for other purposes.
|
||||
func hasAWSCredentials() bool {
|
||||
// First check if BEDROCK_AWS_REGION is set - this is required for Bedrock
|
||||
if os.Getenv("BEDROCK_AWS_REGION") == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
// Then check if AWS credentials are available
|
||||
if os.Getenv("AWS_PROFILE") != "" ||
|
||||
os.Getenv("AWS_ROLE_SESSION_NAME") != "" ||
|
||||
(os.Getenv("AWS_ACCESS_KEY_ID") != "" && os.Getenv("AWS_SECRET_ACCESS_KEY") != "") {
|
||||
|
||||
@@ -36,6 +36,9 @@ type ChatOptions struct {
|
||||
SuppressThink bool
|
||||
ThinkStartTag string
|
||||
ThinkEndTag string
|
||||
AudioOutput bool
|
||||
AudioFormat string
|
||||
Voice string
|
||||
}
|
||||
|
||||
// NormalizeMessages remove empty messages and ensure messages order user-assist-user
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -10,12 +11,20 @@ import (
|
||||
"github.com/danielmiessler/fabric/internal/plugins"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/domain"
|
||||
"github.com/google/generative-ai-go/genai"
|
||||
"google.golang.org/api/iterator"
|
||||
"google.golang.org/api/option"
|
||||
"google.golang.org/genai"
|
||||
)
|
||||
|
||||
const modelsNamePrefix = "models/"
|
||||
// WAV audio constants
|
||||
const (
|
||||
DefaultChannels = 1
|
||||
DefaultSampleRate = 24000
|
||||
DefaultBitsPerSample = 16
|
||||
WAVHeaderSize = 44
|
||||
RIFFHeaderSize = 36
|
||||
MaxAudioDataSize = 100 * 1024 * 1024 // 100MB limit for security
|
||||
MinAudioDataSize = 44 // Minimum viable audio data
|
||||
AudioDataPrefix = "FABRIC_AUDIO_DATA:"
|
||||
)
|
||||
|
||||
func NewClient() (ret *Client) {
|
||||
vendorName := "Gemini"
|
||||
@@ -39,107 +48,104 @@ type Client struct {
|
||||
func (o *Client) ListModels() (ret []string, err error) {
|
||||
ctx := context.Background()
|
||||
var client *genai.Client
|
||||
if client, err = genai.NewClient(ctx, option.WithAPIKey(o.ApiKey.Value)); err != nil {
|
||||
if client, err = genai.NewClient(ctx, &genai.ClientConfig{
|
||||
APIKey: o.ApiKey.Value,
|
||||
Backend: genai.BackendGeminiAPI,
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
iter := client.ListModels(ctx)
|
||||
for {
|
||||
var resp *genai.ModelInfo
|
||||
if resp, err = iter.Next(); err != nil {
|
||||
if errors.Is(err, iterator.Done) {
|
||||
err = nil
|
||||
}
|
||||
break
|
||||
}
|
||||
// List available models using the correct API
|
||||
resp, err := client.Models.List(ctx, &genai.ListModelsConfig{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name := o.buildModelNameSimple(resp.Name)
|
||||
ret = append(ret, name)
|
||||
for _, model := range resp.Items {
|
||||
// Strip the "models/" prefix for user convenience
|
||||
modelName := strings.TrimPrefix(model.Name, "models/")
|
||||
ret = append(ret, modelName)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) Send(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret string, err error) {
|
||||
systemInstruction, messages := toMessages(msgs)
|
||||
// Check if this is a TTS model request
|
||||
if o.isTTSModel(opts.Model) {
|
||||
if !opts.AudioOutput {
|
||||
err = fmt.Errorf("TTS model '%s' requires audio output. Please specify an audio output file with -o flag ending in .wav", opts.Model)
|
||||
return
|
||||
}
|
||||
|
||||
// Handle TTS generation
|
||||
return o.generateTTSAudio(ctx, msgs, opts)
|
||||
}
|
||||
|
||||
// Regular text generation
|
||||
var client *genai.Client
|
||||
if client, err = genai.NewClient(ctx, option.WithAPIKey(o.ApiKey.Value)); err != nil {
|
||||
return
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
model := client.GenerativeModel(o.buildModelNameFull(opts.Model))
|
||||
model.SetTemperature(float32(opts.Temperature))
|
||||
model.SetTopP(float32(opts.TopP))
|
||||
model.SystemInstruction = systemInstruction
|
||||
|
||||
var response *genai.GenerateContentResponse
|
||||
if response, err = model.GenerateContent(ctx, messages...); err != nil {
|
||||
if client, err = genai.NewClient(ctx, &genai.ClientConfig{
|
||||
APIKey: o.ApiKey.Value,
|
||||
Backend: genai.BackendGeminiAPI,
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
ret = o.extractText(response)
|
||||
// Convert messages to new SDK format
|
||||
contents := o.convertMessages(msgs)
|
||||
|
||||
// Generate content
|
||||
temperature := float32(opts.Temperature)
|
||||
topP := float32(opts.TopP)
|
||||
response, err := client.Models.GenerateContent(ctx, o.buildModelNameFull(opts.Model), contents, &genai.GenerateContentConfig{
|
||||
Temperature: &temperature,
|
||||
TopP: &topP,
|
||||
MaxOutputTokens: int32(opts.ModelContextLength),
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Extract text from response
|
||||
ret = o.extractTextFromResponse(response)
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) buildModelNameSimple(fullModelName string) string {
|
||||
return strings.TrimPrefix(fullModelName, modelsNamePrefix)
|
||||
}
|
||||
|
||||
func (o *Client) buildModelNameFull(modelName string) string {
|
||||
return fmt.Sprintf("%v%v", modelsNamePrefix, modelName)
|
||||
}
|
||||
|
||||
func (o *Client) SendStream(msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions, channel chan string) (err error) {
|
||||
ctx := context.Background()
|
||||
var client *genai.Client
|
||||
if client, err = genai.NewClient(ctx, option.WithAPIKey(o.ApiKey.Value)); err != nil {
|
||||
if client, err = genai.NewClient(ctx, &genai.ClientConfig{
|
||||
APIKey: o.ApiKey.Value,
|
||||
Backend: genai.BackendGeminiAPI,
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
systemInstruction, messages := toMessages(msgs)
|
||||
// Convert messages to new SDK format
|
||||
contents := o.convertMessages(msgs)
|
||||
|
||||
model := client.GenerativeModel(o.buildModelNameFull(opts.Model))
|
||||
model.SetTemperature(float32(opts.Temperature))
|
||||
model.SetTopP(float32(opts.TopP))
|
||||
model.SystemInstruction = systemInstruction
|
||||
// Generate streaming content
|
||||
temperature := float32(opts.Temperature)
|
||||
topP := float32(opts.TopP)
|
||||
stream := client.Models.GenerateContentStream(ctx, o.buildModelNameFull(opts.Model), contents, &genai.GenerateContentConfig{
|
||||
Temperature: &temperature,
|
||||
TopP: &topP,
|
||||
MaxOutputTokens: int32(opts.ModelContextLength),
|
||||
})
|
||||
|
||||
iter := model.GenerateContentStream(ctx, messages...)
|
||||
for {
|
||||
if resp, iterErr := iter.Next(); iterErr == nil {
|
||||
for _, candidate := range resp.Candidates {
|
||||
if candidate.Content != nil {
|
||||
for _, part := range candidate.Content.Parts {
|
||||
if text, ok := part.(genai.Text); ok {
|
||||
channel <- string(text)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !errors.Is(iterErr, iterator.Done) {
|
||||
channel <- fmt.Sprintf("%v\n", iterErr)
|
||||
}
|
||||
for response, err := range stream {
|
||||
if err != nil {
|
||||
channel <- fmt.Sprintf("Error: %v\n", err)
|
||||
close(channel)
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (o *Client) extractText(response *genai.GenerateContentResponse) (ret string) {
|
||||
for _, candidate := range response.Candidates {
|
||||
if candidate.Content == nil {
|
||||
break
|
||||
}
|
||||
for _, part := range candidate.Content.Parts {
|
||||
if text, ok := part.(genai.Text); ok {
|
||||
ret += string(text)
|
||||
}
|
||||
text := o.extractTextFromResponse(response)
|
||||
if text != "" {
|
||||
channel <- text
|
||||
}
|
||||
}
|
||||
close(channel)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -147,18 +153,223 @@ func (o *Client) NeedsRawMode(modelName string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func toMessages(msgs []*chat.ChatCompletionMessage) (systemInstruction *genai.Content, messages []genai.Part) {
|
||||
if len(msgs) >= 2 {
|
||||
systemInstruction = &genai.Content{
|
||||
Parts: []genai.Part{
|
||||
genai.Text(msgs[0].Content),
|
||||
},
|
||||
}
|
||||
for _, msg := range msgs[1:] {
|
||||
messages = append(messages, genai.Text(msg.Content))
|
||||
}
|
||||
} else {
|
||||
messages = append(messages, genai.Text(msgs[0].Content))
|
||||
// buildModelNameFull adds the "models/" prefix for API calls
|
||||
func (o *Client) buildModelNameFull(modelName string) string {
|
||||
if strings.HasPrefix(modelName, "models/") {
|
||||
return modelName
|
||||
}
|
||||
return
|
||||
return "models/" + modelName
|
||||
}
|
||||
|
||||
// isTTSModel checks if the model is a text-to-speech model
|
||||
func (o *Client) isTTSModel(modelName string) bool {
|
||||
lowerModel := strings.ToLower(modelName)
|
||||
return strings.Contains(lowerModel, "tts") ||
|
||||
strings.Contains(lowerModel, "preview-tts") ||
|
||||
strings.Contains(lowerModel, "text-to-speech")
|
||||
}
|
||||
|
||||
// extractTextForTTS extracts text content from chat messages for TTS generation
|
||||
func (o *Client) extractTextForTTS(msgs []*chat.ChatCompletionMessage) (string, error) {
|
||||
for i := len(msgs) - 1; i >= 0; i-- {
|
||||
if msgs[i].Role == chat.ChatMessageRoleUser && msgs[i].Content != "" {
|
||||
return msgs[i].Content, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("no text content found for TTS generation")
|
||||
}
|
||||
|
||||
// createGenaiClient creates a new GenAI client for TTS operations
|
||||
func (o *Client) createGenaiClient(ctx context.Context) (*genai.Client, error) {
|
||||
return genai.NewClient(ctx, &genai.ClientConfig{
|
||||
APIKey: o.ApiKey.Value,
|
||||
Backend: genai.BackendGeminiAPI,
|
||||
})
|
||||
}
|
||||
|
||||
// generateTTSAudio handles TTS audio generation using the new SDK
|
||||
func (o *Client) generateTTSAudio(ctx context.Context, msgs []*chat.ChatCompletionMessage, opts *domain.ChatOptions) (ret string, err error) {
|
||||
textToSpeak, err := o.extractTextForTTS(msgs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Validate voice name before making API call
|
||||
if opts.Voice != "" && !IsValidGeminiVoice(opts.Voice) {
|
||||
validVoices := GetGeminiVoiceNames()
|
||||
return "", fmt.Errorf("invalid voice '%s'. Valid voices are: %v", opts.Voice, validVoices)
|
||||
}
|
||||
|
||||
client, err := o.createGenaiClient(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return o.performTTSGeneration(ctx, client, textToSpeak, opts)
|
||||
}
|
||||
|
||||
// performTTSGeneration performs the actual TTS generation and audio processing
|
||||
func (o *Client) performTTSGeneration(ctx context.Context, client *genai.Client, textToSpeak string, opts *domain.ChatOptions) (string, error) {
|
||||
|
||||
// Create content for TTS
|
||||
contents := []*genai.Content{{
|
||||
Parts: []*genai.Part{{Text: textToSpeak}},
|
||||
}}
|
||||
|
||||
// Configure for TTS generation
|
||||
voiceName := opts.Voice
|
||||
if voiceName == "" {
|
||||
voiceName = "Kore" // Default voice if none specified
|
||||
}
|
||||
|
||||
config := &genai.GenerateContentConfig{
|
||||
ResponseModalities: []string{"AUDIO"},
|
||||
SpeechConfig: &genai.SpeechConfig{
|
||||
VoiceConfig: &genai.VoiceConfig{
|
||||
PrebuiltVoiceConfig: &genai.PrebuiltVoiceConfig{
|
||||
VoiceName: voiceName,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// Generate TTS content
|
||||
response, err := client.Models.GenerateContent(ctx, o.buildModelNameFull(opts.Model), contents, config)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("TTS generation failed: %w", err)
|
||||
}
|
||||
|
||||
// Extract and process audio data
|
||||
if len(response.Candidates) > 0 && response.Candidates[0].Content != nil && len(response.Candidates[0].Content.Parts) > 0 {
|
||||
part := response.Candidates[0].Content.Parts[0]
|
||||
if part.InlineData != nil && len(part.InlineData.Data) > 0 {
|
||||
// Validate audio data format and size
|
||||
if part.InlineData.MIMEType != "" && !strings.HasPrefix(part.InlineData.MIMEType, "audio/") {
|
||||
return "", fmt.Errorf("unexpected data type: %s, expected audio data", part.InlineData.MIMEType)
|
||||
}
|
||||
|
||||
pcmData := part.InlineData.Data
|
||||
if len(pcmData) < MinAudioDataSize {
|
||||
return "", fmt.Errorf("audio data too small: %d bytes, minimum required: %d", len(pcmData), MinAudioDataSize)
|
||||
}
|
||||
|
||||
// Generate WAV file with proper headers and return the binary data
|
||||
wavData, err := o.generateWAVFile(pcmData)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to generate WAV file: %w", err)
|
||||
}
|
||||
|
||||
// Validate generated WAV data
|
||||
if len(wavData) < WAVHeaderSize {
|
||||
return "", fmt.Errorf("generated WAV data is invalid: %d bytes, minimum required: %d", len(wavData), WAVHeaderSize)
|
||||
}
|
||||
|
||||
// Store the binary audio data in a special format that the CLI can detect
|
||||
// Use more efficient string concatenation
|
||||
return AudioDataPrefix + string(wavData), nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no audio data received from TTS model")
|
||||
}
|
||||
|
||||
// generateWAVFile creates WAV data from PCM data with proper headers
|
||||
func (o *Client) generateWAVFile(pcmData []byte) ([]byte, error) {
|
||||
// Validate input size to prevent potential security issues
|
||||
if len(pcmData) == 0 {
|
||||
return nil, fmt.Errorf("empty PCM data provided")
|
||||
}
|
||||
if len(pcmData) > MaxAudioDataSize {
|
||||
return nil, fmt.Errorf("PCM data too large: %d bytes, maximum allowed: %d", len(pcmData), MaxAudioDataSize)
|
||||
}
|
||||
|
||||
// WAV file parameters (Gemini TTS default specs)
|
||||
channels := DefaultChannels
|
||||
sampleRate := DefaultSampleRate
|
||||
bitsPerSample := DefaultBitsPerSample
|
||||
|
||||
// Calculate required values
|
||||
byteRate := sampleRate * channels * bitsPerSample / 8
|
||||
blockAlign := channels * bitsPerSample / 8
|
||||
dataLen := uint32(len(pcmData))
|
||||
riffSize := RIFFHeaderSize + dataLen
|
||||
|
||||
// Pre-allocate buffer with known size for better performance
|
||||
totalSize := int(riffSize + 8) // +8 for RIFF header
|
||||
buf := bytes.NewBuffer(make([]byte, 0, totalSize))
|
||||
|
||||
// RIFF header
|
||||
buf.WriteString("RIFF")
|
||||
binary.Write(buf, binary.LittleEndian, riffSize)
|
||||
buf.WriteString("WAVE")
|
||||
|
||||
// fmt chunk
|
||||
buf.WriteString("fmt ")
|
||||
binary.Write(buf, binary.LittleEndian, uint32(16)) // subchunk1Size
|
||||
binary.Write(buf, binary.LittleEndian, uint16(1)) // audioFormat = PCM
|
||||
binary.Write(buf, binary.LittleEndian, uint16(channels)) // numChannels
|
||||
binary.Write(buf, binary.LittleEndian, uint32(sampleRate)) // sampleRate
|
||||
binary.Write(buf, binary.LittleEndian, uint32(byteRate)) // byteRate
|
||||
binary.Write(buf, binary.LittleEndian, uint16(blockAlign)) // blockAlign
|
||||
binary.Write(buf, binary.LittleEndian, uint16(bitsPerSample)) // bitsPerSample
|
||||
|
||||
// data chunk
|
||||
buf.WriteString("data")
|
||||
binary.Write(buf, binary.LittleEndian, dataLen)
|
||||
|
||||
// Write PCM data to buffer
|
||||
buf.Write(pcmData)
|
||||
|
||||
// Validate generated WAV data
|
||||
result := buf.Bytes()
|
||||
if len(result) < WAVHeaderSize {
|
||||
return nil, fmt.Errorf("generated WAV data is invalid: %d bytes, minimum required: %d", len(result), WAVHeaderSize)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// convertMessages converts fabric chat messages to genai Content format
|
||||
func (o *Client) convertMessages(msgs []*chat.ChatCompletionMessage) []*genai.Content {
|
||||
var contents []*genai.Content
|
||||
|
||||
for _, msg := range msgs {
|
||||
content := &genai.Content{Parts: []*genai.Part{}}
|
||||
|
||||
if msg.Content != "" {
|
||||
content.Parts = append(content.Parts, &genai.Part{Text: msg.Content})
|
||||
}
|
||||
|
||||
// Handle multi-content messages (images, etc.)
|
||||
for _, part := range msg.MultiContent {
|
||||
switch part.Type {
|
||||
case chat.ChatMessagePartTypeText:
|
||||
content.Parts = append(content.Parts, &genai.Part{Text: part.Text})
|
||||
case chat.ChatMessagePartTypeImageURL:
|
||||
// TODO: Handle image URLs if needed
|
||||
// This would require downloading and converting to inline data
|
||||
}
|
||||
}
|
||||
|
||||
contents = append(contents, content)
|
||||
}
|
||||
|
||||
return contents
|
||||
}
|
||||
|
||||
// extractTextFromResponse extracts text content from the response
|
||||
func (o *Client) extractTextFromResponse(response *genai.GenerateContentResponse) string {
|
||||
var result strings.Builder
|
||||
|
||||
for _, candidate := range response.Candidates {
|
||||
if candidate.Content != nil {
|
||||
for _, part := range candidate.Content.Parts {
|
||||
if part.Text != "" {
|
||||
result.WriteString(part.Text)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result.String()
|
||||
}
|
||||
|
||||
@@ -3,32 +3,40 @@ package gemini
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/generative-ai-go/genai"
|
||||
"google.golang.org/genai"
|
||||
)
|
||||
|
||||
// Test generated using Keploy
|
||||
func TestBuildModelNameSimple(t *testing.T) {
|
||||
// Test buildModelNameFull method
|
||||
func TestBuildModelNameFull(t *testing.T) {
|
||||
client := &Client{}
|
||||
fullModelName := "models/chat-bison-001"
|
||||
expected := "chat-bison-001"
|
||||
|
||||
result := client.buildModelNameSimple(fullModelName)
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"chat-bison-001", "models/chat-bison-001"},
|
||||
{"models/chat-bison-001", "models/chat-bison-001"},
|
||||
{"gemini-2.5-flash-preview-tts", "models/gemini-2.5-flash-preview-tts"},
|
||||
}
|
||||
|
||||
if result != expected {
|
||||
t.Errorf("Expected %v, got %v", expected, result)
|
||||
for _, test := range tests {
|
||||
result := client.buildModelNameFull(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("For input %v, expected %v, got %v", test.input, test.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test generated using Keploy
|
||||
func TestExtractText(t *testing.T) {
|
||||
// Test extractTextFromResponse method
|
||||
func TestExtractTextFromResponse(t *testing.T) {
|
||||
client := &Client{}
|
||||
response := &genai.GenerateContentResponse{
|
||||
Candidates: []*genai.Candidate{
|
||||
{
|
||||
Content: &genai.Content{
|
||||
Parts: []genai.Part{
|
||||
genai.Text("Hello, "),
|
||||
genai.Text("world!"),
|
||||
Parts: []*genai.Part{
|
||||
{Text: "Hello, "},
|
||||
{Text: "world!"},
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -36,9 +44,56 @@ func TestExtractText(t *testing.T) {
|
||||
}
|
||||
expected := "Hello, world!"
|
||||
|
||||
result := client.extractText(response)
|
||||
result := client.extractTextFromResponse(response)
|
||||
|
||||
if result != expected {
|
||||
t.Errorf("Expected %v, got %v", expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
// Test isTTSModel method
|
||||
func TestIsTTSModel(t *testing.T) {
|
||||
client := &Client{}
|
||||
|
||||
tests := []struct {
|
||||
modelName string
|
||||
expected bool
|
||||
}{
|
||||
{"gemini-2.5-flash-preview-tts", true},
|
||||
{"text-to-speech-model", true},
|
||||
{"TTS-MODEL", true},
|
||||
{"gemini-pro", false},
|
||||
{"chat-bison", false},
|
||||
{"", false},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result := client.isTTSModel(test.modelName)
|
||||
if result != test.expected {
|
||||
t.Errorf("For model %v, expected %v, got %v", test.modelName, test.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test generateWAVFile method (basic test)
|
||||
func TestGenerateWAVFile(t *testing.T) {
|
||||
client := &Client{}
|
||||
|
||||
// Test with minimal PCM data
|
||||
pcmData := []byte{0x00, 0x01, 0x02, 0x03}
|
||||
|
||||
result, err := client.generateWAVFile(pcmData)
|
||||
if err != nil {
|
||||
t.Errorf("generateWAVFile failed: %v", err)
|
||||
}
|
||||
|
||||
// Check that we got some data back
|
||||
if len(result) == 0 {
|
||||
t.Error("generateWAVFile returned empty data")
|
||||
}
|
||||
|
||||
// Check that it starts with RIFF header
|
||||
if len(result) >= 4 && string(result[0:4]) != "RIFF" {
|
||||
t.Error("Generated WAV data doesn't start with RIFF header")
|
||||
}
|
||||
}
|
||||
|
||||
218
internal/plugins/ai/gemini/voices.go
Normal file
218
internal/plugins/ai/gemini/voices.go
Normal file
@@ -0,0 +1,218 @@
|
||||
package gemini
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// GeminiVoice represents a Gemini TTS voice with its characteristics
|
||||
type GeminiVoice struct {
|
||||
Name string
|
||||
Description string
|
||||
Characteristics []string
|
||||
}
|
||||
|
||||
// GetGeminiVoices returns the current list of supported Gemini TTS voices
|
||||
// This list is maintained based on official Google Gemini documentation
|
||||
// https://ai.google.dev/gemini-api/docs/speech-generation
|
||||
func GetGeminiVoices() []GeminiVoice {
|
||||
return []GeminiVoice{
|
||||
// Firm voices
|
||||
{Name: "Kore", Description: "Firm and confident", Characteristics: []string{"firm", "confident", "default"}},
|
||||
{Name: "Orus", Description: "Firm and decisive", Characteristics: []string{"firm", "decisive"}},
|
||||
{Name: "Alnilam", Description: "Firm and strong", Characteristics: []string{"firm", "strong"}},
|
||||
|
||||
// Upbeat voices
|
||||
{Name: "Puck", Description: "Upbeat and energetic", Characteristics: []string{"upbeat", "energetic"}},
|
||||
{Name: "Laomedeia", Description: "Upbeat and lively", Characteristics: []string{"upbeat", "lively"}},
|
||||
|
||||
// Bright voices
|
||||
{Name: "Zephyr", Description: "Bright and cheerful", Characteristics: []string{"bright", "cheerful"}},
|
||||
{Name: "Autonoe", Description: "Bright and optimistic", Characteristics: []string{"bright", "optimistic"}},
|
||||
|
||||
// Informative voices
|
||||
{Name: "Charon", Description: "Informative and clear", Characteristics: []string{"informative", "clear"}},
|
||||
{Name: "Rasalgethi", Description: "Informative and professional", Characteristics: []string{"informative", "professional"}},
|
||||
|
||||
// Natural voices
|
||||
{Name: "Aoede", Description: "Breezy and natural", Characteristics: []string{"breezy", "natural"}},
|
||||
{Name: "Leda", Description: "Youthful and energetic", Characteristics: []string{"youthful", "energetic"}},
|
||||
|
||||
// Gentle voices
|
||||
{Name: "Vindemiatrix", Description: "Gentle and kind", Characteristics: []string{"gentle", "kind"}},
|
||||
{Name: "Achernar", Description: "Soft and gentle", Characteristics: []string{"soft", "gentle"}},
|
||||
{Name: "Enceladus", Description: "Breathy and soft", Characteristics: []string{"breathy", "soft"}},
|
||||
|
||||
// Warm voices
|
||||
{Name: "Sulafat", Description: "Warm and welcoming", Characteristics: []string{"warm", "welcoming"}},
|
||||
{Name: "Capella", Description: "Warm and approachable", Characteristics: []string{"warm", "approachable"}},
|
||||
|
||||
// Clear voices
|
||||
{Name: "Iapetus", Description: "Clear and articulate", Characteristics: []string{"clear", "articulate"}},
|
||||
{Name: "Erinome", Description: "Clear and precise", Characteristics: []string{"clear", "precise"}},
|
||||
|
||||
// Pleasant voices
|
||||
{Name: "Algieba", Description: "Smooth and pleasant", Characteristics: []string{"smooth", "pleasant"}},
|
||||
{Name: "Vega", Description: "Smooth and flowing", Characteristics: []string{"smooth", "flowing"}},
|
||||
|
||||
// Textured voices
|
||||
{Name: "Algenib", Description: "Gravelly texture", Characteristics: []string{"gravelly", "textured"}},
|
||||
|
||||
// Relaxed voices
|
||||
{Name: "Callirrhoe", Description: "Easy-going and relaxed", Characteristics: []string{"relaxed", "easy-going"}},
|
||||
{Name: "Despina", Description: "Calm and serene", Characteristics: []string{"calm", "serene"}},
|
||||
|
||||
// Mature voices
|
||||
{Name: "Gacrux", Description: "Mature and experienced", Characteristics: []string{"mature", "experienced"}},
|
||||
|
||||
// Expressive voices
|
||||
{Name: "Pulcherrima", Description: "Forward and expressive", Characteristics: []string{"forward", "expressive"}},
|
||||
{Name: "Lyra", Description: "Melodic and expressive", Characteristics: []string{"melodic", "expressive"}},
|
||||
|
||||
// Dynamic voices
|
||||
{Name: "Fenrir", Description: "Excitable and dynamic", Characteristics: []string{"excitable", "dynamic"}},
|
||||
{Name: "Sadachbia", Description: "Lively and animated", Characteristics: []string{"lively", "animated"}},
|
||||
|
||||
// Friendly voices
|
||||
{Name: "Achird", Description: "Friendly and approachable", Characteristics: []string{"friendly", "approachable"}},
|
||||
|
||||
// Casual voices
|
||||
{Name: "Zubenelgenubi", Description: "Casual and conversational", Characteristics: []string{"casual", "conversational"}},
|
||||
|
||||
// Additional voices from latest API
|
||||
{Name: "Sadaltager", Description: "Experimental voice with a calm and neutral tone", Characteristics: []string{"experimental", "calm", "neutral"}},
|
||||
{Name: "Schedar", Description: "Experimental voice with a warm and engaging tone", Characteristics: []string{"experimental", "warm", "engaging"}},
|
||||
{Name: "Umbriel", Description: "Experimental voice with a deep and resonant tone", Characteristics: []string{"experimental", "deep", "resonant"}},
|
||||
}
|
||||
}
|
||||
|
||||
// GetGeminiVoiceNames returns just the voice names in alphabetical order
|
||||
func GetGeminiVoiceNames() []string {
|
||||
voices := GetGeminiVoices()
|
||||
names := make([]string, len(voices))
|
||||
for i, voice := range voices {
|
||||
names[i] = voice.Name
|
||||
}
|
||||
sort.Strings(names)
|
||||
return names
|
||||
}
|
||||
|
||||
// IsValidGeminiVoice checks if a voice name is valid
|
||||
func IsValidGeminiVoice(voiceName string) bool {
|
||||
if voiceName == "" {
|
||||
return true // Empty voice is valid (will use default)
|
||||
}
|
||||
|
||||
for _, voice := range GetGeminiVoices() {
|
||||
if voice.Name == voiceName {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetGeminiVoiceByName returns a specific voice by name
|
||||
func GetGeminiVoiceByName(name string) (*GeminiVoice, error) {
|
||||
for _, voice := range GetGeminiVoices() {
|
||||
if voice.Name == name {
|
||||
return &voice, nil
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("voice '%s' not found", name)
|
||||
}
|
||||
|
||||
// ListGeminiVoices formats the voice list for display
|
||||
func ListGeminiVoices(shellCompleteMode bool) string {
|
||||
if shellCompleteMode {
|
||||
// For shell completion, just return voice names
|
||||
names := GetGeminiVoiceNames()
|
||||
result := ""
|
||||
for _, name := range names {
|
||||
result += name + "\n"
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// For human-readable output
|
||||
voices := GetGeminiVoices()
|
||||
result := "Available Gemini Text-to-Speech voices:\n\n"
|
||||
|
||||
// Group by characteristics for better readability
|
||||
groups := map[string][]GeminiVoice{
|
||||
"Firm & Confident": {},
|
||||
"Bright & Cheerful": {},
|
||||
"Warm & Welcoming": {},
|
||||
"Clear & Professional": {},
|
||||
"Natural & Expressive": {},
|
||||
"Other Voices": {},
|
||||
}
|
||||
|
||||
for _, voice := range voices {
|
||||
placed := false
|
||||
for _, char := range voice.Characteristics {
|
||||
switch char {
|
||||
case "firm", "confident", "decisive", "strong":
|
||||
if !placed {
|
||||
groups["Firm & Confident"] = append(groups["Firm & Confident"], voice)
|
||||
placed = true
|
||||
}
|
||||
case "bright", "cheerful", "upbeat", "energetic", "lively":
|
||||
if !placed {
|
||||
groups["Bright & Cheerful"] = append(groups["Bright & Cheerful"], voice)
|
||||
placed = true
|
||||
}
|
||||
case "warm", "welcoming", "friendly", "approachable":
|
||||
if !placed {
|
||||
groups["Warm & Welcoming"] = append(groups["Warm & Welcoming"], voice)
|
||||
placed = true
|
||||
}
|
||||
case "clear", "informative", "professional", "articulate":
|
||||
if !placed {
|
||||
groups["Clear & Professional"] = append(groups["Clear & Professional"], voice)
|
||||
placed = true
|
||||
}
|
||||
case "natural", "expressive", "melodic", "breezy":
|
||||
if !placed {
|
||||
groups["Natural & Expressive"] = append(groups["Natural & Expressive"], voice)
|
||||
placed = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if !placed {
|
||||
groups["Other Voices"] = append(groups["Other Voices"], voice)
|
||||
}
|
||||
}
|
||||
|
||||
// Output grouped voices
|
||||
for groupName, groupVoices := range groups {
|
||||
if len(groupVoices) > 0 {
|
||||
result += fmt.Sprintf("%s:\n", groupName)
|
||||
for _, voice := range groupVoices {
|
||||
defaultStr := ""
|
||||
if voice.Name == "Kore" {
|
||||
defaultStr = " (default)"
|
||||
}
|
||||
result += fmt.Sprintf(" %-15s - %s%s\n", voice.Name, voice.Description, defaultStr)
|
||||
}
|
||||
result += "\n"
|
||||
}
|
||||
}
|
||||
|
||||
result += "Use --voice <voice_name> to select a specific voice.\n"
|
||||
result += "Example: fabric --voice Charon -m gemini-2.5-flash-preview-tts -o output.wav \"Hello world\"\n"
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// NOTE: This implementation maintains a curated list based on official Google documentation.
|
||||
// In the future, if Google provides a dynamic voice discovery API, this can be updated
|
||||
// to make API calls for real-time voice discovery.
|
||||
//
|
||||
// The current approach ensures:
|
||||
// 1. Fast response times (no API calls needed)
|
||||
// 2. Reliable voice information with descriptions
|
||||
// 3. Easy maintenance when new voices are added
|
||||
// 4. Offline functionality
|
||||
//
|
||||
// To update voices: Monitor Google's Gemini TTS documentation at:
|
||||
// https://ai.google.dev/gemini-api/docs/speech-generation
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -61,6 +62,11 @@ func (t *transport_sec) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
return t.underlyingTransport.RoundTrip(req)
|
||||
}
|
||||
|
||||
// IsConfigured returns true only if OLLAMA_API_URL environment variable is explicitly set
|
||||
func (o *Client) IsConfigured() bool {
|
||||
return os.Getenv("OLLAMA_API_URL") != ""
|
||||
}
|
||||
|
||||
func (o *Client) configure() (err error) {
|
||||
if o.apiUrl, err = url.Parse(o.ApiUrl.Value); err != nil {
|
||||
fmt.Printf("cannot parse URL: %s: %v\n", o.ApiUrl.Value, err)
|
||||
|
||||
@@ -4,9 +4,6 @@ schema = 3
|
||||
[mod."cloud.google.com/go"]
|
||||
version = "v0.121.2"
|
||||
hash = "sha256-BCgGHxKti8slH98UDDurtgzX3lgcYEklsmj4ImPpwlc="
|
||||
[mod."cloud.google.com/go/ai"]
|
||||
version = "v0.12.1"
|
||||
hash = "sha256-wg3oLMS68E/v7EdNzywbjwEmpk+u6U8LTnIc1pq8edo="
|
||||
[mod."cloud.google.com/go/auth"]
|
||||
version = "v0.16.2"
|
||||
hash = "sha256-BAU9WGFKe0pd5Eu3l/Mbts+QeCOjS+lChr5hrPBCzdA="
|
||||
@@ -16,9 +13,6 @@ schema = 3
|
||||
[mod."cloud.google.com/go/compute/metadata"]
|
||||
version = "v0.7.0"
|
||||
hash = "sha256-jJZDW+hibqjMiY8OiJhgJALbGwEq+djLOxfYR7upQyE="
|
||||
[mod."cloud.google.com/go/longrunning"]
|
||||
version = "v0.6.7"
|
||||
hash = "sha256-9I0Nc2KWAEVoxDngNkqFUdASmZIAySfMEELlPh3Q3xA="
|
||||
[mod."dario.cat/mergo"]
|
||||
version = "v1.0.2"
|
||||
hash = "sha256-p6jdiHlLEfZES8vJnDywG4aVzIe16p0CU6iglglIweA="
|
||||
@@ -163,9 +157,9 @@ schema = 3
|
||||
[mod."github.com/golang/groupcache"]
|
||||
version = "v0.0.0-20241129210726-2c02b8208cf8"
|
||||
hash = "sha256-AdLZ3dJLe/yduoNvZiXugZxNfmwJjNQyQGsIdzYzH74="
|
||||
[mod."github.com/google/generative-ai-go"]
|
||||
version = "v0.20.1"
|
||||
hash = "sha256-9bSpEs4kByhgyTKiHdOY5muYjGBTluA1LvEjw2gSoLI="
|
||||
[mod."github.com/google/go-cmp"]
|
||||
version = "v0.7.0"
|
||||
hash = "sha256-JbxZFBFGCh/Rj5XZ1vG94V2x7c18L8XKB0N9ZD5F2rM="
|
||||
[mod."github.com/google/go-github/v66"]
|
||||
version = "v66.0.0"
|
||||
hash = "sha256-o4usfbApXwTuwIFMECagJwK2H4UMJbCpdyGdWZ5VUpI="
|
||||
@@ -184,6 +178,9 @@ schema = 3
|
||||
[mod."github.com/googleapis/gax-go/v2"]
|
||||
version = "v2.14.2"
|
||||
hash = "sha256-QyY7wuCkrOJCJIf9Q884KD/BC3vk/QtQLXeLeNPt750="
|
||||
[mod."github.com/gorilla/websocket"]
|
||||
version = "v1.5.3"
|
||||
hash = "sha256-vTIGEFMEi+30ZdO6ffMNJ/kId6pZs5bbyqov8xe9BM0="
|
||||
[mod."github.com/hasura/go-graphql-client"]
|
||||
version = "v0.14.4"
|
||||
hash = "sha256-TBNYIfC2CI0cVu7aZcHSWc6ZkgdkWSSfoCXqoAJT8jw="
|
||||
@@ -292,9 +289,6 @@ schema = 3
|
||||
[mod."go.opentelemetry.io/auto/sdk"]
|
||||
version = "v1.1.0"
|
||||
hash = "sha256-cA9qCCu8P1NSJRxgmpfkfa5rKyn9X+Y/9FSmSd5xjyo="
|
||||
[mod."go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"]
|
||||
version = "v0.61.0"
|
||||
hash = "sha256-o5w9k3VbqP3gaXI3Aelw93LLHH53U4PnkYVwc3MaY3Y="
|
||||
[mod."go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"]
|
||||
version = "v0.61.0"
|
||||
hash = "sha256-4pfXD7ErXhexSynXiEEQSAkWoPwHd7PEDE3M1Zi5gLM="
|
||||
@@ -331,12 +325,12 @@ schema = 3
|
||||
[mod."golang.org/x/text"]
|
||||
version = "v0.27.0"
|
||||
hash = "sha256-VX0rOh6L3qIvquKSGjfZQFU8URNtGvkNvxE7OZtboW8="
|
||||
[mod."golang.org/x/time"]
|
||||
version = "v0.12.0"
|
||||
hash = "sha256-Cp3oxrCMH2wyxjzr5SHVmyhgaoUuSl56Uy00Q7DYEpw="
|
||||
[mod."google.golang.org/api"]
|
||||
version = "v0.236.0"
|
||||
hash = "sha256-tP1RSUSnQ4a0axgZQwEZgKF1E13nL02FSP1NPSZr0Rc="
|
||||
[mod."google.golang.org/genai"]
|
||||
version = "v1.17.0"
|
||||
hash = "sha256-Iw09DYpWuGR8E++dsFCBs702oKJPZLBEEGv0g4a4AhA="
|
||||
[mod."google.golang.org/genproto/googleapis/api"]
|
||||
version = "v0.0.0-20250603155806-513f23925822"
|
||||
hash = "sha256-0CS432v9zVhkVLqFpZtxBX8rvVqP67lb7qQ3es7RqIU="
|
||||
|
||||
@@ -1 +1 @@
|
||||
"1.4.264"
|
||||
"1.4.269"
|
||||
|
||||
116
scripts/docker-test/README.md
Normal file
116
scripts/docker-test/README.md
Normal file
@@ -0,0 +1,116 @@
|
||||
# Docker Test Environment for API Configuration Fix
|
||||
|
||||
This directory contains a Docker-based testing setup for fixing the issue where Fabric calls Ollama and Bedrock APIs even when not configured. This addresses the problem where unconfigured services show error messages during model listing.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
./scripts/docker-test/test-runner.sh
|
||||
|
||||
# Interactive mode - pick which test to run
|
||||
./scripts/docker-test/test-runner.sh -i
|
||||
|
||||
# Run specific test case
|
||||
./scripts/docker-test/test-runner.sh gemini-only
|
||||
|
||||
# Shell into test environment
|
||||
./scripts/docker-test/test-runner.sh -s gemini-only
|
||||
|
||||
# Build image only (for development)
|
||||
./scripts/docker-test/test-runner.sh -b
|
||||
|
||||
# Show help
|
||||
./scripts/docker-test/test-runner.sh -h
|
||||
```
|
||||
|
||||
## Test Cases
|
||||
|
||||
1. **no-config**: No APIs configured
|
||||
2. **gemini-only**: Only Gemini configured (reproduces original issue #1195)
|
||||
3. **openai-only**: Only OpenAI configured
|
||||
4. **ollama-only**: Only Ollama configured
|
||||
5. **bedrock-only**: Only Bedrock configured
|
||||
6. **mixed**: Multiple APIs configured (Gemini + OpenAI + Ollama)
|
||||
|
||||
## Environment Files
|
||||
|
||||
Each test case has a corresponding environment file in `scripts/docker-test/env/`:
|
||||
|
||||
- `env.no-config` - Empty configuration
|
||||
- `env.gemini-only` - Only Gemini API key
|
||||
- `env.openai-only` - Only OpenAI API key
|
||||
- `env.ollama-only` - Only Ollama URL
|
||||
- `env.bedrock-only` - Only Bedrock configuration
|
||||
- `env.mixed` - Multiple API configurations
|
||||
|
||||
These files are volume-mounted into the Docker container. Test runs mount them read-only; in shell mode the mount is writable, so changes made with `fabric -S` persist in the environment file.
|
||||
|
||||
## Interactive Mode & Shell Access
|
||||
|
||||
The interactive mode (`-i`) provides several options:
|
||||
|
||||
```text
|
||||
Available test cases:
|
||||
|
||||
1) No APIs configured (no-config)
|
||||
2) Only Gemini configured (gemini-only)
|
||||
3) Only OpenAI configured (openai-only)
|
||||
4) Only Ollama configured (ollama-only)
|
||||
5) Only Bedrock configured (bedrock-only)
|
||||
6) Mixed configuration (mixed)
|
||||
7) Run all tests
|
||||
0) Exit
|
||||
|
||||
Add '!' after number to shell into test environment (e.g., '1!' to shell into no-config)
|
||||
```
|
||||
|
||||
### Shell Mode
|
||||
|
||||
- Use `1!`, `2!`, etc. to shell into any test environment
|
||||
- Run `fabric -S` to configure APIs interactively
|
||||
- Run `fabric --listmodels` or `fabric -L` to test model listing
|
||||
- Changes persist in the environment files
|
||||
- Type `exit` to return to test runner
|
||||
|
||||
## Expected Results
|
||||
|
||||
**Before Fix:**
|
||||
|
||||
- `no-config` and `gemini-only` tests show Ollama connection errors
|
||||
- Tests show Bedrock authentication errors when `BEDROCK_AWS_REGION` is not set
|
||||
- Error: `Ollama Get "http://localhost:11434/api/tags": dial tcp...`
|
||||
- Error: `Bedrock failed to list foundation models...`
|
||||
|
||||
**After Fix:**
|
||||
|
||||
- Clean output with no error messages for unconfigured services
|
||||
- Only configured services appear in model listings
|
||||
- Ollama is only initialized when `OLLAMA_API_URL` is set
|
||||
- Bedrock is only initialized when `BEDROCK_AWS_REGION` is set
|
||||
|
||||
## Implementation Details
|
||||
|
||||
- **Volume-mounted configs**: Environment files are mounted to `/home/testuser/.config/fabric/.env`
|
||||
- **Persistent state**: Configuration changes survive between test runs
|
||||
- **Single Docker image**: Built once from `scripts/docker-test/base/Dockerfile`, reused for all tests
|
||||
- **Isolated environments**: Each test uses its own environment file
|
||||
- **Cross-platform**: Works on macOS, Linux, and Windows with Docker
|
||||
|
||||
## Development Workflow
|
||||
|
||||
1. Make code changes to fix API initialization logic
|
||||
2. Run `./scripts/docker-test/test-runner.sh no-config` to test the main issue
|
||||
3. Use `./scripts/docker-test/test-runner.sh -i` for interactive testing
|
||||
4. Shell into environments (`1!`, `2!`, etc.) to debug specific configurations
|
||||
5. Run all tests before submitting PR: `./scripts/docker-test/test-runner.sh`
|
||||
|
||||
## Architecture
|
||||
|
||||
The fix involves:
|
||||
|
||||
1. **Ollama**: Override `IsConfigured()` method to check for `OLLAMA_API_URL` env var
|
||||
2. **Bedrock**: Modify `hasAWSCredentials()` to require `BEDROCK_AWS_REGION`
|
||||
3. **Plugin Registry**: Only initialize providers when properly configured
|
||||
|
||||
This prevents unnecessary API calls and eliminates confusing error messages for users.
|
||||
30
scripts/docker-test/base/Dockerfile
Normal file
30
scripts/docker-test/base/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
# Build stage: compile the fabric binary from the repository sources.
FROM golang:1.24-alpine AS builder

WORKDIR /app

# Copy the module files on their own first so the dependency download layer
# is reused until go.mod or go.sum change.
COPY go.mod go.sum ./
RUN go mod download

COPY ./cmd/fabric ./cmd/fabric
COPY ./internal ./internal
RUN go build -o fabric ./cmd/fabric

# Runtime stage: small image that only carries the built binary.
FROM alpine:latest
RUN apk --no-cache add ca-certificates

# Create a test user
RUN adduser -D -s /bin/sh testuser

# Switch to test user
USER testuser
WORKDIR /home/testuser

# Set environment variables for the test user
ENV HOME=/home/testuser
ENV USER=testuser

# The binary is copied into the home directory below. Put that directory on
# PATH so that a plain `fabric` command works when the container is started
# with a shell entrypoint, which is how the test runner's shell mode tells the
# user to invoke it.
ENV PATH="/home/testuser:${PATH}"

COPY --from=builder /app/fabric .

# Create fabric config directory and empty .env file
RUN mkdir -p .config/fabric && touch .config/fabric/.env

ENTRYPOINT ["./fabric"]
|
||||
235
scripts/docker-test/test-runner.sh
Executable file
235
scripts/docker-test/test-runner.sh
Executable file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env bash
#
# Docker-based test runner: builds one image and runs `--listmodels` against
# a set of per-test environment files.

set -e

# Associative arrays (used for the test descriptions below) need bash 4+.
if (( BASH_VERSINFO[0] < 4 )); then
  printf '%s\n' "This script requires bash 4.0 or later for associative arrays."
  printf '%s\n' "Current version: $BASH_VERSION"
  exit 1
fi

# Resolve the directory holding this script, remember its name, then move two
# levels up so that every relative path below is resolved from there.
top_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
base_name="$(basename "$top_dir")"
cd "${top_dir}/../.." || exit 1

IMAGE_NAME="fabric-test-setup"
ENV_DIR="scripts/${base_name}/env"

# Test case order for consistent display
test_order=(
  "no-config"
  "gemini-only"
  "openai-only"
  "ollama-only"
  "bedrock-only"
  "mixed"
)

# Human-readable description for each test case, keyed by test name.
declare -A test_descriptions=(
  ["mixed"]="Mixed configuration (Gemini + OpenAI + Ollama)"
  ["bedrock-only"]="Only Bedrock configured"
  ["ollama-only"]="Only Ollama configured"
  ["openai-only"]="Only OpenAI configured"
  ["gemini-only"]="Only Gemini configured (reproduces original issue)"
  ["no-config"]="No APIs configured"
)
|
||||
|
||||
#######################################
# Build the Docker image used by every test case.
# Globals:   top_dir (read), IMAGE_NAME (read)
# Outputs:   a banner, the docker build output and a blank line on stdout
# Notes:     the build context is the current working directory
#######################################
build_image() {
  local dockerfile="${top_dir}/base/Dockerfile"

  printf '%s\n' "=== Building Docker image ==="
  docker build -f "$dockerfile" -t "$IMAGE_NAME" .
  printf '\n'
}
|
||||
|
||||
#######################################
# Ensure the environment file for a test case exists.
# Globals:   ENV_DIR (read)
# Arguments: $1 - test case name
# Outputs:   an error message on stdout when the file is missing
# Returns:   0 when the file exists; otherwise exits the shell with status 1
#######################################
check_env_file() {
  local env_file="${ENV_DIR}/env.${1}"

  [[ -f "$env_file" ]] && return 0

  echo "Error: Environment file not found: $env_file"
  exit 1
}
|
||||
|
||||
#######################################
# Run `--listmodels` in a container for a single test case.
# Globals:   ENV_DIR, IMAGE_NAME, test_descriptions (all read)
# Arguments: $1 - test case name
# Outputs:   a header, the container's combined stdout/stderr and a
#            pass/fail line, all on stdout
# Returns:   0 whether or not the container succeeded
#######################################
run_test() {
  local test_name="$1"
  local env_file="$ENV_DIR/env.$test_name"
  local rule="===================="
  local verdict

  check_env_file "$test_name"

  printf '%s\n' \
    "$rule" \
    "Test: ${test_descriptions[$test_name]}" \
    "Config: $test_name" \
    "Env file: $env_file" \
    "$rule" \
    "Running test..."

  # The env file is mounted read-only so a test run cannot modify it.
  local -a docker_args=(
    run --rm
    -e HOME=/home/testuser
    -e USER=testuser
    -v "$(pwd)/$env_file:/home/testuser/.config/fabric/.env:ro"
    "$IMAGE_NAME" --listmodels
  )

  verdict="❌ Test failed"
  if docker "${docker_args[@]}" 2>&1; then
    verdict="✅ Test completed"
  fi
  printf '%s\n\n' "$verdict"
}
|
||||
|
||||
#######################################
# Open an interactive shell in the container for one test case.
# Globals:   ENV_DIR, IMAGE_NAME, test_descriptions (all read)
# Arguments: $1 - test case name
# Outputs:   a header and usage hints on stdout; a note on stderr when the
#            shell session ends with a non-zero status
# Returns:   0, regardless of how the shell session ended
#######################################
shell_into_env() {
  local test_name="$1"
  local description="${test_descriptions[$test_name]}"
  local env_file="$ENV_DIR/env.$test_name"
  local status=0

  check_env_file "$test_name"

  echo "===================="
  echo "Shelling into: $description"
  echo "Config: $test_name"
  echo "Env file: $env_file"
  echo "===================="
  echo "You can now run 'fabric -S' to configure, or 'fabric --listmodels' or 'fabric -L' to test."
  echo "Changes to .env will persist in $env_file"
  echo "Type 'exit' to return to the test runner."
  echo

  # The shell exits with the status of its last command, so a non-zero status
  # here is normal (e.g. the user's last command failed). Capture it instead of
  # letting `set -e` abort the runner, which would break the promise above that
  # 'exit' returns to the test runner.
  docker run -it --rm \
    -e HOME=/home/testuser \
    -e USER=testuser \
    -v "$(pwd)/$env_file:/home/testuser/.config/fabric/.env" \
    --entrypoint=/bin/sh \
    "$IMAGE_NAME" || status=$?

  if (( status != 0 )); then
    echo "Shell session ended with status $status." >&2
  fi
  return 0
}
|
||||
|
||||
#######################################
# Menu-driven loop for running a test case or shelling into its environment.
# Globals:   test_order, test_descriptions (read)
# Inputs:    menu choices read from stdin; a trailing '!' selects shell mode
# Outputs:   menu, prompts and results on stdout
# Returns:   0 when the user stops testing or after "run all tests";
#            1 when stdin reaches end-of-file; exits 0 when the user picks 0
#######################################
interactive_mode() {
  local i=1
  local test_name choice again selected_test shell_mode
  local -a cases=()

  echo "=== Interactive Mode ==="
  echo "Available test cases:"
  echo
  for test_name in "${test_order[@]}"; do
    echo "$i) ${test_descriptions[$test_name]} ($test_name)"
    cases[i]="$test_name"
    i=$((i + 1))
  done
  # After the loop, $i is the number of the "run all tests" entry.
  echo "$i) Run all tests"
  echo "0) Exit"
  echo
  echo "Add '!' after number to shell into test environment (e.g., '1!' to shell into no-config)"
  echo

  while true; do
    # Stop on end-of-file instead of re-prompting forever.
    read -r -p "Select test case (0-$i) [or 1!, etc. to shell into test environment]: " choice || return 1

    # Check for shell mode (! suffix)
    shell_mode=false
    if [[ "$choice" == *"!" ]]; then
      shell_mode=true
      choice="${choice%!}" # Remove the ! suffix
    fi

    if [[ "$choice" == "0" ]]; then
      if [[ "$shell_mode" == true ]]; then
        echo "Cannot shell into exit option."
        continue
      fi
      echo "Exiting..."
      exit 0
    elif [[ "$choice" == "$i" ]]; then
      if [[ "$shell_mode" == true ]]; then
        echo "Cannot shell into 'run all tests' option."
        continue
      fi
      echo "Running all tests..."
      run_all_tests
      break
    # Only plain digit strings reach the arithmetic comparison: arbitrary text
    # there would be evaluated as an arithmetic expression (syntax errors for
    # input such as "1.5", variable names accepted as numbers). The 10# prefix
    # makes leading zeros decimal rather than octal.
    elif [[ "$choice" =~ ^[0-9]+$ ]] && (( 10#$choice >= 1 && 10#$choice < i )); then
      selected_test="${cases[10#$choice]}"
      if [[ "$shell_mode" == true ]]; then
        echo "Shelling into: ${test_descriptions[$selected_test]}"
        shell_into_env "$selected_test"
      else
        echo "Running: ${test_descriptions[$selected_test]}"
        run_test "$selected_test"
      fi

      read -r -p "Continue testing? (y/n): " again || return 1
      if [[ "$again" != "y" && "$again" != "Y" ]]; then
        break
      fi
      echo
    else
      echo "Invalid choice. Please select 0-$i (optionally with '!' for shell mode)."
    fi
  done
}
|
||||
|
||||
#######################################
# Run every test case in display order and print a review checklist.
# Globals:   test_order (read)
# Outputs:   a banner, each test's output and the checklist on stdout
#######################################
run_all_tests() {
  local test_name

  echo "=== Testing PR #1645: Conditional API initialization ==="
  echo

  for test_name in "${test_order[@]}"; do
    run_test "$test_name"
  done

  echo "=== Test run complete ==="
  echo "Review the output above to check:"
  # The variable that enables Ollama is OLLAMA_API_URL (not OLLAMA_URL).
  echo "1. No Ollama connection errors when OLLAMA_API_URL not set"
  echo "2. No Bedrock authentication errors when BEDROCK_AWS_REGION not set"
  echo "3. Only configured services appear in model listings"
}
|
||||
|
||||
#######################################
# Print usage information.
# Globals:   test_order, test_descriptions, ENV_DIR (all read)
# Outputs:   the help text on stdout
#######################################
show_help() {
  local test_name

  cat <<EOF
Usage: $0 [OPTIONS] [TEST_CASE]

Test PR #1645 conditional API initialization

Options:
 -h, --help Show this help message
 -i, --interactive Run in interactive mode
 -b, --build-only Build image only, don't run tests
 -s, --shell TEST Shell into test environment

Test cases:
EOF

  # One line per known test case, in display order.
  for test_name in "${test_order[@]}"; do
    printf ' %s: %s\n' "$test_name" "${test_descriptions[$test_name]}"
  done

  cat <<EOF

Examples:
 $0 # Run all tests
 $0 -i # Interactive mode
 $0 gemini-only # Run specific test
 $0 -s gemini-only # Shell into gemini-only environment
 $0 -b # Build image only

Environment files are located in $ENV_DIR/ and can be edited directly.
EOF
}
|
||||
|
||||
# Dispatch on the command line: with no arguments build the image and run
# every test; otherwise the first argument is an option or a test case name.
if (( $# == 0 )); then
  build_image
  run_all_tests
else
  case "$1" in
    -h | --help)
      show_help
      ;;
    -i | --interactive)
      build_image
      interactive_mode
      ;;
    -b | --build-only)
      build_image
      ;;
    -s | --shell)
      # Validate the test case name before spending time on the build.
      if [[ -z "$2" ]]; then
        echo "Error: -s/--shell requires a test case name"
        echo "Use -h for help."
        exit 1
      fi
      if [[ -z "${test_descriptions[$2]}" ]]; then
        echo "Error: Unknown test case: $2"
        echo "Use -h for help."
        exit 1
      fi
      build_image
      shell_into_env "$2"
      ;;
    *)
      # Anything else must be the name of a known test case.
      if [[ -n "${test_descriptions[$1]}" ]]; then
        build_image
        run_test "$1"
      else
        echo "Unknown test case or option: $1"
        echo "Use -h for help."
        exit 1
      fi
      ;;
  esac
fi
|
||||
Reference in New Issue
Block a user