mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-04-24 03:00:15 -04:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7af6817bac | ||
|
|
50ecc32d85 | ||
|
|
ff1ef380a7 | ||
|
|
6a3a7e82d1 | ||
|
|
34bc0b5e31 | ||
|
|
ce59999503 | ||
|
|
9bb4ccf740 | ||
|
|
900b13f08c | ||
|
|
6824f0c0a7 | ||
|
|
a2481406db |
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -99,6 +99,7 @@
|
||||
"mbed",
|
||||
"metacharacters",
|
||||
"Miessler",
|
||||
"mpga",
|
||||
"nometa",
|
||||
"numpy",
|
||||
"ollama",
|
||||
|
||||
33
CHANGELOG.md
33
CHANGELOG.md
@@ -1,5 +1,38 @@
|
||||
# Changelog
|
||||
|
||||
## v1.4.293 (2025-08-19)
|
||||
|
||||
### PR [#1718](https://github.com/danielmiessler/Fabric/pull/1718) by [ksylvan](https://github.com/ksylvan): Implement Configurable Debug Logging Levels
|
||||
|
||||
- Add --debug flag controlling runtime logging verbosity levels
|
||||
- Introduce internal/log package with Off, Basic, Detailed, Trace
|
||||
- Replace ad-hoc Debugf and globals with centralized debug logger
|
||||
- Wire debug level during early CLI argument parsing
|
||||
- Add bash, zsh, fish completions for --debug levels
|
||||
|
||||
## v1.4.292 (2025-08-18)
|
||||
|
||||
### PR [#1717](https://github.com/danielmiessler/Fabric/pull/1717) by [ksylvan](https://github.com/ksylvan): Highlight default vendor/model in model listing
|
||||
|
||||
- Update PrintWithVendor signature to accept default vendor and model
|
||||
- Mark default vendor/model with asterisk in non-shell output
|
||||
- Compare vendor and model case-insensitively when marking
|
||||
- Pass registry defaults to PrintWithVendor from CLI
|
||||
- Add test ensuring default selection appears with asterisk
|
||||
### Direct commits
|
||||
|
||||
- Docs: update version number in README updates section from v1.4.290 to v1.4.291
|
||||
|
||||
## v1.4.291 (2025-08-18)
|
||||
|
||||
### PR [#1715](https://github.com/danielmiessler/Fabric/pull/1715) by [ksylvan](https://github.com/ksylvan): feat: add speech-to-text via OpenAI with transcription flags and comp…
|
||||
|
||||
- Add --transcribe-file flag to transcribe audio or video
|
||||
- Add --transcribe-model flag with model listing and completion
|
||||
- Add --split-media-file flag to chunk files over 25MB
|
||||
- Implement OpenAI transcription using Whisper and GPT-4o Transcribe
|
||||
- Integrate transcription pipeline into CLI before readability processing
|
||||
|
||||
## v1.4.290 (2025-08-17)
|
||||
|
||||
### PR [#1714](https://github.com/danielmiessler/Fabric/pull/1714) by [ksylvan](https://github.com/ksylvan): feat: add per-pattern model mapping support via environment variables
|
||||
|
||||
13
README.md
13
README.md
@@ -57,6 +57,7 @@ Below are the **new features and capabilities** we've added (newest first):
|
||||
|
||||
### Recent Major Features
|
||||
|
||||
- [v1.4.291](https://github.com/danielmiessler/fabric/releases/tag/v1.4.291) (Aug 18, 2025) — **Speech To Text**: Add OpenAI speech-to-text support with `--transcribe-file`, `--transcribe-model`, and `--split-media-file` flags.
|
||||
- [v1.4.287](https://github.com/danielmiessler/fabric/releases/tag/v1.4.287) (Aug 16, 2025) — **AI Reasoning**: Add Thinking to Gemini models and introduce `readme_updates` python script
|
||||
- [v1.4.286](https://github.com/danielmiessler/fabric/releases/tag/v1.4.286) (Aug 14, 2025) — **AI Reasoning**: Introduce Thinking Config Across Anthropic and OpenAI Providers
|
||||
- [v1.4.285](https://github.com/danielmiessler/fabric/releases/tag/v1.4.285) (Aug 13, 2025) — **Extended Context**: Enable One Million Token Context Beta Feature for Sonnet-4
|
||||
@@ -635,10 +636,20 @@ Application Options:
|
||||
--yt-dlp-args= Additional arguments to pass to yt-dlp (e.g. '--cookies-from-browser brave')
|
||||
--thinking= Set reasoning/thinking level (e.g., off, low, medium, high, or
|
||||
numeric tokens for Anthropic or Google Gemini)
|
||||
--debug= Set debug level (0: off, 1: basic, 2: detailed, 3: trace)
|
||||
Help Options:
|
||||
-h, --help Show this help message
|
||||
```
|
||||
|
||||
### Debug Levels
|
||||
|
||||
Use the `--debug` flag to control runtime logging:
|
||||
|
||||
- `0`: off (default)
|
||||
- `1`: basic debug info
|
||||
- `2`: detailed debugging
|
||||
- `3`: trace level
|
||||
|
||||
## Our approach to prompting
|
||||
|
||||
Fabric _Patterns_ are different from most prompts you'll see.
|
||||
@@ -648,7 +659,7 @@ Fabric _Patterns_ are different than most prompts you'll see.
|
||||
Here's an example of a Fabric Pattern.
|
||||
|
||||
```bash
|
||||
https://github.com/danielmiessler/fabric/blob/main/patterns/extract_wisdom/system.md
|
||||
https://github.com/danielmiessler/Fabric/blob/main/data/patterns/extract_wisdom/system.md
|
||||
```
|
||||
|
||||
<img width="1461" alt="pattern-example" src="https://github.com/danielmiessler/fabric/assets/50654/b910c551-9263-405f-9735-71ca69bbab6d">
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
package main
|
||||
|
||||
var version = "v1.4.290"
|
||||
var version = "v1.4.293"
|
||||
|
||||
Binary file not shown.
@@ -59,6 +59,13 @@ _fabric_gemini_voices() {
|
||||
compadd -X "Gemini TTS Voices:" ${voices}
|
||||
}
|
||||
|
||||
# Offer the transcription model names reported by the fabric binary itself
# as completion candidates.
_fabric_transcription_models() {
  local fabric_cmd=${words[1]}
  local -a model_names
  # (f) splits the command output on newlines, one model name per element.
  model_names=(${(f)"$($fabric_cmd --list-transcription-models --shell-complete-list 2>/dev/null)"})
  compadd -X "Transcription Models:" ${model_names}
}
|
||||
|
||||
_fabric() {
|
||||
local curcontext="$curcontext" state line
|
||||
typeset -A opt_args
|
||||
@@ -135,6 +142,10 @@ _fabric() {
|
||||
'(--think-start-tag)--think-start-tag[Start tag for thinking sections (default: <think>)]:start tag:' \
|
||||
'(--think-end-tag)--think-end-tag[End tag for thinking sections (default: </think>)]:end tag:' \
|
||||
'(--disable-responses-api)--disable-responses-api[Disable OpenAI Responses API (default: false)]' \
|
||||
'(--transcribe-file)--transcribe-file[Audio or video file to transcribe]:audio file:_files -g "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm"' \
|
||||
'(--transcribe-model)--transcribe-model[Model to use for transcription (separate from chat model)]:transcribe model:_fabric_transcription_models' \
|
||||
'(--split-media-file)--split-media-file[Split audio/video files larger than 25MB using ffmpeg]' \
|
||||
'(--debug)--debug[Set debug level (0=off, 1=basic, 2=detailed, 3=trace)]:debug level:(0 1 2 3)' \
|
||||
'(--notification)--notification[Send desktop notification when command completes]' \
|
||||
'(--notification-command)--notification-command[Custom command to run for notifications]:notification command:' \
|
||||
'(-h --help)'{-h,--help}'[Show this help message]' \
|
||||
|
||||
@@ -13,7 +13,7 @@ _fabric() {
|
||||
_get_comp_words_by_ref -n : cur prev words cword
|
||||
|
||||
# Define all possible options/flags
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --vendor -V --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --thinking --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --no-variable-replacement --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --vendor -V --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --thinking --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --no-variable-replacement --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --transcribe-file --transcribe-model --split-media-file --voice --list-gemini-voices --notification --notification-command --debug --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
|
||||
# Helper function for dynamic completions
|
||||
_fabric_get_list() {
|
||||
@@ -74,8 +74,16 @@ _fabric() {
|
||||
COMPREPLY=($(compgen -W "$(_fabric_get_list --list-gemini-voices)" -- "${cur}"))
|
||||
return 0
|
||||
;;
|
||||
--transcribe-model)
|
||||
COMPREPLY=($(compgen -W "$(_fabric_get_list --list-transcription-models)" -- "${cur}"))
|
||||
return 0
|
||||
;;
|
||||
--debug)
|
||||
COMPREPLY=($(compgen -W "0 1 2 3" -- "${cur}"))
|
||||
return 0
|
||||
;;
|
||||
# Options requiring file/directory paths
|
||||
-a | --attachment | -o | --output | --config | --addextension | --image-file)
|
||||
-a | --attachment | -o | --output | --config | --addextension | --image-file | --transcribe-file)
|
||||
_filedir
|
||||
return 0
|
||||
;;
|
||||
|
||||
@@ -47,6 +47,11 @@ function __fabric_get_gemini_voices
|
||||
$cmd --list-gemini-voices --shell-complete-list 2>/dev/null
|
||||
end
|
||||
|
||||
# Print the transcription model names reported by the fabric binary so they
# can be used as completion candidates.
function __fabric_get_transcription_models
    # -l keeps cmd local to this function so an existing global or universal
    # variable named cmd is never overwritten.
    set -l cmd (commandline -opc)[1]
    $cmd --list-transcription-models --shell-complete-list 2>/dev/null
end
|
||||
|
||||
# Main completion function
|
||||
function __fabric_register_completions
|
||||
set cmd $argv[1]
|
||||
@@ -92,6 +97,9 @@ function __fabric_register_completions
|
||||
complete -c $cmd -l think-start-tag -d "Start tag for thinking sections (default: <think>)"
|
||||
complete -c $cmd -l think-end-tag -d "End tag for thinking sections (default: </think>)"
|
||||
complete -c $cmd -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)"
|
||||
complete -c $cmd -l transcribe-file -d "Audio or video file to transcribe" -r -a "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm"
|
||||
complete -c $cmd -l transcribe-model -d "Model to use for transcription (separate from chat model)" -a "(__fabric_get_transcription_models)"
|
||||
complete -c $cmd -l debug -d "Set debug level (0=off, 1=basic, 2=detailed, 3=trace)" -a "0 1 2 3"
|
||||
complete -c $cmd -l notification-command -d "Custom command to run for notifications (overrides built-in notifications)"
|
||||
|
||||
# Boolean flags (no arguments)
|
||||
@@ -127,6 +135,7 @@ function __fabric_register_completions
|
||||
complete -c $cmd -l shell-complete-list -d "Output raw list without headers/formatting (for shell completion)"
|
||||
complete -c $cmd -l suppress-think -d "Suppress text enclosed in thinking tags"
|
||||
complete -c $cmd -l disable-responses-api -d "Disable OpenAI Responses API (default: false)"
|
||||
complete -c $cmd -l split-media-file -d "Split audio/video files larger than 25MB using ffmpeg"
|
||||
complete -c $cmd -l notification -d "Send desktop notification when command completes"
|
||||
complete -c $cmd -s h -l help -d "Show this help message"
|
||||
end
|
||||
|
||||
139
docs/Using-Speech-To-Text.md
Normal file
139
docs/Using-Speech-To-Text.md
Normal file
@@ -0,0 +1,139 @@
|
||||
# Using Speech-To-Text (STT) with Fabric
|
||||
|
||||
Fabric supports speech-to-text transcription of audio and video files using OpenAI's transcription models. This feature allows you to convert spoken content into text that can then be processed through Fabric's patterns.
|
||||
|
||||
## Overview
|
||||
|
||||
The STT feature integrates OpenAI's Whisper and GPT-4o transcription models to convert audio/video files into text. The transcribed text is automatically passed as input to your chosen pattern or chat session.
|
||||
|
||||
## Requirements
|
||||
|
||||
- OpenAI API key configured in Fabric
|
||||
- For files larger than 25MB: `ffmpeg` installed on your system
|
||||
- Supported audio/video formats: `.mp3`, `.mp4`, `.mpeg`, `.mpga`, `.m4a`, `.wav`, `.webm`
|
||||
|
||||
## Basic Usage
|
||||
|
||||
### Simple Transcription
|
||||
|
||||
To transcribe an audio file and send the result to a pattern:
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file /path/to/audio.mp3 --transcribe-model whisper-1 --pattern summarize
|
||||
```
|
||||
|
||||
### Transcription Only
|
||||
|
||||
To just transcribe a file without applying a pattern:
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file /path/to/audio.mp3 --transcribe-model whisper-1
|
||||
```
|
||||
|
||||
## Command Line Flags
|
||||
|
||||
### Required Flags
|
||||
|
||||
- `--transcribe-file`: Path to the audio or video file to transcribe
|
||||
- `--transcribe-model`: Model to use for transcription (required when using transcription)
|
||||
|
||||
### Optional Flags
|
||||
|
||||
- `--split-media-file`: Automatically split files larger than 25MB into chunks using ffmpeg
|
||||
|
||||
## Available Models
|
||||
|
||||
You can list all available transcription models with:
|
||||
|
||||
```bash
|
||||
fabric --list-transcription-models
|
||||
```
|
||||
|
||||
Currently supported models:
|
||||
|
||||
- `whisper-1`: OpenAI's Whisper model
|
||||
- `gpt-4o-mini-transcribe`: GPT-4o Mini transcription model
|
||||
- `gpt-4o-transcribe`: GPT-4o transcription model
|
||||
|
||||
## File Size Handling
|
||||
|
||||
### Files Under 25MB
|
||||
|
||||
Files under the 25MB limit are processed directly without any special handling.
|
||||
|
||||
### Files Over 25MB
|
||||
|
||||
For files exceeding OpenAI's 25MB limit, you have two options:
|
||||
|
||||
1. **Manual handling**: Without `--split-media-file`, the command fails with an error message suggesting that flag, leaving you to shrink or split the file yourself
|
||||
2. **Automatic splitting**: Use the `--split-media-file` flag to automatically split the file into chunks
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file large_recording.mp4 --transcribe-model whisper-1 --split-media-file --pattern summarize
|
||||
```
|
||||
|
||||
When splitting is enabled:
|
||||
|
||||
- Fabric uses `ffmpeg` to split the file into 10-minute segments initially
|
||||
- If segments are still too large, it reduces the segment time by half repeatedly
|
||||
- All segments are transcribed and the results are concatenated
|
||||
- Temporary files are automatically cleaned up after processing
|
||||
|
||||
## Integration with Patterns
|
||||
|
||||
The transcribed text is seamlessly integrated into Fabric's workflow:
|
||||
|
||||
1. File is transcribed using the specified model
|
||||
2. Transcribed text becomes the input message
|
||||
3. Text is sent to the specified pattern or chat session
|
||||
|
||||
### Example Workflows
|
||||
|
||||
**Meeting transcription and summarization:**
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file meeting.mp4 --transcribe-model gpt-4o-transcribe --pattern summarize
|
||||
```
|
||||
|
||||
**Interview analysis:**
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file interview.mp3 --transcribe-model whisper-1 --pattern extract_insights
|
||||
```
|
||||
|
||||
**Large video file processing:**
|
||||
|
||||
```bash
|
||||
fabric --transcribe-file presentation.mp4 --transcribe-model gpt-4o-transcribe --split-media-file --pattern create_summary
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
Common error scenarios:
|
||||
|
||||
- **Unsupported format**: Only the listed audio/video formats are supported
|
||||
- **File too large**: Use `--split-media-file` for files over 25MB
|
||||
- **Missing ffmpeg**: Install ffmpeg for automatic file splitting
|
||||
- **Invalid model**: Use `--list-transcription-models` to see available models
|
||||
- **Missing model**: The `--transcribe-model` flag is required when using `--transcribe-file`
|
||||
|
||||
## Technical Details
|
||||
|
||||
### Implementation
|
||||
|
||||
- Transcription is handled by `handleTranscription` in `internal/cli/transcribe.go:14`
|
||||
- OpenAI-specific implementation (`Client.TranscribeFile`) in `internal/plugins/ai/openai/openai_audio.go:41`
|
||||
- File splitting uses ffmpeg with configurable segment duration
|
||||
- Supports any vendor that implements the `transcriber` interface
|
||||
|
||||
### Processing Pipeline
|
||||
|
||||
1. The model name, file extension, and file size are validated
|
||||
2. If file > 25MB and splitting enabled, file is split using ffmpeg
|
||||
3. Each file/segment is sent to OpenAI's transcription API
|
||||
4. Results are concatenated with spaces between segments
|
||||
5. Transcribed text is passed as input to the main Fabric pipeline
|
||||
|
||||
### Vendor Support
|
||||
|
||||
Currently, only OpenAI is supported for transcription, but the interface allows for future expansion to other vendors that provide transcription capabilities.
|
||||
@@ -74,6 +74,15 @@ func Cli(version string) (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// Handle transcription if specified
|
||||
if currentFlags.TranscribeFile != "" {
|
||||
var transcriptionMessage string
|
||||
if transcriptionMessage, err = handleTranscription(currentFlags, registry); err != nil {
|
||||
return
|
||||
}
|
||||
currentFlags.Message = AppendMessage(currentFlags.Message, transcriptionMessage)
|
||||
}
|
||||
|
||||
// Process HTML readability if needed
|
||||
if currentFlags.HtmlReadability {
|
||||
if msg, cleanErr := converter.HtmlReadability(currentFlags.Message); cleanErr != nil {
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/chat"
|
||||
"github.com/danielmiessler/fabric/internal/domain"
|
||||
debuglog "github.com/danielmiessler/fabric/internal/log"
|
||||
"github.com/danielmiessler/fabric/internal/util"
|
||||
"github.com/jessevdk/go-flags"
|
||||
"golang.org/x/text/language"
|
||||
@@ -92,23 +93,21 @@ type Flags struct {
|
||||
ThinkStartTag string `long:"think-start-tag" yaml:"thinkStartTag" description:"Start tag for thinking sections" default:"<think>"`
|
||||
ThinkEndTag string `long:"think-end-tag" yaml:"thinkEndTag" description:"End tag for thinking sections" default:"</think>"`
|
||||
DisableResponsesAPI bool `long:"disable-responses-api" yaml:"disableResponsesAPI" description:"Disable OpenAI Responses API (default: false)"`
|
||||
TranscribeFile string `long:"transcribe-file" yaml:"transcribeFile" description:"Audio or video file to transcribe"`
|
||||
TranscribeModel string `long:"transcribe-model" yaml:"transcribeModel" description:"Model to use for transcription (separate from chat model)"`
|
||||
SplitMediaFile bool `long:"split-media-file" yaml:"splitMediaFile" description:"Split audio/video files larger than 25MB using ffmpeg"`
|
||||
Voice string `long:"voice" yaml:"voice" description:"TTS voice name for supported models (e.g., Kore, Charon, Puck)" default:"Kore"`
|
||||
ListGeminiVoices bool `long:"list-gemini-voices" description:"List all available Gemini TTS voices"`
|
||||
ListTranscriptionModels bool `long:"list-transcription-models" description:"List all available transcription models"`
|
||||
Notification bool `long:"notification" yaml:"notification" description:"Send desktop notification when command completes"`
|
||||
NotificationCommand string `long:"notification-command" yaml:"notificationCommand" description:"Custom command to run for notifications (overrides built-in notifications)"`
|
||||
Thinking domain.ThinkingLevel `long:"thinking" yaml:"thinking" description:"Set reasoning/thinking level (e.g., off, low, medium, high, or numeric tokens for Anthropic or Google Gemini)"`
|
||||
}
|
||||
|
||||
var debug = false
|
||||
|
||||
func Debugf(format string, a ...interface{}) {
|
||||
if debug {
|
||||
fmt.Printf("DEBUG: "+format, a...)
|
||||
}
|
||||
Debug int `long:"debug" description:"Set debug level (0=off, 1=basic, 2=detailed, 3=trace)" default:"0"`
|
||||
}
|
||||
|
||||
// Init initializes the flags. It returns a Flags struct and an error.
|
||||
func Init() (ret *Flags, err error) {
|
||||
debuglog.SetLevel(debuglog.LevelFromInt(parseDebugLevel(os.Args[1:])))
|
||||
// Track which yaml-configured flags were set on CLI
|
||||
usedFlags := make(map[string]bool)
|
||||
yamlArgsScan := os.Args[1:]
|
||||
@@ -124,11 +123,11 @@ func Init() (ret *Flags, err error) {
|
||||
shortTag := field.Tag.Get("short")
|
||||
if longTag != "" {
|
||||
flagToYamlTag[longTag] = yamlTag
|
||||
Debugf("Mapped long flag %s to yaml tag %s\n", longTag, yamlTag)
|
||||
debuglog.Debug(debuglog.Detailed, "Mapped long flag %s to yaml tag %s\n", longTag, yamlTag)
|
||||
}
|
||||
if shortTag != "" {
|
||||
flagToYamlTag[shortTag] = yamlTag
|
||||
Debugf("Mapped short flag %s to yaml tag %s\n", shortTag, yamlTag)
|
||||
debuglog.Debug(debuglog.Detailed, "Mapped short flag %s to yaml tag %s\n", shortTag, yamlTag)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -140,7 +139,7 @@ func Init() (ret *Flags, err error) {
|
||||
if flag != "" {
|
||||
if yamlTag, exists := flagToYamlTag[flag]; exists {
|
||||
usedFlags[yamlTag] = true
|
||||
Debugf("CLI flag used: %s (yaml: %s)\n", flag, yamlTag)
|
||||
debuglog.Debug(debuglog.Detailed, "CLI flag used: %s (yaml: %s)\n", flag, yamlTag)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -152,6 +151,7 @@ func Init() (ret *Flags, err error) {
|
||||
if args, err = parser.Parse(); err != nil {
|
||||
return
|
||||
}
|
||||
debuglog.SetLevel(debuglog.LevelFromInt(ret.Debug))
|
||||
|
||||
// Check to see if a ~/.config/fabric/config.yaml config file exists (only when user didn't specify a config)
|
||||
if ret.Config == "" {
|
||||
@@ -159,7 +159,7 @@ func Init() (ret *Flags, err error) {
|
||||
if defaultConfigPath, err := util.GetDefaultConfigPath(); err == nil && defaultConfigPath != "" {
|
||||
ret.Config = defaultConfigPath
|
||||
} else if err != nil {
|
||||
Debugf("Could not determine default config path: %v\n", err)
|
||||
debuglog.Debug(debuglog.Detailed, "Could not determine default config path: %v\n", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,13 +184,13 @@ func Init() (ret *Flags, err error) {
|
||||
if flagField.CanSet() {
|
||||
if yamlField.Type() != flagField.Type() {
|
||||
if err := assignWithConversion(flagField, yamlField); err != nil {
|
||||
Debugf("Type conversion failed for %s: %v\n", yamlTag, err)
|
||||
debuglog.Debug(debuglog.Detailed, "Type conversion failed for %s: %v\n", yamlTag, err)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
flagField.Set(yamlField)
|
||||
}
|
||||
Debugf("Applied YAML value for %s: %v\n", yamlTag, yamlField.Interface())
|
||||
debuglog.Debug(debuglog.Detailed, "Applied YAML value for %s: %v\n", yamlTag, yamlField.Interface())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -216,6 +216,22 @@ func Init() (ret *Flags, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// parseDebugLevel scans the raw command-line arguments for the --debug flag
// so the debug level can be applied before the full flag parser runs.
//
// Both "--debug N" and "--debug=N" are recognized. A value that is not an
// integer is ignored and scanning continues with the next argument. Scanning
// stops at a bare "--", which conventionally ends option parsing, so a literal
// "--debug" that is part of the positional input is not mistaken for the flag.
// NOTE(review): this assumes the main parser is configured to honor "--";
// confirm against the parser options used in Init.
//
// It returns 0 when no valid --debug value is found.
func parseDebugLevel(args []string) int {
	for i, arg := range args {
		if arg == "--" {
			break
		}

		var value string
		switch {
		case arg == "--debug":
			if i+1 >= len(args) {
				// Flag given as the last argument with no value.
				continue
			}
			value = args[i+1]
		case strings.HasPrefix(arg, "--debug="):
			value = strings.TrimPrefix(arg, "--debug=")
		default:
			continue
		}

		if lvl, err := strconv.Atoi(value); err == nil {
			return lvl
		}
	}
	return 0
}
|
||||
|
||||
func extractFlag(arg string) string {
|
||||
var flag string
|
||||
if strings.HasPrefix(arg, "--") {
|
||||
@@ -285,7 +301,7 @@ func loadYAMLConfig(configPath string) (*Flags, error) {
|
||||
return nil, fmt.Errorf("error parsing config file: %w", err)
|
||||
}
|
||||
|
||||
Debugf("Config: %v\n", config)
|
||||
debuglog.Debug(debuglog.Detailed, "Config: %v\n", config)
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
openai "github.com/openai/openai-go"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/core"
|
||||
"github.com/danielmiessler/fabric/internal/plugins/ai"
|
||||
"github.com/danielmiessler/fabric/internal/plugins/ai/gemini"
|
||||
@@ -39,7 +41,7 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
|
||||
if currentFlags.ShellCompleteOutput {
|
||||
models.Print(true)
|
||||
} else {
|
||||
models.PrintWithVendor(false)
|
||||
models.PrintWithVendor(false, registry.Defaults.Vendor.Value, registry.Defaults.Model.Value)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
@@ -70,5 +72,30 @@ func handleListingCommands(currentFlags *Flags, fabricDb *fsdb.Db, registry *cor
|
||||
return true, nil
|
||||
}
|
||||
|
||||
if currentFlags.ListTranscriptionModels {
|
||||
listTranscriptionModels(currentFlags.ShellCompleteOutput)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// listTranscriptionModels lists all available transcription models
|
||||
func listTranscriptionModels(shellComplete bool) {
|
||||
models := []string{
|
||||
string(openai.AudioModelWhisper1),
|
||||
string(openai.AudioModelGPT4oMiniTranscribe),
|
||||
string(openai.AudioModelGPT4oTranscribe),
|
||||
}
|
||||
|
||||
if shellComplete {
|
||||
for _, model := range models {
|
||||
fmt.Println(model)
|
||||
}
|
||||
} else {
|
||||
fmt.Println("Available transcription models:")
|
||||
for _, model := range models {
|
||||
fmt.Printf(" %s\n", model)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
35
internal/cli/transcribe.go
Normal file
35
internal/cli/transcribe.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/core"
|
||||
)
|
||||
|
||||
type transcriber interface {
|
||||
TranscribeFile(ctx context.Context, filePath, model string, split bool) (string, error)
|
||||
}
|
||||
|
||||
func handleTranscription(flags *Flags, registry *core.PluginRegistry) (message string, err error) {
|
||||
vendorName := flags.Vendor
|
||||
if vendorName == "" {
|
||||
vendorName = "OpenAI"
|
||||
}
|
||||
vendor, ok := registry.VendorManager.VendorsByName[vendorName]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("vendor %s not configured", vendorName)
|
||||
}
|
||||
tr, ok := vendor.(transcriber)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("vendor %s does not support audio transcription", vendorName)
|
||||
}
|
||||
model := flags.TranscribeModel
|
||||
if model == "" {
|
||||
return "", fmt.Errorf("transcription model is required (use --transcribe-model)")
|
||||
}
|
||||
if message, err = tr.TranscribeFile(context.Background(), flags.TranscribeFile, model, flags.SplitMediaFile); err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -81,8 +81,10 @@ func TestGetChatter_WarnsOnAmbiguousModel(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("GetChatter() error = %v", err)
|
||||
}
|
||||
if chatter.vendor.GetName() != "VendorA" {
|
||||
t.Fatalf("expected vendor VendorA, got %s", chatter.vendor.GetName())
|
||||
// Verify that one of the valid vendors was selected (don't care which one due to map iteration randomness)
|
||||
vendorName := chatter.vendor.GetName()
|
||||
if vendorName != "VendorA" && vendorName != "VendorB" {
|
||||
t.Fatalf("expected vendor VendorA or VendorB, got %s", vendorName)
|
||||
}
|
||||
if !strings.Contains(string(warning), "multiple vendors provide model shared-model") {
|
||||
t.Fatalf("expected warning about multiple vendors, got %q", string(warning))
|
||||
|
||||
69
internal/log/log.go
Normal file
69
internal/log/log.go
Normal file
@@ -0,0 +1,69 @@
|
||||
package log
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Level represents the debug verbosity. Higher values are more verbose.
type Level int

const (
	// Off disables all debug output.
	Off Level = iota
	// Basic provides minimal debugging information.
	Basic
	// Detailed provides more verbose debugging.
	Detailed
	// Trace is the most verbose level.
	Trace
)

// Package-level logger state. mu guards both level and output.
var (
	mu     sync.RWMutex
	level  Level     = Off
	output io.Writer = os.Stderr
)

// SetLevel sets the global debug level.
func SetLevel(l Level) {
	mu.Lock()
	defer mu.Unlock()
	level = l
}

// LevelFromInt converts an int to a Level, clamping out-of-range values:
// anything at or below 0 maps to Off and anything at or above 3 maps to Trace.
func LevelFromInt(i int) Level {
	switch {
	case i <= int(Off):
		return Off
	case i >= int(Trace):
		return Trace
	default:
		// Only Basic and Detailed remain; their numeric values match i.
		return Level(i)
	}
}

// Debug writes a formatted message prefixed with "DEBUG: " to the configured
// output when the global level is at least l. No newline is appended; callers
// include one in format when they want it.
func Debug(l Level, format string, a ...any) {
	// Snapshot the state under the lock, then write without holding it.
	mu.RLock()
	current, w := level, output
	mu.RUnlock()

	if current < l {
		return
	}
	// Write errors are deliberately ignored: debug logging is best-effort.
	fmt.Fprintf(w, "DEBUG: "+format, a...)
}

// SetOutput overrides the destination for debug logs. A nil writer restores
// the default destination (os.Stderr) instead of leaving the logger in a
// state where the next enabled Debug call would panic.
func SetOutput(w io.Writer) {
	if w == nil {
		w = os.Stderr
	}
	mu.Lock()
	defer mu.Unlock()
	output = w
}
|
||||
@@ -18,7 +18,8 @@ type VendorsModels struct {
|
||||
|
||||
// PrintWithVendor prints models including their vendor on each line.
|
||||
// When shellCompleteList is true, output is suitable for shell completion.
|
||||
func (o *VendorsModels) PrintWithVendor(shellCompleteList bool) {
|
||||
// Default vendor and model are highlighted with an asterisk.
|
||||
func (o *VendorsModels) PrintWithVendor(shellCompleteList bool, defaultVendor, defaultModel string) {
|
||||
if !shellCompleteList {
|
||||
fmt.Printf("\n%v:\n", o.SelectionLabel)
|
||||
}
|
||||
@@ -42,7 +43,11 @@ func (o *VendorsModels) PrintWithVendor(shellCompleteList bool) {
|
||||
if shellCompleteList {
|
||||
fmt.Printf("%s|%s\n", groupItems.Group, item)
|
||||
} else {
|
||||
fmt.Printf("\t[%d]\t%s|%s\n", currentItemIndex, groupItems.Group, item)
|
||||
mark := " "
|
||||
if strings.EqualFold(groupItems.Group, defaultVendor) && strings.EqualFold(item, defaultModel) {
|
||||
mark = " *"
|
||||
}
|
||||
fmt.Printf("%s\t[%d]\t%s|%s\n", mark, currentItemIndex, groupItems.Group, item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -31,3 +34,23 @@ func TestFindVendorsByModel(t *testing.T) {
|
||||
t.Fatalf("FindVendorsByModel() = %v, want %v", foundVendors, []string{"vendor1"})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrintWithVendorMarksDefault(t *testing.T) {
|
||||
vendors := NewVendorsModels()
|
||||
vendors.AddGroupItems("vendor1", []string{"model1"}...)
|
||||
vendors.AddGroupItems("vendor2", []string{"model2"}...)
|
||||
|
||||
r, w, _ := os.Pipe()
|
||||
oldStdout := os.Stdout
|
||||
os.Stdout = w
|
||||
|
||||
vendors.PrintWithVendor(false, "vendor2", "model2")
|
||||
|
||||
w.Close()
|
||||
os.Stdout = oldStdout
|
||||
out, _ := io.ReadAll(r)
|
||||
|
||||
if !strings.Contains(string(out), " *\t[2]\tvendor2|model2") {
|
||||
t.Fatalf("default model not marked: %s", out)
|
||||
}
|
||||
}
|
||||
|
||||
153
internal/plugins/ai/openai/openai_audio.go
Normal file
153
internal/plugins/ai/openai/openai_audio.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package openai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
debuglog "github.com/danielmiessler/fabric/internal/log"
|
||||
|
||||
openai "github.com/openai/openai-go"
|
||||
)
|
||||
|
||||
// MaxAudioFileSize defines the maximum allowed size for audio uploads (25MB).
|
||||
const MaxAudioFileSize int64 = 25 * 1024 * 1024
|
||||
|
||||
// AllowedTranscriptionModels lists the models supported for transcription.
|
||||
var AllowedTranscriptionModels = []string{
|
||||
string(openai.AudioModelWhisper1),
|
||||
string(openai.AudioModelGPT4oMiniTranscribe),
|
||||
string(openai.AudioModelGPT4oTranscribe),
|
||||
}
|
||||
|
||||
// allowedAudioExtensions defines the supported input file extensions.
|
||||
var allowedAudioExtensions = map[string]struct{}{
|
||||
".mp3": {},
|
||||
".mp4": {},
|
||||
".mpeg": {},
|
||||
".mpga": {},
|
||||
".m4a": {},
|
||||
".wav": {},
|
||||
".webm": {},
|
||||
}
|
||||
|
||||
// TranscribeFile transcribes the given audio file using the specified model. If the file
|
||||
// exceeds the size limit, it can optionally be split into chunks using ffmpeg.
|
||||
func (o *Client) TranscribeFile(ctx context.Context, filePath, model string, split bool) (string, error) {
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
|
||||
if !slices.Contains(AllowedTranscriptionModels, model) {
|
||||
return "", fmt.Errorf("model '%s' is not supported for transcription", model)
|
||||
}
|
||||
|
||||
ext := strings.ToLower(filepath.Ext(filePath))
|
||||
if _, ok := allowedAudioExtensions[ext]; !ok {
|
||||
return "", fmt.Errorf("unsupported audio format '%s'", ext)
|
||||
}
|
||||
|
||||
info, err := os.Stat(filePath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var files []string
|
||||
var cleanup func()
|
||||
if info.Size() > MaxAudioFileSize {
|
||||
if !split {
|
||||
return "", fmt.Errorf("file %s exceeds 25MB limit; use --split-media-file to enable automatic splitting", filePath)
|
||||
}
|
||||
debuglog.Debug(debuglog.Basic, "File %s is larger than the size limit... breaking it up into chunks...\n", filePath)
|
||||
if files, cleanup, err = splitAudioFile(filePath, ext, MaxAudioFileSize); err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer cleanup()
|
||||
} else {
|
||||
files = []string{filePath}
|
||||
}
|
||||
|
||||
var builder strings.Builder
|
||||
for i, f := range files {
|
||||
debuglog.Debug(debuglog.Basic, "Using model %s to transcribe part %d (file name: %s)...\n", model, i+1, f)
|
||||
var chunk *os.File
|
||||
if chunk, err = os.Open(f); err != nil {
|
||||
return "", err
|
||||
}
|
||||
params := openai.AudioTranscriptionNewParams{
|
||||
File: chunk,
|
||||
Model: openai.AudioModel(model),
|
||||
}
|
||||
var resp *openai.Transcription
|
||||
resp, err = o.ApiClient.Audio.Transcriptions.New(ctx, params)
|
||||
chunk.Close()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if i > 0 {
|
||||
builder.WriteString(" ")
|
||||
}
|
||||
builder.WriteString(resp.Text)
|
||||
}
|
||||
|
||||
return builder.String(), nil
|
||||
}
|
||||
|
||||
// splitAudioFile splits the source file into chunks smaller than maxSize using ffmpeg.
|
||||
// It returns the list of chunk file paths and a cleanup function.
|
||||
func splitAudioFile(src, ext string, maxSize int64) (files []string, cleanup func(), err error) {
|
||||
if _, err = exec.LookPath("ffmpeg"); err != nil {
|
||||
return nil, nil, fmt.Errorf("ffmpeg not found: please install it")
|
||||
}
|
||||
|
||||
var dir string
|
||||
if dir, err = os.MkdirTemp("", "fabric-audio-*"); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
cleanup = func() { os.RemoveAll(dir) }
|
||||
|
||||
segmentTime := 600 // start with 10 minutes
|
||||
for {
|
||||
pattern := filepath.Join(dir, "chunk-%03d"+ext)
|
||||
debuglog.Debug(debuglog.Basic, "Running ffmpeg to split audio into %d-second chunks...\n", segmentTime)
|
||||
cmd := exec.Command("ffmpeg", "-y", "-i", src, "-f", "segment", "-segment_time", fmt.Sprintf("%d", segmentTime), "-c", "copy", pattern)
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
if err = cmd.Run(); err != nil {
|
||||
return nil, cleanup, fmt.Errorf("ffmpeg failed: %v: %s", err, stderr.String())
|
||||
}
|
||||
|
||||
if files, err = filepath.Glob(filepath.Join(dir, "chunk-*"+ext)); err != nil {
|
||||
return nil, cleanup, err
|
||||
}
|
||||
sort.Strings(files)
|
||||
|
||||
tooBig := false
|
||||
for _, f := range files {
|
||||
var info os.FileInfo
|
||||
if info, err = os.Stat(f); err != nil {
|
||||
return nil, cleanup, err
|
||||
}
|
||||
if info.Size() > maxSize {
|
||||
tooBig = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !tooBig {
|
||||
return files, cleanup, nil
|
||||
}
|
||||
for _, f := range files {
|
||||
_ = os.Remove(f)
|
||||
}
|
||||
if segmentTime <= 1 {
|
||||
return nil, cleanup, fmt.Errorf("unable to split file into acceptable size chunks")
|
||||
}
|
||||
segmentTime /= 2
|
||||
}
|
||||
}
|
||||
@@ -148,7 +148,6 @@ func (o *VendorsManager) setupVendorTo(vendor Vendor, configuredVendors map[stri
|
||||
delete(configuredVendors, vendor.GetName())
|
||||
fmt.Printf("[%v] skipped\n", vendor.GetName())
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type modelResult struct {
|
||||
|
||||
@@ -10,8 +10,9 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
debuglog "github.com/danielmiessler/fabric/internal/log"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
// Add this import
|
||||
)
|
||||
|
||||
// ExtensionDefinition represents a single extension configuration
|
||||
@@ -87,9 +88,7 @@ func NewExtensionRegistry(configDir string) *ExtensionRegistry {
|
||||
r.ensureConfigDir()
|
||||
|
||||
if err := r.loadRegistry(); err != nil {
|
||||
if Debug {
|
||||
fmt.Printf("Warning: could not load extension registry: %v\n", err)
|
||||
}
|
||||
debuglog.Debug(debuglog.Basic, "Warning: could not load extension registry: %v\n", err)
|
||||
}
|
||||
|
||||
return r
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
debuglog "github.com/danielmiessler/fabric/internal/log"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -14,7 +16,6 @@ var (
|
||||
filePlugin = &FilePlugin{}
|
||||
fetchPlugin = &FetchPlugin{}
|
||||
sysPlugin = &SysPlugin{}
|
||||
Debug = false // Debug flag
|
||||
)
|
||||
|
||||
var extensionManager *ExtensionManager
|
||||
@@ -33,9 +34,7 @@ var pluginPattern = regexp.MustCompile(`\{\{plugin:([^:]+):([^:]+)(?::([^}]+))?\
|
||||
var extensionPattern = regexp.MustCompile(`\{\{ext:([^:]+):([^:]+)(?::([^}]+))?\}\}`)
|
||||
|
||||
func debugf(format string, a ...interface{}) {
|
||||
if Debug {
|
||||
fmt.Printf(format, a...)
|
||||
}
|
||||
debuglog.Debug(debuglog.Trace, format, a...)
|
||||
}
|
||||
|
||||
func ApplyTemplate(content string, variables map[string]string, input string) (string, error) {
|
||||
|
||||
@@ -1 +1 @@
|
||||
"1.4.290"
|
||||
"1.4.293"
|
||||
|
||||
Reference in New Issue
Block a user