From ff1ef380a74bb95ff2532c06cd3d2439bdd57ee6 Mon Sep 17 00:00:00 2001 From: Kayvan Sylvan Date: Mon, 18 Aug 2025 23:53:10 -0700 Subject: [PATCH] feat: add --debug flag with levels and centralized logging CHANGES - Add --debug flag controlling runtime logging verbosity levels - Introduce internal/log package with Off, Basic, Detailed, Trace - Replace ad-hoc Debugf and globals with centralized debug logger - Wire debug level during early CLI argument parsing - Add bash, zsh, fish completions for --debug levels - Document debug levels in README with usage examples - Add comprehensive STT guide covering models, flags, workflows - Simplify splitAudioFile signature and log ffmpeg chunking operations - Remove FABRIC_STT_DEBUG environment variable and related code - Clean minor code paths in vendors and template modules --- README.md | 10 ++ cmd/generate_changelog/incoming/1718.txt | 7 + completions/_fabric | 1 + completions/fabric.bash | 6 +- completions/fabric.fish | 1 + docs/Using-Speech-To-Text.md | 139 ++++++++++++++++++ internal/cli/flags.go | 42 ++++-- internal/log/log.go | 69 +++++++++ internal/plugins/ai/openai/openai_audio.go | 20 +-- internal/plugins/ai/vendors.go | 1 - .../plugins/template/extension_registry.go | 7 +- internal/plugins/template/template.go | 7 +- 12 files changed, 272 insertions(+), 38 deletions(-) create mode 100644 cmd/generate_changelog/incoming/1718.txt create mode 100644 docs/Using-Speech-To-Text.md create mode 100644 internal/log/log.go diff --git a/README.md b/README.md index c364025a..f9897ec8 100644 --- a/README.md +++ b/README.md @@ -636,10 +636,20 @@ Application Options: --yt-dlp-args= Additional arguments to pass to yt-dlp (e.g. 
'--cookies-from-browser brave') --thinking= Set reasoning/thinking level (e.g., off, low, medium, high, or numeric tokens for Anthropic or Google Gemini) + --debug= Set debug level (0: off, 1: basic, 2: detailed, 3: trace) Help Options: -h, --help Show this help message ``` +### Debug Levels + +Use the `--debug` flag to control runtime logging: + +- `0`: off (default) +- `1`: basic debug info +- `2`: detailed debugging +- `3`: trace level + ## Our approach to prompting Fabric _Patterns_ are different than most prompts you'll see. diff --git a/cmd/generate_changelog/incoming/1718.txt b/cmd/generate_changelog/incoming/1718.txt new file mode 100644 index 00000000..0c4dc294 --- /dev/null +++ b/cmd/generate_changelog/incoming/1718.txt @@ -0,0 +1,7 @@ +### PR [#1718](https://github.com/danielmiessler/Fabric/pull/1718) by [ksylvan](https://github.com/ksylvan): Implement Configurable Debug Logging Levels + +- Add --debug flag controlling runtime logging verbosity levels +- Introduce internal/log package with Off, Basic, Detailed, Trace +- Replace ad-hoc Debugf and globals with centralized debug logger +- Wire debug level during early CLI argument parsing +- Add bash, zsh, fish completions for --debug levels diff --git a/completions/_fabric b/completions/_fabric index bdf55927..69b92061 100644 --- a/completions/_fabric +++ b/completions/_fabric @@ -145,6 +145,7 @@ _fabric() { '(--transcribe-file)--transcribe-file[Audio or video file to transcribe]:audio file:_files -g "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm"' \ '(--transcribe-model)--transcribe-model[Model to use for transcription (separate from chat model)]:transcribe model:_fabric_transcription_models' \ '(--split-media-file)--split-media-file[Split audio/video files larger than 25MB using ffmpeg]' \ + '(--debug)--debug[Set debug level (0=off, 1=basic, 2=detailed, 3=trace)]:debug level:(0 1 2 3)' \ '(--notification)--notification[Send desktop notification when command completes]' \ 
'(--notification-command)--notification-command[Custom command to run for notifications]:notification command:' \ '(-h --help)'{-h,--help}'[Show this help message]' \ diff --git a/completions/fabric.bash b/completions/fabric.bash index 3af1c125..7a096d76 100644 --- a/completions/fabric.bash +++ b/completions/fabric.bash @@ -13,7 +13,7 @@ _fabric() { _get_comp_words_by_ref -n : cur prev words cword # Define all possible options/flags - local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --vendor -V --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --thinking --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --no-variable-replacement --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --transcribe-file --transcribe-model --split-media-file --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" + local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --vendor -V --modelContextLength --output -o --output-session --latest -n 
--changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --thinking --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --no-variable-replacement --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --transcribe-file --transcribe-model --split-media-file --voice --list-gemini-voices --notification --notification-command --debug --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" # Helper function for dynamic completions _fabric_get_list() { @@ -78,6 +78,10 @@ _fabric() { COMPREPLY=($(compgen -W "$(_fabric_get_list --list-transcription-models)" -- "${cur}")) return 0 ;; + --debug) + COMPREPLY=($(compgen -W "0 1 2 3" -- "${cur}")) + return 0 + ;; # Options requiring file/directory paths -a | --attachment | -o | --output | --config | --addextension | --image-file | --transcribe-file) _filedir diff --git a/completions/fabric.fish b/completions/fabric.fish index 6c3f113f..bc1edef9 100755 --- a/completions/fabric.fish +++ b/completions/fabric.fish @@ -99,6 +99,7 @@ function __fabric_register_completions complete -c $cmd -l voice -d "TTS voice name for supported models (e.g., Kore, Charon, Puck)" -a "(__fabric_get_gemini_voices)" complete -c $cmd -l transcribe-file -d "Audio or video file to transcribe" -r -a "*.mp3 *.mp4 *.mpeg *.mpga *.m4a *.wav *.webm" complete -c $cmd -l transcribe-model -d "Model to use for transcription (separate from chat model)" -a "(__fabric_get_transcription_models)" + complete -c $cmd -l debug -d "Set debug level (0=off, 1=basic, 2=detailed, 3=trace)" -a "0 1 2 3" complete -c $cmd -l notification-command -d 
"Custom command to run for notifications (overrides built-in notifications)" # Boolean flags (no arguments) diff --git a/docs/Using-Speech-To-Text.md b/docs/Using-Speech-To-Text.md new file mode 100644 index 00000000..9d12b3d3 --- /dev/null +++ b/docs/Using-Speech-To-Text.md @@ -0,0 +1,139 @@ +# Using Speech-To-Text (STT) with Fabric + +Fabric supports speech-to-text transcription of audio and video files using OpenAI's transcription models. This feature allows you to convert spoken content into text that can then be processed through Fabric's patterns. + +## Overview + +The STT feature integrates OpenAI's Whisper and GPT-4o transcription models to convert audio/video files into text. The transcribed text is automatically passed as input to your chosen pattern or chat session. + +## Requirements + +- OpenAI API key configured in Fabric +- For files larger than 25MB: `ffmpeg` installed on your system +- Supported audio/video formats: `.mp3`, `.mp4`, `.mpeg`, `.mpga`, `.m4a`, `.wav`, `.webm` + +## Basic Usage + +### Simple Transcription + +To transcribe an audio file and send the result to a pattern: + +```bash +fabric --transcribe-file /path/to/audio.mp3 --transcribe-model whisper-1 --pattern summarize +``` + +### Transcription Only + +To just transcribe a file without applying a pattern: + +```bash +fabric --transcribe-file /path/to/audio.mp3 --transcribe-model whisper-1 +``` + +## Command Line Flags + +### Required Flags + +- `--transcribe-file`: Path to the audio or video file to transcribe +- `--transcribe-model`: Model to use for transcription (required when using transcription) + +### Optional Flags + +- `--split-media-file`: Automatically split files larger than 25MB into chunks using ffmpeg + +## Available Models + +You can list all available transcription models with: + +```bash +fabric --list-transcription-models +``` + +Currently supported models: + +- `whisper-1`: OpenAI's Whisper model +- `gpt-4o-mini-transcribe`: GPT-4o Mini transcription model +- 
`gpt-4o-transcribe`: GPT-4o transcription model + +## File Size Handling + +### Files Under 25MB + +Files under the 25MB limit are processed directly without any special handling. + +### Files Over 25MB + +For files exceeding OpenAI's 25MB limit, you have two options: + +1. **Manual handling**: Without `--split-media-file`, the command fails with an error that suggests the flag, leaving you to shrink or split the file yourself +2. **Automatic splitting**: Use the `--split-media-file` flag to automatically split the file into chunks + +```bash +fabric --transcribe-file large_recording.mp4 --transcribe-model whisper-1 --split-media-file --pattern summarize +``` + +When splitting is enabled: + +- Fabric uses `ffmpeg` to split the file into 10-minute segments initially +- If segments are still too large, it reduces the segment time by half repeatedly +- All segments are transcribed and the results are concatenated +- Temporary files are automatically cleaned up after processing + +## Integration with Patterns + +The transcribed text is seamlessly integrated into Fabric's workflow: + +1. File is transcribed using the specified model +2. Transcribed text becomes the input message +3. 
Text is sent to the specified pattern or chat session + +### Example Workflows + +**Meeting transcription and summarization:** + +```bash +fabric --transcribe-file meeting.mp4 --transcribe-model gpt-4o-transcribe --pattern summarize +``` + +**Interview analysis:** + +```bash +fabric --transcribe-file interview.mp3 --transcribe-model whisper-1 --pattern extract_insights +``` + +**Large video file processing:** + +```bash +fabric --transcribe-file presentation.mp4 --transcribe-model gpt-4o-transcribe --split-media-file --pattern create_summary +``` + +## Error Handling + +Common error scenarios: + +- **Unsupported format**: Only the listed audio/video formats are supported +- **File too large**: Use `--split-media-file` for files over 25MB +- **Missing ffmpeg**: Install ffmpeg for automatic file splitting +- **Invalid model**: Use `--list-transcription-models` to see available models +- **Missing model**: The `--transcribe-model` flag is required when using `--transcribe-file` + +## Technical Details + +### Implementation + +- Transcription is handled in `internal/cli/transcribe.go:14` +- OpenAI-specific implementation in `internal/plugins/ai/openai/openai_audio.go:41` +- File splitting uses ffmpeg with configurable segment duration +- Supports any vendor that implements the `transcriber` interface + +### Processing Pipeline + +1. CLI validates file format and size +2. If file > 25MB and splitting enabled, file is split using ffmpeg +3. Each file/segment is sent to OpenAI's transcription API +4. Results are concatenated with spaces between segments +5. Transcribed text is passed as input to the main Fabric pipeline + +### Vendor Support + +Currently, only OpenAI is supported for transcription, but the interface allows for future expansion to other vendors that provide transcription capabilities. 
diff --git a/internal/cli/flags.go b/internal/cli/flags.go index dec443be..e4a5360c 100644 --- a/internal/cli/flags.go +++ b/internal/cli/flags.go @@ -13,6 +13,7 @@ import ( "github.com/danielmiessler/fabric/internal/chat" "github.com/danielmiessler/fabric/internal/domain" + debuglog "github.com/danielmiessler/fabric/internal/log" "github.com/danielmiessler/fabric/internal/util" "github.com/jessevdk/go-flags" "golang.org/x/text/language" @@ -101,18 +102,12 @@ type Flags struct { Notification bool `long:"notification" yaml:"notification" description:"Send desktop notification when command completes"` NotificationCommand string `long:"notification-command" yaml:"notificationCommand" description:"Custom command to run for notifications (overrides built-in notifications)"` Thinking domain.ThinkingLevel `long:"thinking" yaml:"thinking" description:"Set reasoning/thinking level (e.g., off, low, medium, high, or numeric tokens for Anthropic or Google Gemini)"` -} - -var debug = false - -func Debugf(format string, a ...interface{}) { - if debug { - fmt.Printf("DEBUG: "+format, a...) - } + Debug int `long:"debug" description:"Set debug level (0=off, 1=basic, 2=detailed, 3=trace)" default:"0"` } // Init Initialize flags. 
returns a Flags struct and an error func Init() (ret *Flags, err error) { + debuglog.SetLevel(debuglog.LevelFromInt(parseDebugLevel(os.Args[1:]))) // Track which yaml-configured flags were set on CLI usedFlags := make(map[string]bool) yamlArgsScan := os.Args[1:] @@ -128,11 +123,11 @@ func Init() (ret *Flags, err error) { shortTag := field.Tag.Get("short") if longTag != "" { flagToYamlTag[longTag] = yamlTag - Debugf("Mapped long flag %s to yaml tag %s\n", longTag, yamlTag) + debuglog.Debug(debuglog.Detailed, "Mapped long flag %s to yaml tag %s\n", longTag, yamlTag) } if shortTag != "" { flagToYamlTag[shortTag] = yamlTag - Debugf("Mapped short flag %s to yaml tag %s\n", shortTag, yamlTag) + debuglog.Debug(debuglog.Detailed, "Mapped short flag %s to yaml tag %s\n", shortTag, yamlTag) } } } @@ -144,7 +139,7 @@ func Init() (ret *Flags, err error) { if flag != "" { if yamlTag, exists := flagToYamlTag[flag]; exists { usedFlags[yamlTag] = true - Debugf("CLI flag used: %s (yaml: %s)\n", flag, yamlTag) + debuglog.Debug(debuglog.Detailed, "CLI flag used: %s (yaml: %s)\n", flag, yamlTag) } } } @@ -156,6 +151,7 @@ func Init() (ret *Flags, err error) { if args, err = parser.Parse(); err != nil { return } + debuglog.SetLevel(debuglog.LevelFromInt(ret.Debug)) // Check to see if a ~/.config/fabric/config.yaml config file exists (only when user didn't specify a config) if ret.Config == "" { @@ -163,7 +159,7 @@ func Init() (ret *Flags, err error) { if defaultConfigPath, err := util.GetDefaultConfigPath(); err == nil && defaultConfigPath != "" { ret.Config = defaultConfigPath } else if err != nil { - Debugf("Could not determine default config path: %v\n", err) + debuglog.Debug(debuglog.Detailed, "Could not determine default config path: %v\n", err) } } @@ -188,13 +184,13 @@ func Init() (ret *Flags, err error) { if flagField.CanSet() { if yamlField.Type() != flagField.Type() { if err := assignWithConversion(flagField, yamlField); err != nil { - Debugf("Type conversion failed for %s: 
%v\n", yamlTag, err) + debuglog.Debug(debuglog.Detailed, "Type conversion failed for %s: %v\n", yamlTag, err) continue } } else { flagField.Set(yamlField) } - Debugf("Applied YAML value for %s: %v\n", yamlTag, yamlField.Interface()) + debuglog.Debug(debuglog.Detailed, "Applied YAML value for %s: %v\n", yamlTag, yamlField.Interface()) } } } @@ -220,6 +216,22 @@ func Init() (ret *Flags, err error) { return } +func parseDebugLevel(args []string) int { + for i := 0; i < len(args); i++ { + arg := args[i] + if arg == "--debug" && i+1 < len(args) { + if lvl, err := strconv.Atoi(args[i+1]); err == nil { + return lvl + } + } else if strings.HasPrefix(arg, "--debug=") { + if lvl, err := strconv.Atoi(strings.TrimPrefix(arg, "--debug=")); err == nil { + return lvl + } + } + } + return 0 +} + func extractFlag(arg string) string { var flag string if strings.HasPrefix(arg, "--") { @@ -289,7 +301,7 @@ func loadYAMLConfig(configPath string) (*Flags, error) { return nil, fmt.Errorf("error parsing config file: %w", err) } - Debugf("Config: %v\n", config) + debuglog.Debug(debuglog.Detailed, "Config: %v\n", config) return config, nil } diff --git a/internal/log/log.go b/internal/log/log.go new file mode 100644 index 00000000..4a18af41 --- /dev/null +++ b/internal/log/log.go @@ -0,0 +1,69 @@ +package log + +import ( + "fmt" + "io" + "os" + "sync" +) + +// Level represents the debug verbosity. +type Level int + +const ( + // Off disables all debug output. + Off Level = iota + // Basic provides minimal debugging information. + Basic + // Detailed provides more verbose debugging. + Detailed + // Trace is the most verbose level. + Trace +) + +var ( + mu sync.RWMutex + level Level = Off + output io.Writer = os.Stderr +) + +// SetLevel sets the global debug level. +func SetLevel(l Level) { + mu.Lock() + level = l + mu.Unlock() +} + +// LevelFromInt converts an int to a Level. 
+func LevelFromInt(i int) Level { + switch { + case i <= 0: + return Off + case i == 1: + return Basic + case i == 2: + return Detailed + case i >= 3: + return Trace + default: + return Off + } +} + +// Debug writes a debug message if the global level permits. +func Debug(l Level, format string, a ...interface{}) { + mu.RLock() + current := level + w := output + mu.RUnlock() + if current >= l { + fmt.Fprintf(w, "DEBUG: "+format, a...) + } +} + +// SetOutput allows overriding the output destination for debug logs. +func SetOutput(w io.Writer) { + mu.Lock() + output = w + mu.Unlock() +} diff --git a/internal/plugins/ai/openai/openai_audio.go b/internal/plugins/ai/openai/openai_audio.go index 06b3d7ce..8c603c87 100644 --- a/internal/plugins/ai/openai/openai_audio.go +++ b/internal/plugins/ai/openai/openai_audio.go @@ -11,6 +11,8 @@ import ( "sort" "strings" + debuglog "github.com/danielmiessler/fabric/internal/log" + openai "github.com/openai/openai-go" ) @@ -56,18 +58,14 @@ func (o *Client) TranscribeFile(ctx context.Context, filePath, model string, spl return "", err } - debug := os.Getenv("FABRIC_STT_DEBUG") != "" - var files []string var cleanup func() if info.Size() > MaxAudioFileSize { if !split { return "", fmt.Errorf("file %s exceeds 25MB limit; use --split-media-file to enable automatic splitting", filePath) } - if debug { - fmt.Fprintf(os.Stderr, "File %s is larger than the size limit... breaking it up into chunks...\n", filePath) - } - if files, cleanup, err = splitAudioFile(filePath, ext, MaxAudioFileSize, debug); err != nil { + debuglog.Debug(debuglog.Basic, "File %s is larger than the size limit... 
breaking it up into chunks...\n", filePath) + if files, cleanup, err = splitAudioFile(filePath, ext, MaxAudioFileSize); err != nil { return "", err } defer cleanup() @@ -77,9 +75,7 @@ func (o *Client) TranscribeFile(ctx context.Context, filePath, model string, spl var builder strings.Builder for i, f := range files { - if debug { - fmt.Fprintf(os.Stderr, "Using model %s to transcribe part %d (file name: %s)...\n", model, i+1, f) - } + debuglog.Debug(debuglog.Basic, "Using model %s to transcribe part %d (file name: %s)...\n", model, i+1, f) var chunk *os.File if chunk, err = os.Open(f); err != nil { return "", err @@ -105,7 +101,7 @@ func (o *Client) TranscribeFile(ctx context.Context, filePath, model string, spl // splitAudioFile splits the source file into chunks smaller than maxSize using ffmpeg. // It returns the list of chunk file paths and a cleanup function. -func splitAudioFile(src, ext string, maxSize int64, debug bool) (files []string, cleanup func(), err error) { +func splitAudioFile(src, ext string, maxSize int64) (files []string, cleanup func(), err error) { if _, err = exec.LookPath("ffmpeg"); err != nil { return nil, nil, fmt.Errorf("ffmpeg not found: please install it") } @@ -119,9 +115,7 @@ func splitAudioFile(src, ext string, maxSize int64, debug bool) (files []string, segmentTime := 600 // start with 10 minutes for { pattern := filepath.Join(dir, "chunk-%03d"+ext) - if debug { - fmt.Fprintf(os.Stderr, "Running ffmpeg to split audio into %d-second chunks...\n", segmentTime) - } + debuglog.Debug(debuglog.Basic, "Running ffmpeg to split audio into %d-second chunks...\n", segmentTime) cmd := exec.Command("ffmpeg", "-y", "-i", src, "-f", "segment", "-segment_time", fmt.Sprintf("%d", segmentTime), "-c", "copy", pattern) var stderr bytes.Buffer cmd.Stderr = &stderr diff --git a/internal/plugins/ai/vendors.go b/internal/plugins/ai/vendors.go index 2e1202fe..8d8a51c1 100644 --- a/internal/plugins/ai/vendors.go +++ b/internal/plugins/ai/vendors.go @@ -148,7 
+148,6 @@ func (o *VendorsManager) setupVendorTo(vendor Vendor, configuredVendors map[stri delete(configuredVendors, vendor.GetName()) fmt.Printf("[%v] skipped\n", vendor.GetName()) } - return } type modelResult struct { diff --git a/internal/plugins/template/extension_registry.go b/internal/plugins/template/extension_registry.go index 470ae9a3..bd91cad1 100644 --- a/internal/plugins/template/extension_registry.go +++ b/internal/plugins/template/extension_registry.go @@ -10,8 +10,9 @@ import ( "strings" "time" + debuglog "github.com/danielmiessler/fabric/internal/log" + "gopkg.in/yaml.v3" - // Add this import ) // ExtensionDefinition represents a single extension configuration @@ -87,9 +88,7 @@ func NewExtensionRegistry(configDir string) *ExtensionRegistry { r.ensureConfigDir() if err := r.loadRegistry(); err != nil { - if Debug { - fmt.Printf("Warning: could not load extension registry: %v\n", err) - } + debuglog.Debug(debuglog.Basic, "Warning: could not load extension registry: %v\n", err) } return r diff --git a/internal/plugins/template/template.go b/internal/plugins/template/template.go index 9b2b41c1..e42811d7 100644 --- a/internal/plugins/template/template.go +++ b/internal/plugins/template/template.go @@ -6,6 +6,8 @@ import ( "path/filepath" "regexp" "strings" + + debuglog "github.com/danielmiessler/fabric/internal/log" ) var ( @@ -14,7 +16,6 @@ var ( filePlugin = &FilePlugin{} fetchPlugin = &FetchPlugin{} sysPlugin = &SysPlugin{} - Debug = false // Debug flag ) var extensionManager *ExtensionManager @@ -33,9 +34,7 @@ var pluginPattern = regexp.MustCompile(`\{\{plugin:([^:]+):([^:]+)(?::([^}]+))?\ var extensionPattern = regexp.MustCompile(`\{\{ext:([^:]+):([^:]+)(?::([^}]+))?\}\}`) func debugf(format string, a ...interface{}) { - if Debug { - fmt.Printf(format, a...) - } + debuglog.Debug(debuglog.Trace, format, a...) } func ApplyTemplate(content string, variables map[string]string, input string) (string, error) {