diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a30c4b7..9e9ada76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ ### PR [#1679](https://github.com/danielmiessler/Fabric/pull/1679) by [ksylvan](https://github.com/ksylvan): Add cross-platform desktop notifications to Fabric CLI - Add cross-platform desktop notifications with secure custom commands -- Integrate notification sending into chat processing workflow +- Integrate notification sending into chat processing workflow - Add --notification and --notification-command CLI flags and help - Provide cross-platform providers: macOS, Linux, Windows with fallbacks - Escape shell metacharacters to prevent injection vulnerabilities @@ -45,8 +45,7 @@ ### Direct commits -- Chore: remove redundant words -Signed-off-by: queryfast +- Remove redundant words from codebase - Fix typos in t_ patterns ## v1.4.272 (2025-07-28) diff --git a/README.md b/README.md index 4977026c..f2f50d12 100644 --- a/README.md +++ b/README.md @@ -554,6 +554,7 @@ Application Options: --notification Send desktop notification when command completes --notification-command= Custom command to run for notifications (overrides built-in notifications) + --yt-dlp-args= Additional arguments to pass to yt-dlp (e.g. 
'--cookies-from-browser brave') Help Options: -h, --help Show this help message diff --git a/cmd/generate_changelog/changelog.db b/cmd/generate_changelog/changelog.db index 15b9c4ed..1de9f274 100644 Binary files a/cmd/generate_changelog/changelog.db and b/cmd/generate_changelog/changelog.db differ diff --git a/completions/_fabric b/completions/_fabric index fa56c9f0..590030d7 100644 --- a/completions/_fabric +++ b/completions/_fabric @@ -80,6 +80,7 @@ _fabric() { '(--transcript-with-timestamps)--transcript-with-timestamps[Grab transcript from YouTube video with timestamps]' \ '(--comments)--comments[Grab comments from YouTube video and send to chat]' \ '(--metadata)--metadata[Output video metadata]' \ + '(--yt-dlp-args)--yt-dlp-args[Additional arguments to pass to yt-dlp]:yt-dlp args:' \ '(-g --language)'{-g,--language}'[Specify the Language Code for the chat, e.g. -g=en -g=zh]:language:' \ '(-u --scrape_url)'{-u,--scrape_url}'[Scrape website URL to markdown using Jina AI]:url:' \ '(-q --scrape_question)'{-q,--scrape_question}'[Search question using Jina AI]:question:' \ diff --git a/completions/fabric.bash b/completions/fabric.bash index afeb2c5c..e5d345f5 100644 --- a/completions/fabric.bash +++ b/completions/fabric.bash @@ -13,7 +13,7 @@ _fabric() { _get_comp_words_by_ref -n : cur prev words cword # Define all possible options/flags - local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve 
--serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" + local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h" # Helper function for dynamic completions _fabric_get_list() { @@ -85,7 +85,7 @@ _fabric() { return 0 ;; # Options requiring simple arguments (no specific completion logic here) - -v | --variable | -t | --temperature | -T | --topp | -P | --presencepenalty | -F | --frequencypenalty | --modelContextLength | -n | --latest | -y | --youtube | -g | --language | -u | --scrape_url | -q | --scrape_question | -e | --seed | --address | --api-key | --search-location | --image-compression | 
--think-start-tag | --think-end-tag | --notification-command) + -v | --variable | -t | --temperature | -T | --topp | -P | --presencepenalty | -F | --frequencypenalty | --modelContextLength | -n | --latest | -y | --youtube | --yt-dlp-args | -g | --language | -u | --scrape_url | -q | --scrape_question | -e | --seed | --address | --api-key | --search-location | --image-compression | --think-start-tag | --think-end-tag | --notification-command) # No specific completion suggestions, user types the value return 0 ;; diff --git a/completions/fabric.fish b/completions/fabric.fish index eaaeaec5..0efb0003 100755 --- a/completions/fabric.fish +++ b/completions/fabric.fish @@ -95,6 +95,7 @@ complete -c fabric -l transcript -d "Grab transcript from YouTube video and send complete -c fabric -l transcript-with-timestamps -d "Grab transcript from YouTube video with timestamps" complete -c fabric -l comments -d "Grab comments from YouTube video and send to chat" complete -c fabric -l metadata -d "Output video metadata" +complete -c fabric -l yt-dlp-args -d "Additional arguments to pass to yt-dlp (e.g. '--cookies-from-browser brave')" complete -c fabric -l readability -d "Convert HTML input into a clean, readable view" complete -c fabric -l input-has-vars -d "Apply variables to user input" complete -c fabric -l dry-run -d "Show what would be sent to the model without actually sending it" diff --git a/docs/YouTube-Processing.md b/docs/YouTube-Processing.md new file mode 100644 index 00000000..ab4f97c4 --- /dev/null +++ b/docs/YouTube-Processing.md @@ -0,0 +1,260 @@ +# YouTube Processing with Fabric + +Fabric provides powerful YouTube video processing capabilities that allow you to extract transcripts, comments, and metadata from YouTube videos and playlists. This guide covers all the available options and common use cases. + +## Prerequisites + +- **yt-dlp**: Required for transcript extraction. 
Install on MacOS with: + + ```bash + brew install yt-dlp + ``` + + Or use the package manager of your choice for your operating system. + + See the [yt-dlp wiki page](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for your specific installation instructions. + +- **YouTube API Key** (optional): Only needed for comments and metadata extraction. Configure with: + + ```bash + fabric --setup + ``` + +## Basic Usage + +### Extract Transcript + +Extract a video transcript and process it with a pattern: + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize +``` + +### Extract Transcript with Timestamps + +Get transcript with timestamps preserved: + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --transcript-with-timestamps --pattern extract_wisdom +``` + +### Extract Comments + +Get video comments (requires YouTube API key): + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --comments --pattern analyze_claims +``` + +### Extract Metadata + +Get video metadata as JSON: + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --metadata +``` + +## Advanced Options + +### Custom yt-dlp Arguments + +Pass additional arguments to yt-dlp for advanced functionality: + +```bash +# Use browser cookies for age-restricted or private videos +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser brave" + +# Specify subtitle language +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--sub-langs es" + +# Use specific format +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--format best" + +# Handle rate limiting (slow down requests) +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--sleep-requests 1" + +# Multiple arguments (use quotes) +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser firefox --write-info-json" + +# Combine rate limiting with authentication +fabric -y 
"https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser brave --sleep-requests 1" +``` + +### Playlist Processing + +Process entire playlists: + +```bash +# Process all videos in a playlist +fabric -y "https://www.youtube.com/playlist?list=PLAYLIST_ID" --playlist --pattern summarize + +# Save playlist videos to CSV +fabric -y "https://www.youtube.com/playlist?list=PLAYLIST_ID" --playlist -o playlist.csv +``` + +### Language Support + +Specify transcript language: + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" -g es --pattern translate +``` + +## Combining Options + +You can combine multiple YouTube processing options: + +```bash +# Get transcript, comments, and metadata +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \ + --transcript \ + --comments \ + --metadata \ + --pattern comprehensive_analysis +``` + +## Output Options + +### Save to File + +```bash +# Save output to file +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize -o summary.md + +# Save entire session including input +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize --output-session -o full_session.md +``` + +### Stream Output + +Get real-time streaming output: + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize --stream +``` + +## Common Use Cases + +### Content Analysis + +```bash +# Analyze video content for key insights +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_wisdom + +# Check claims made in the video +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern analyze_claims +``` + +### Educational Content + +```bash +# Create study notes from educational videos +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern create_study_notes + +# Extract key concepts and definitions +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_concepts +``` + +### Meeting/Conference Processing + +```bash +# Summarize 
conference talks with timestamps +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \ + --transcript-with-timestamps \ + --pattern meeting_summary + +# Extract action items from recorded meetings +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_action_items +``` + +### Content Creation + +```bash +# Create social media posts from video content +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern create_social_posts + +# Generate blog post from video transcript +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern write_blog_post +``` + +## Troubleshooting + +### Common Issues + +1. **"yt-dlp not found"**: Install yt-dlp using pip or your package manager +2. **Age-restricted videos**: Use `--yt-dlp-args "--cookies-from-browser BROWSER"` +3. **No subtitles available**: Some videos don't have auto-generated subtitles +4. **API rate limits**: YouTube API has daily quotas for comments/metadata +5. **HTTP 429 errors**: YouTube is rate limiting subtitle requests + +### Error Messages + +- **"YouTube is not configured"**: Run `fabric --setup` to configure YouTube API +- **"yt-dlp failed"**: Check video URL and try with `--yt-dlp-args` for authentication +- **"No transcript content found"**: Video may not have subtitles available +- **"HTTP Error 429: Too Many Requests"**: YouTube rate limit exceeded. This is increasingly common. 
Solutions: + - **Wait 10-30 minutes and try again** (most effective) + - Use longer sleep: `--yt-dlp-args "--sleep-requests 5"` + - Try with browser cookies: `--yt-dlp-args "--cookies-from-browser brave --sleep-requests 5"` + - **Try a different video** - some videos are less restricted + - **Use a VPN** - different IP address may help + - **Try without language specification** - let yt-dlp choose any available language + - **Try English instead** - `fabric -g en` (English subtitles may be less rate-limited) + +## Configuration + +### YAML Configuration + +You can set default yt-dlp arguments in your config file (`~/.config/fabric/config.yaml`): + +```yaml +ytDlpArgs: "--cookies-from-browser brave --write-info-json" +``` + +### Environment Variables + +Set up your YouTube API key: + +```bash +export FABRIC_YOUTUBE_API_KEY="your_api_key_here" +``` + +## Tips and Best Practices + +1. **Use specific patterns**: Choose patterns that match your use case for better results +2. **Combine with other tools**: Pipe output to other commands or save to files for further processing +3. **Batch processing**: Use playlists to process multiple videos efficiently +4. **Authentication**: Use browser cookies for accessing private or age-restricted content +5. **Language support**: Specify language codes for better transcript accuracy +6. **Rate limiting**: If you encounter 429 errors, use `--sleep-requests 1` to slow down requests +7. 
**Persistent settings**: Set common yt-dlp args in your config file to avoid repeating them + +## Examples + +### Quick Video Summary + +```bash +fabric -y "https://www.youtube.com/watch?v=dQw4w9WgXcQ" --pattern summarize --stream +``` + +### Detailed Analysis with Authentication + +```bash +fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \ + --yt-dlp-args "--cookies-from-browser chrome" \ + --transcript-with-timestamps \ + --comments \ + --pattern comprehensive_analysis \ + -o analysis.md +``` + +### Playlist Processing + +```bash +fabric -y "https://www.youtube.com/playlist?list=PLrAXtmRdnEQy6nuLvVUxpDnx4C0823vBN" \ + --playlist \ + --pattern extract_wisdom \ + -o playlist_wisdom.md +``` + +For more patterns and advanced usage, see the main [Fabric documentation](../README.md). diff --git a/internal/cli/cli.go b/internal/cli/cli.go index d61219d9..c77af830 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -113,11 +113,11 @@ func processYoutubeVideo( } } if flags.YouTubeTranscriptWithTimestamps { - if transcript, err = registry.YouTube.GrabTranscriptWithTimestamps(videoId, language); err != nil { + if transcript, err = registry.YouTube.GrabTranscriptWithTimestampsWithArgs(videoId, language, flags.YtDlpArgs); err != nil { return } } else { - if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil { + if transcript, err = registry.YouTube.GrabTranscriptWithArgs(videoId, language, flags.YtDlpArgs); err != nil { return } } diff --git a/internal/cli/flags.go b/internal/cli/flags.go index d53562f0..aff8c884 100644 --- a/internal/cli/flags.go +++ b/internal/cli/flags.go @@ -54,6 +54,7 @@ type Flags struct { YouTubeTranscriptWithTimestamps bool `long:"transcript-with-timestamps" description:"Grab transcript from YouTube video with timestamps and send to chat"` YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"` YouTubeMetadata bool `long:"metadata" description:"Output video 
// parseShellArgs splits a shell-style argument string into individual
// arguments suitable for passing to exec.Command.
//
// Rules:
//   - Unquoted spaces and tabs separate arguments; runs of whitespace collapse.
//   - Text inside single or double quotes is kept verbatim, including
//     whitespace and the other kind of quote character.
//   - Inside a quoted section, a backslash immediately followed by the active
//     quote character produces a literal quote; the backslash is dropped.
//   - Every other backslash is kept literally, so Windows paths such as
//     C:\Users\me\cookies.txt pass through unchanged.
//   - An explicitly quoted empty string ("" or '') yields an empty argument
//     rather than being discarded.
//   - An unterminated quote is tolerated: the remaining text becomes the
//     final argument.
//
// It returns nil when argsStr is empty or contains only whitespace.
func parseShellArgs(argsStr string) []string {
	if argsStr == "" {
		return nil
	}

	var (
		args     []string
		current  strings.Builder
		quote    rune // active quote character; 0 while outside quotes
		hasToken bool // true once the current argument has started, even if empty
	)

	// Work on runes so the one-character lookahead is safe for non-ASCII input.
	runes := []rune(argsStr)
	for i := 0; i < len(runes); i++ {
		r := runes[i]
		switch {
		case quote != 0 && r == '\\' && i+1 < len(runes) && runes[i+1] == quote:
			// Escaped quote inside a quoted section: emit the quote only.
			current.WriteRune(quote)
			i++
		case quote != 0 && r == quote:
			quote = 0
		case quote != 0:
			current.WriteRune(r)
		case r == '"' || r == '\'':
			quote = r
			// Opening a quote starts an argument even if nothing follows,
			// which is what lets "" survive as an empty argument.
			hasToken = true
		case r == ' ' || r == '\t':
			if hasToken {
				args = append(args, current.String())
				current.Reset()
				hasToken = false
			}
		default:
			current.WriteRune(r)
			hasToken = true
		}
	}

	if hasToken {
		args = append(args, current.String())
	}

	return args
}
return o.GrabTranscriptWithArgs(videoId, language, "") +} + +func (o *YouTube) GrabTranscriptWithArgs(videoId string, language string, additionalArgs string) (ret string, err error) { + // Use yt-dlp for reliable transcript extraction + return o.tryMethodYtDlp(videoId, language, additionalArgs) } func (o *YouTube) GrabTranscriptWithTimestamps(videoId string, language string) (ret string, err error) { // Use yt-dlp for reliable transcript extraction with timestamps - return o.tryMethodYtDlpWithTimestamps(videoId, language) + return o.GrabTranscriptWithTimestampsWithArgs(videoId, language, "") +} + +func (o *YouTube) GrabTranscriptWithTimestampsWithArgs(videoId string, language string, additionalArgs string) (ret string, err error) { + // Use yt-dlp for reliable transcript extraction with timestamps + return o.tryMethodYtDlpWithTimestamps(videoId, language, additionalArgs) } // tryMethodYtDlpInternal is a helper function to reduce duplication between // tryMethodYtDlp and tryMethodYtDlpWithTimestamps. -func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, processVTTFileFunc func(filename string) (string, error)) (ret string, err error) { +func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, additionalArgs string, processVTTFileFunc func(filename string) (string, error)) (ret string, err error) { // Check if yt-dlp is available if _, err = exec.LookPath("yt-dlp"); err != nil { err = fmt.Errorf("yt-dlp not found in PATH. Please install yt-dlp to use YouTube transcript functionality") @@ -152,38 +203,69 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, proces } args := append([]string{}, baseArgs...) + + // Add additional arguments if provided + if additionalArgs != "" { + additionalArgsList := parseShellArgs(additionalArgs) + args = append(args, additionalArgsList...) 
+ } + if language != "" { langMatch := language if len(langMatch) > 2 { langMatch = langMatch[:2] } - args = append(args, "--sub-lang", langMatch) + args = append(args, "--sub-langs", langMatch) } args = append(args, videoURL) - cmd := exec.Command("yt-dlp", args...) var stderr bytes.Buffer cmd.Stderr = &stderr if err = cmd.Run(); err != nil { + stderrStr := stderr.String() + + // Check for specific YouTube errors + if strings.Contains(stderrStr, "429") || strings.Contains(stderrStr, "Too Many Requests") { + err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1' to slow down requests. Error: %v", err) + return + } + + if strings.Contains(stderrStr, "Sign in to confirm you're not a bot") || strings.Contains(stderrStr, "Use --cookies-from-browser") { + err = fmt.Errorf("YouTube requires authentication (bot detection). Use --yt-dlp-args '--cookies-from-browser BROWSER' where BROWSER is chrome, firefox, brave, etc. Error: %v", err) + return + } + if language != "" { - // Fallback: try again without specifying language + // Fallback: try without specifying language (let yt-dlp choose best available) stderr.Reset() fallbackArgs := append([]string{}, baseArgs...) + + // Add additional arguments if provided + if additionalArgs != "" { + additionalArgsList := parseShellArgs(additionalArgs) + fallbackArgs = append(fallbackArgs, additionalArgsList...) + } + + // Don't specify language, let yt-dlp choose fallbackArgs = append(fallbackArgs, videoURL) cmd = exec.Command("yt-dlp", fallbackArgs...) cmd.Stderr = &stderr if err = cmd.Run(); err != nil { - err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderr.String()) + stderrStr2 := stderr.String() + if strings.Contains(stderrStr2, "429") || strings.Contains(stderrStr2, "Too Many Requests") { + err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1'. 
Error: %v", err) + } else { + err = fmt.Errorf("yt-dlp failed with language '%s' and fallback. Original error: %s. Fallback error: %s", language, stderrStr, stderrStr2) + } return } } else { - err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderr.String()) + err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderrStr) return } } - // Find VTT files using cross-platform approach vttFiles, err := o.findVTTFiles(tempDir, language) if err != nil { @@ -193,12 +275,12 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, proces return processVTTFileFunc(vttFiles[0]) } -func (o *YouTube) tryMethodYtDlp(videoId string, language string) (ret string, err error) { - return o.tryMethodYtDlpInternal(videoId, language, o.readAndCleanVTTFile) +func (o *YouTube) tryMethodYtDlp(videoId string, language string, additionalArgs string) (ret string, err error) { + return o.tryMethodYtDlpInternal(videoId, language, additionalArgs, o.readAndCleanVTTFile) } -func (o *YouTube) tryMethodYtDlpWithTimestamps(videoId string, language string) (ret string, err error) { - return o.tryMethodYtDlpInternal(videoId, language, o.readAndFormatVTTWithTimestamps) +func (o *YouTube) tryMethodYtDlpWithTimestamps(videoId string, language string, additionalArgs string) (ret string, err error) { + return o.tryMethodYtDlpInternal(videoId, language, additionalArgs, o.readAndFormatVTTWithTimestamps) } func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {