mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-01-09 22:38:10 -05:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f33d27f836 | ||
|
|
1694324261 | ||
|
|
3a3f5c50a8 | ||
|
|
b1abfd71c2 | ||
|
|
f5b7279225 | ||
|
|
b974e1bfd5 | ||
|
|
8dda68b3b9 | ||
|
|
33c24e0cb2 | ||
|
|
8fb0c5b8a8 | ||
|
|
d82122b624 | ||
|
|
f5966af95a | ||
|
|
9470ee1655 | ||
|
|
9a118cf637 | ||
|
|
d69757908f | ||
|
|
30525ef1c0 | ||
|
|
8414e72545 | ||
|
|
caca366511 | ||
|
|
261eb30951 | ||
|
|
bdb36ee296 | ||
|
|
2b195f204d |
34
CHANGELOG.md
34
CHANGELOG.md
@@ -1,11 +1,40 @@
|
||||
# Changelog
|
||||
|
||||
## v1.4.280 (2025-08-10)
|
||||
|
||||
### PR [#1686](https://github.com/danielmiessler/Fabric/pull/1686) by [ksylvan](https://github.com/ksylvan): Prevent duplicate text output in OpenAI streaming responses
|
||||
|
||||
- Fix: prevent duplicate text output in OpenAI streaming responses
|
||||
- Skip processing of ResponseOutputTextDone events
|
||||
- Prevent doubled text in stream output
|
||||
- Add clarifying comment about API behavior
|
||||
- Maintain delta chunk streaming functionality
|
||||
|
||||
## v1.4.279 (2025-08-10)
|
||||
|
||||
### PR [#1685](https://github.com/danielmiessler/Fabric/pull/1685) by [ksylvan](https://github.com/ksylvan): Fix Gemini Role Mapping for API Compatibility
|
||||
|
||||
- Fix Gemini role mapping to ensure proper API compatibility by converting chat roles to Gemini's user/model format
|
||||
- Map assistant role to model role per Gemini API constraints
|
||||
- Map system, developer, function, and tool roles to user role for proper handling
|
||||
- Default unrecognized roles to user role to preserve instruction context
|
||||
- Add comprehensive unit tests to validate convertMessages role mapping logic
|
||||
|
||||
## v1.4.278 (2025-08-09)
|
||||
|
||||
### PR [#1681](https://github.com/danielmiessler/Fabric/pull/1681) by [ksylvan](https://github.com/ksylvan): Enhance YouTube Support with Custom yt-dlp Arguments
|
||||
|
||||
- Add `--yt-dlp-args` flag for custom YouTube downloader options with advanced control capabilities
|
||||
- Implement smart subtitle language fallback system when requested locale is unavailable
|
||||
- Add fallback logic for YouTube subtitle language detection with auto-detection of downloaded languages
|
||||
- Replace custom argument parser with shellquote and precompile regexes for improved performance and safety
|
||||
|
||||
## v1.4.277 (2025-08-08)
|
||||
|
||||
### PR [#1679](https://github.com/danielmiessler/Fabric/pull/1679) by [ksylvan](https://github.com/ksylvan): Add cross-platform desktop notifications to Fabric CLI
|
||||
|
||||
- Add cross-platform desktop notifications with secure custom commands
|
||||
- Integrate notification sending into chat processing workflow
|
||||
- Integrate notification sending into chat processing workflow
|
||||
- Add --notification and --notification-command CLI flags and help
|
||||
- Provide cross-platform providers: macOS, Linux, Windows with fallbacks
|
||||
- Escape shell metacharacters to prevent injection vulnerabilities
|
||||
@@ -45,8 +74,7 @@
|
||||
|
||||
### Direct commits
|
||||
|
||||
- Chore: remove redundant words
|
||||
Signed-off-by: queryfast <queryfast@outlook.com>
|
||||
- Remove redundant words from codebase
|
||||
- Fix typos in t_ patterns
|
||||
|
||||
## v1.4.272 (2025-07-28)
|
||||
|
||||
@@ -554,6 +554,7 @@ Application Options:
|
||||
--notification Send desktop notification when command completes
|
||||
--notification-command= Custom command to run for notifications (overrides built-in
|
||||
notifications)
|
||||
--yt-dlp-args= Additional arguments to pass to yt-dlp (e.g. '--cookies-from-browser brave')
|
||||
|
||||
Help Options:
|
||||
-h, --help Show this help message
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
package main
|
||||
|
||||
var version = "v1.4.277"
|
||||
var version = "v1.4.280"
|
||||
|
||||
Binary file not shown.
@@ -80,6 +80,7 @@ _fabric() {
|
||||
'(--transcript-with-timestamps)--transcript-with-timestamps[Grab transcript from YouTube video with timestamps]' \
|
||||
'(--comments)--comments[Grab comments from YouTube video and send to chat]' \
|
||||
'(--metadata)--metadata[Output video metadata]' \
|
||||
'(--yt-dlp-args)--yt-dlp-args[Additional arguments to pass to yt-dlp]:yt-dlp args:' \
|
||||
'(-g --language)'{-g,--language}'[Specify the Language Code for the chat, e.g. -g=en -g=zh]:language:' \
|
||||
'(-u --scrape_url)'{-u,--scrape_url}'[Scrape website URL to markdown using Jina AI]:url:' \
|
||||
'(-q --scrape_question)'{-q,--scrape_question}'[Search question using Jina AI]:question:' \
|
||||
|
||||
@@ -13,7 +13,7 @@ _fabric() {
|
||||
_get_comp_words_by_ref -n : cur prev words cword
|
||||
|
||||
# Define all possible options/flags
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
local opts="--pattern -p --variable -v --context -C --session --attachment -a --setup -S --temperature -t --topp -T --stream -s --presencepenalty -P --raw -r --frequencypenalty -F --listpatterns -l --listmodels -L --listcontexts -x --listsessions -X --updatepatterns -U --copy -c --model -m --modelContextLength --output -o --output-session --latest -n --changeDefaultModel -d --youtube -y --playlist --transcript --transcript-with-timestamps --comments --metadata --yt-dlp-args --language -g --scrape_url -u --scrape_question -q --seed -e --wipecontext -w --wipesession -W --printcontext --printsession --readability --input-has-vars --dry-run --serve --serveOllama --address --api-key --config --search --search-location --image-file --image-size --image-quality --image-compression --image-background --suppress-think --think-start-tag --think-end-tag --disable-responses-api --voice --list-gemini-voices --notification --notification-command --version --listextensions --addextension --rmextension --strategy --liststrategies --listvendors --shell-complete-list --help -h"
|
||||
|
||||
# Helper function for dynamic completions
|
||||
_fabric_get_list() {
|
||||
@@ -85,7 +85,7 @@ _fabric() {
|
||||
return 0
|
||||
;;
|
||||
# Options requiring simple arguments (no specific completion logic here)
|
||||
-v | --variable | -t | --temperature | -T | --topp | -P | --presencepenalty | -F | --frequencypenalty | --modelContextLength | -n | --latest | -y | --youtube | -g | --language | -u | --scrape_url | -q | --scrape_question | -e | --seed | --address | --api-key | --search-location | --image-compression | --think-start-tag | --think-end-tag | --notification-command)
|
||||
-v | --variable | -t | --temperature | -T | --topp | -P | --presencepenalty | -F | --frequencypenalty | --modelContextLength | -n | --latest | -y | --youtube | --yt-dlp-args | -g | --language | -u | --scrape_url | -q | --scrape_question | -e | --seed | --address | --api-key | --search-location | --image-compression | --think-start-tag | --think-end-tag | --notification-command)
|
||||
# No specific completion suggestions, user types the value
|
||||
return 0
|
||||
;;
|
||||
|
||||
@@ -95,6 +95,7 @@ complete -c fabric -l transcript -d "Grab transcript from YouTube video and send
|
||||
complete -c fabric -l transcript-with-timestamps -d "Grab transcript from YouTube video with timestamps"
|
||||
complete -c fabric -l comments -d "Grab comments from YouTube video and send to chat"
|
||||
complete -c fabric -l metadata -d "Output video metadata"
|
||||
complete -c fabric -l yt-dlp-args -d "Additional arguments to pass to yt-dlp (e.g. '--cookies-from-browser brave')"
|
||||
complete -c fabric -l readability -d "Convert HTML input into a clean, readable view"
|
||||
complete -c fabric -l input-has-vars -d "Apply variables to user input"
|
||||
complete -c fabric -l dry-run -d "Show what would be sent to the model without actually sending it"
|
||||
|
||||
298
docs/YouTube-Processing.md
Normal file
298
docs/YouTube-Processing.md
Normal file
@@ -0,0 +1,298 @@
|
||||
# YouTube Processing with Fabric
|
||||
|
||||
Fabric provides powerful YouTube video processing capabilities that allow you to extract transcripts, comments, and metadata from YouTube videos and playlists. This guide covers all the available options and common use cases.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- **yt-dlp**: Required for transcript extraction. Install on macOS with:
|
||||
|
||||
```bash
|
||||
brew install yt-dlp
|
||||
```
|
||||
|
||||
Or use the package manager of your choice for your operating system.
|
||||
|
||||
See the [yt-dlp wiki page](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for installation instructions specific to your platform.
|
||||
|
||||
- **YouTube API Key** (optional): Only needed for comments and metadata extraction. Configure with:
|
||||
|
||||
```bash
|
||||
fabric --setup
|
||||
```
|
||||
|
||||
## Basic Usage
|
||||
|
||||
### Extract Transcript
|
||||
|
||||
Extract a video transcript and process it with a pattern:
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize
|
||||
```
|
||||
|
||||
### Extract Transcript with Timestamps
|
||||
|
||||
Get transcript with timestamps preserved:
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --transcript-with-timestamps --pattern extract_wisdom
|
||||
```
|
||||
|
||||
### Extract Comments
|
||||
|
||||
Get video comments (requires YouTube API key):
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --comments --pattern analyze_claims
|
||||
```
|
||||
|
||||
### Extract Metadata
|
||||
|
||||
Get video metadata as JSON:
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --metadata
|
||||
```
|
||||
|
||||
## Advanced Options
|
||||
|
||||
### Custom yt-dlp Arguments
|
||||
|
||||
Pass additional arguments to yt-dlp for advanced functionality. **User-provided arguments take precedence** over Fabric's built-in arguments, giving you full control:
|
||||
|
||||
```bash
|
||||
# Use browser cookies for age-restricted or private videos
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser brave"
|
||||
|
||||
# Override language selection (takes precedence over -g flag)
|
||||
fabric -g en -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--sub-langs es,fr"
|
||||
|
||||
# Use specific format
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--format best"
|
||||
|
||||
# Handle rate limiting (slow down requests)
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--sleep-requests 1"
|
||||
|
||||
# Multiple arguments (use quotes)
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser firefox --write-info-json"
|
||||
|
||||
# Combine rate limiting with authentication
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--cookies-from-browser brave --sleep-requests 1"
|
||||
|
||||
# Override subtitle format (takes precedence over built-in --sub-format vtt)
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --yt-dlp-args "--sub-format srt"
|
||||
```
|
||||
|
||||
#### Argument Precedence
|
||||
|
||||
Fabric constructs the yt-dlp command in this order:
|
||||
|
||||
1. **Built-in base arguments** (`--write-auto-subs`, `--skip-download`, etc.)
|
||||
2. **Language selection** (from `-g` flag): `--sub-langs LANGUAGE`
|
||||
3. **User arguments** (from `--yt-dlp-args`): **These override any conflicting built-in arguments**
|
||||
4. **Video URL**
|
||||
|
||||
This means you can override any built-in behavior by specifying it in `--yt-dlp-args`.
|
||||
|
||||
### Playlist Processing
|
||||
|
||||
Process entire playlists:
|
||||
|
||||
```bash
|
||||
# Process all videos in a playlist
|
||||
fabric -y "https://www.youtube.com/playlist?list=PLAYLIST_ID" --playlist --pattern summarize
|
||||
|
||||
# Save playlist videos to CSV
|
||||
fabric -y "https://www.youtube.com/playlist?list=PLAYLIST_ID" --playlist -o playlist.csv
|
||||
```
|
||||
|
||||
### Language Support
|
||||
|
||||
Specify transcript language:
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" -g es --pattern translate
|
||||
```
|
||||
|
||||
## Combining Options
|
||||
|
||||
You can combine multiple YouTube processing options:
|
||||
|
||||
```bash
|
||||
# Get transcript, comments, and metadata
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \
|
||||
--transcript \
|
||||
--comments \
|
||||
--metadata \
|
||||
--pattern comprehensive_analysis
|
||||
```
|
||||
|
||||
## Output Options
|
||||
|
||||
### Save to File
|
||||
|
||||
```bash
|
||||
# Save output to file
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize -o summary.md
|
||||
|
||||
# Save entire session including input
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize --output-session -o full_session.md
|
||||
```
|
||||
|
||||
### Stream Output
|
||||
|
||||
Get real-time streaming output:
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern summarize --stream
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### Content Analysis
|
||||
|
||||
```bash
|
||||
# Analyze video content for key insights
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_wisdom
|
||||
|
||||
# Check claims made in the video
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern analyze_claims
|
||||
```
|
||||
|
||||
### Educational Content
|
||||
|
||||
```bash
|
||||
# Create study notes from educational videos
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern create_study_notes
|
||||
|
||||
# Extract key concepts and definitions
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_concepts
|
||||
```
|
||||
|
||||
### Meeting/Conference Processing
|
||||
|
||||
```bash
|
||||
# Summarize conference talks with timestamps
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \
|
||||
--transcript-with-timestamps \
|
||||
--pattern meeting_summary
|
||||
|
||||
# Extract action items from recorded meetings
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern extract_action_items
|
||||
```
|
||||
|
||||
### Content Creation
|
||||
|
||||
```bash
|
||||
# Create social media posts from video content
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern create_social_posts
|
||||
|
||||
# Generate blog post from video transcript
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" --pattern write_blog_post
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **"yt-dlp not found"**: Install yt-dlp using pip or your package manager
|
||||
2. **Age-restricted videos**: Use `--yt-dlp-args "--cookies-from-browser BROWSER"`
|
||||
3. **No subtitles available**: Some videos don't have auto-generated subtitles
|
||||
4. **API rate limits**: YouTube API has daily quotas for comments/metadata
|
||||
5. **HTTP 429 errors**: YouTube is rate limiting subtitle requests
|
||||
|
||||
### Error Messages
|
||||
|
||||
- **"YouTube is not configured"**: Run `fabric --setup` to configure YouTube API
|
||||
- **"yt-dlp failed"**: Check video URL and try with `--yt-dlp-args` for authentication
|
||||
- **"No transcript content found"**: Video may not have subtitles available
|
||||
- **"HTTP Error 429: Too Many Requests"**: YouTube rate limit exceeded. This is increasingly common. Solutions:
|
||||
- **Wait 10-30 minutes and try again** (most effective)
|
||||
- Use longer sleep: `--yt-dlp-args "--sleep-requests 5"`
|
||||
- Try with browser cookies: `--yt-dlp-args "--cookies-from-browser brave --sleep-requests 5"`
|
||||
- **Try a different video** - some videos are less restricted
|
||||
- **Use a VPN** - different IP address may help
|
||||
- **Try without language specification** - let yt-dlp choose any available language
|
||||
- **Try English instead** - `fabric -g en` (English subtitles may be less rate-limited)
|
||||
|
||||
### Language Fallback Behavior
|
||||
|
||||
When you specify a language (e.g., `-g es` for Spanish) but that language isn't available or fails to download:
|
||||
|
||||
1. **Automatic fallback**: Fabric automatically retries without a language specification
|
||||
2. **Smart file detection**: If the fallback downloads a different language (e.g., English), Fabric will automatically detect and use it
|
||||
3. **No manual intervention needed**: The process is transparent to the user
|
||||
|
||||
```bash
|
||||
# Even if Spanish isn't available, this will work with whatever language yt-dlp finds
|
||||
fabric -g es -y "https://youtube.com/watch?v=VIDEO_ID" --pattern summarize
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### YAML Configuration
|
||||
|
||||
You can set default yt-dlp arguments in your config file (`~/.config/fabric/config.yaml`):
|
||||
|
||||
```yaml
|
||||
ytDlpArgs: "--cookies-from-browser brave --write-info-json"
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Set up your YouTube API key:
|
||||
|
||||
```bash
|
||||
export FABRIC_YOUTUBE_API_KEY="your_api_key_here"
|
||||
```
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
1. **Use specific patterns**: Choose patterns that match your use case for better results
|
||||
2. **Combine with other tools**: Pipe output to other commands or save to files for further processing
|
||||
3. **Batch processing**: Use playlists to process multiple videos efficiently
|
||||
4. **Authentication**: Use browser cookies for accessing private or age-restricted content
|
||||
5. **Language support**: Specify language codes for better transcript accuracy
|
||||
6. **Rate limiting**: If you encounter 429 errors, use `--sleep-requests 1` to slow down requests
|
||||
7. **Persistent settings**: Set common yt-dlp args in your config file to avoid repeating them
|
||||
8. **Argument precedence**: Use `--yt-dlp-args` to override any built-in behavior when needed
|
||||
9. **Testing**: Use `yt-dlp --list-subs URL` to see available subtitle languages before processing
|
||||
|
||||
## Examples
|
||||
|
||||
### Quick Video Summary
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=dQw4w9WgXcQ" --pattern summarize --stream
|
||||
```
|
||||
|
||||
### Detailed Analysis with Authentication
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/watch?v=VIDEO_ID" \
|
||||
--yt-dlp-args "--cookies-from-browser chrome" \
|
||||
--transcript-with-timestamps \
|
||||
--comments \
|
||||
--pattern comprehensive_analysis \
|
||||
-o analysis.md
|
||||
```
|
||||
|
||||
### Playlist Processing
|
||||
|
||||
```bash
|
||||
fabric -y "https://www.youtube.com/playlist?list=PLrAXtmRdnEQy6nuLvVUxpDnx4C0823vBN" \
|
||||
--playlist \
|
||||
--pattern extract_wisdom \
|
||||
-o playlist_wisdom.md
|
||||
```
|
||||
|
||||
### Override Built-in Language Selection
|
||||
|
||||
```bash
|
||||
# Built-in language selection (-g es) is overridden by user args
|
||||
fabric -g es -y "https://www.youtube.com/watch?v=VIDEO_ID" \
|
||||
--yt-dlp-args "--sub-langs fr,de,en" \
|
||||
--pattern translate
|
||||
```
|
||||
|
||||
For more patterns and advanced usage, see the main [Fabric documentation](../README.md).
|
||||
1
go.mod
1
go.mod
@@ -37,6 +37,7 @@ require (
|
||||
require (
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
|
||||
2
go.sum
2
go.sum
@@ -155,6 +155,8 @@ github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
|
||||
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
|
||||
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
|
||||
github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
|
||||
@@ -113,11 +113,11 @@ func processYoutubeVideo(
|
||||
}
|
||||
}
|
||||
if flags.YouTubeTranscriptWithTimestamps {
|
||||
if transcript, err = registry.YouTube.GrabTranscriptWithTimestamps(videoId, language); err != nil {
|
||||
if transcript, err = registry.YouTube.GrabTranscriptWithTimestampsWithArgs(videoId, language, flags.YtDlpArgs); err != nil {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil {
|
||||
if transcript, err = registry.YouTube.GrabTranscriptWithArgs(videoId, language, flags.YtDlpArgs); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,6 +54,7 @@ type Flags struct {
|
||||
YouTubeTranscriptWithTimestamps bool `long:"transcript-with-timestamps" description:"Grab transcript from YouTube video with timestamps and send to chat"`
|
||||
YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"`
|
||||
YouTubeMetadata bool `long:"metadata" description:"Output video metadata"`
|
||||
YtDlpArgs string `long:"yt-dlp-args" yaml:"ytDlpArgs" description:"Additional arguments to pass to yt-dlp (e.g. '--cookies-from-browser brave')"`
|
||||
Language string `short:"g" long:"language" description:"Specify the Language Code for the chat, e.g. -g=en -g=zh" default:""`
|
||||
ScrapeURL string `short:"u" long:"scrape_url" description:"Scrape website URL to markdown using Jina AI"`
|
||||
ScrapeQuestion string `short:"q" long:"scrape_question" description:"Search question using Jina AI"`
|
||||
|
||||
@@ -336,6 +336,19 @@ func (o *Client) convertMessages(msgs []*chat.ChatCompletionMessage) []*genai.Co
|
||||
for _, msg := range msgs {
|
||||
content := &genai.Content{Parts: []*genai.Part{}}
|
||||
|
||||
switch msg.Role {
|
||||
case chat.ChatMessageRoleAssistant:
|
||||
content.Role = "model"
|
||||
case chat.ChatMessageRoleUser:
|
||||
content.Role = "user"
|
||||
case chat.ChatMessageRoleSystem, chat.ChatMessageRoleDeveloper, chat.ChatMessageRoleFunction, chat.ChatMessageRoleTool:
|
||||
// Gemini's API only accepts "user" and "model" roles.
|
||||
// Map all other roles to "user" to preserve instruction context.
|
||||
content.Role = "user"
|
||||
default:
|
||||
content.Role = "user"
|
||||
}
|
||||
|
||||
if msg.Content != "" {
|
||||
content.Parts = append(content.Parts, &genai.Part{Text: msg.Content})
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"testing"
|
||||
|
||||
"google.golang.org/genai"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/chat"
|
||||
)
|
||||
|
||||
// Test buildModelNameFull method
|
||||
@@ -51,6 +53,30 @@ func TestExtractTextFromResponse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test convertMessages handles role mapping correctly
|
||||
func TestConvertMessagesRoles(t *testing.T) {
|
||||
client := &Client{}
|
||||
msgs := []*chat.ChatCompletionMessage{
|
||||
{Role: chat.ChatMessageRoleUser, Content: "user"},
|
||||
{Role: chat.ChatMessageRoleAssistant, Content: "assistant"},
|
||||
{Role: chat.ChatMessageRoleSystem, Content: "system"},
|
||||
}
|
||||
|
||||
contents := client.convertMessages(msgs)
|
||||
|
||||
expected := []string{"user", "model", "user"}
|
||||
|
||||
if len(contents) != len(expected) {
|
||||
t.Fatalf("expected %d contents, got %d", len(expected), len(contents))
|
||||
}
|
||||
|
||||
for i, c := range contents {
|
||||
if c.Role != expected[i] {
|
||||
t.Errorf("content %d expected role %s, got %s", i, expected[i], c.Role)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test isTTSModel method
|
||||
func TestIsTTSModel(t *testing.T) {
|
||||
client := &Client{}
|
||||
|
||||
@@ -115,7 +115,11 @@ func (o *Client) sendStreamResponses(
|
||||
case string(constant.ResponseOutputTextDelta("").Default()):
|
||||
channel <- event.AsResponseOutputTextDelta().Delta
|
||||
case string(constant.ResponseOutputTextDone("").Default()):
|
||||
channel <- event.AsResponseOutputTextDone().Text
|
||||
// The Responses API sends the full text again in the
|
||||
// final "done" event. Since we've already streamed all
|
||||
// delta chunks above, sending it would duplicate the
|
||||
// output. Ignore it here to prevent doubled results.
|
||||
continue
|
||||
}
|
||||
}
|
||||
if stream.Err() == nil {
|
||||
|
||||
@@ -25,17 +25,33 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/danielmiessler/fabric/internal/plugins"
|
||||
"github.com/kballard/go-shellquote"
|
||||
"google.golang.org/api/option"
|
||||
"google.golang.org/api/youtube/v3"
|
||||
)
|
||||
|
||||
var timestampRegex *regexp.Regexp
|
||||
var languageFileRegex *regexp.Regexp
|
||||
var videoPatternRegex *regexp.Regexp
|
||||
var playlistPatternRegex *regexp.Regexp
|
||||
var vttTagRegex *regexp.Regexp
|
||||
var durationRegex *regexp.Regexp
|
||||
|
||||
const TimeGapForRepeats = 10 // seconds
|
||||
|
||||
func init() {
|
||||
// Match lines that are only a cue sequence number (digits) or a timestamp like "00:01", "00:00:01" or "00:00:01.234"
|
||||
timestampRegex = regexp.MustCompile(`^\d+$|^\d{1,2}:\d{2}(:\d{2})?(\.\d{3})?$`)
|
||||
// Match language-specific VTT files like .en.vtt, .es.vtt, .en-US.vtt, .pt-BR.vtt
|
||||
languageFileRegex = regexp.MustCompile(`\.[a-z]{2}(-[A-Z]{2})?\.vtt$`)
|
||||
// YouTube video ID pattern
|
||||
videoPatternRegex = regexp.MustCompile(`(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:live\/|[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|(?:s(?:horts)\/)|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]*)`)
|
||||
// YouTube playlist ID pattern
|
||||
playlistPatternRegex = regexp.MustCompile(`[?&]list=([a-zA-Z0-9_-]+)`)
|
||||
// VTT formatting tags like <c.colorE5E5E5>, </c>, etc.
|
||||
vttTagRegex = regexp.MustCompile(`<[^>]*>`)
|
||||
// YouTube duration format PT1H2M3S
|
||||
durationRegex = regexp.MustCompile(`(?i)PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?`)
|
||||
}
|
||||
|
||||
func NewYouTube() (ret *YouTube) {
|
||||
@@ -76,18 +92,14 @@ func (o *YouTube) initService() (err error) {
|
||||
}
|
||||
|
||||
func (o *YouTube) GetVideoOrPlaylistId(url string) (videoId string, playlistId string, err error) {
|
||||
// Video ID pattern
|
||||
videoPattern := `(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:live\/|[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|(?:s(?:horts)\/)|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]*)`
|
||||
videoRe := regexp.MustCompile(videoPattern)
|
||||
videoMatch := videoRe.FindStringSubmatch(url)
|
||||
// Extract video ID using pre-compiled regex
|
||||
videoMatch := videoPatternRegex.FindStringSubmatch(url)
|
||||
if len(videoMatch) > 1 {
|
||||
videoId = videoMatch[1]
|
||||
}
|
||||
|
||||
// Playlist ID pattern
|
||||
playlistPattern := `[?&]list=([a-zA-Z0-9_-]+)`
|
||||
playlistRe := regexp.MustCompile(playlistPattern)
|
||||
playlistMatch := playlistRe.FindStringSubmatch(url)
|
||||
// Extract playlist ID using pre-compiled regex
|
||||
playlistMatch := playlistPatternRegex.FindStringSubmatch(url)
|
||||
if len(playlistMatch) > 1 {
|
||||
playlistId = playlistMatch[1]
|
||||
}
|
||||
@@ -113,17 +125,27 @@ func (o *YouTube) GrabTranscriptForUrl(url string, language string) (ret string,
|
||||
|
||||
func (o *YouTube) GrabTranscript(videoId string, language string) (ret string, err error) {
|
||||
// Use yt-dlp for reliable transcript extraction
|
||||
return o.tryMethodYtDlp(videoId, language)
|
||||
return o.GrabTranscriptWithArgs(videoId, language, "")
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscriptWithArgs(videoId string, language string, additionalArgs string) (ret string, err error) {
|
||||
// Use yt-dlp for reliable transcript extraction
|
||||
return o.tryMethodYtDlp(videoId, language, additionalArgs)
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscriptWithTimestamps(videoId string, language string) (ret string, err error) {
|
||||
// Use yt-dlp for reliable transcript extraction with timestamps
|
||||
return o.tryMethodYtDlpWithTimestamps(videoId, language)
|
||||
return o.GrabTranscriptWithTimestampsWithArgs(videoId, language, "")
|
||||
}
|
||||
|
||||
func (o *YouTube) GrabTranscriptWithTimestampsWithArgs(videoId string, language string, additionalArgs string) (ret string, err error) {
|
||||
// Use yt-dlp for reliable transcript extraction with timestamps
|
||||
return o.tryMethodYtDlpWithTimestamps(videoId, language, additionalArgs)
|
||||
}
|
||||
|
||||
// tryMethodYtDlpInternal is a helper function to reduce duplication between
|
||||
// tryMethodYtDlp and tryMethodYtDlpWithTimestamps.
|
||||
func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, processVTTFileFunc func(filename string) (string, error)) (ret string, err error) {
|
||||
func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, additionalArgs string, processVTTFileFunc func(filename string) (string, error)) (ret string, err error) {
|
||||
// Check if yt-dlp is available
|
||||
if _, err = exec.LookPath("yt-dlp"); err != nil {
|
||||
err = fmt.Errorf("yt-dlp not found in PATH. Please install yt-dlp to use YouTube transcript functionality")
|
||||
@@ -141,30 +163,93 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, proces
|
||||
// Use yt-dlp to get transcript
|
||||
videoURL := "https://www.youtube.com/watch?v=" + videoId
|
||||
outputPath := filepath.Join(tempDir, "%(title)s.%(ext)s")
|
||||
lang_match := language
|
||||
if len(language) > 2 {
|
||||
lang_match = language[:2]
|
||||
}
|
||||
cmd := exec.Command("yt-dlp",
|
||||
|
||||
baseArgs := []string{
|
||||
"--write-auto-subs",
|
||||
"--sub-lang", lang_match,
|
||||
"--skip-download",
|
||||
"--sub-format", "vtt",
|
||||
"--quiet",
|
||||
"--no-warnings",
|
||||
"-o", outputPath,
|
||||
videoURL)
|
||||
}
|
||||
|
||||
args := append([]string{}, baseArgs...)
|
||||
|
||||
// Add built-in language selection first
|
||||
if language != "" {
|
||||
langMatch := language
|
||||
if len(langMatch) > 2 {
|
||||
langMatch = langMatch[:2]
|
||||
}
|
||||
args = append(args, "--sub-langs", langMatch)
|
||||
}
|
||||
|
||||
// Add user-provided arguments last so they take precedence
|
||||
if additionalArgs != "" {
|
||||
additionalArgsList, err := shellquote.Split(additionalArgs)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid yt-dlp arguments: %v", err)
|
||||
}
|
||||
args = append(args, additionalArgsList...)
|
||||
}
|
||||
|
||||
args = append(args, videoURL)
|
||||
|
||||
cmd := exec.Command("yt-dlp", args...)
|
||||
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &stderr
|
||||
|
||||
if err = cmd.Run(); err != nil {
|
||||
err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderr.String())
|
||||
return
|
||||
stderrStr := stderr.String()
|
||||
|
||||
// Check for specific YouTube errors
|
||||
if strings.Contains(stderrStr, "429") || strings.Contains(stderrStr, "Too Many Requests") {
|
||||
err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1' to slow down requests. Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if strings.Contains(stderrStr, "Sign in to confirm you're not a bot") || strings.Contains(stderrStr, "Use --cookies-from-browser") {
|
||||
err = fmt.Errorf("YouTube requires authentication (bot detection). Use --yt-dlp-args '--cookies-from-browser BROWSER' where BROWSER is chrome, firefox, brave, etc. Error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if language != "" {
|
||||
// Fallback: try without specifying language (let yt-dlp choose best available)
|
||||
stderr.Reset()
|
||||
fallbackArgs := append([]string{}, baseArgs...)
|
||||
|
||||
// Add additional arguments if provided
|
||||
if additionalArgs != "" {
|
||||
additionalArgsList, parseErr := shellquote.Split(additionalArgs)
|
||||
if parseErr != nil {
|
||||
return "", fmt.Errorf("invalid yt-dlp arguments: %v", parseErr)
|
||||
}
|
||||
fallbackArgs = append(fallbackArgs, additionalArgsList...)
|
||||
}
|
||||
|
||||
// Don't specify language, let yt-dlp choose
|
||||
fallbackArgs = append(fallbackArgs, videoURL)
|
||||
cmd = exec.Command("yt-dlp", fallbackArgs...)
|
||||
cmd.Stderr = &stderr
|
||||
if err = cmd.Run(); err != nil {
|
||||
stderrStr2 := stderr.String()
|
||||
if strings.Contains(stderrStr2, "429") || strings.Contains(stderrStr2, "Too Many Requests") {
|
||||
err = fmt.Errorf("YouTube rate limit exceeded. Try again later or use different yt-dlp arguments like '--sleep-requests 1'. Error: %v", err)
|
||||
} else {
|
||||
err = fmt.Errorf("yt-dlp failed with language '%s' and fallback. Original error: %s. Fallback error: %s", language, stderrStr, stderrStr2)
|
||||
}
|
||||
return
|
||||
}
|
||||
} else {
|
||||
err = fmt.Errorf("yt-dlp failed: %v, stderr: %s", err, stderrStr)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Find VTT files using cross-platform approach
|
||||
vttFiles, err := o.findVTTFiles(tempDir, language)
|
||||
// Try to find files with the requested language first, but fall back to any VTT file
|
||||
vttFiles, err := o.findVTTFilesWithFallback(tempDir, language)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -172,12 +257,12 @@ func (o *YouTube) tryMethodYtDlpInternal(videoId string, language string, proces
|
||||
return processVTTFileFunc(vttFiles[0])
|
||||
}
|
||||
|
||||
func (o *YouTube) tryMethodYtDlp(videoId string, language string) (ret string, err error) {
|
||||
return o.tryMethodYtDlpInternal(videoId, language, o.readAndCleanVTTFile)
|
||||
func (o *YouTube) tryMethodYtDlp(videoId string, language string, additionalArgs string) (ret string, err error) {
|
||||
return o.tryMethodYtDlpInternal(videoId, language, additionalArgs, o.readAndCleanVTTFile)
|
||||
}
|
||||
|
||||
func (o *YouTube) tryMethodYtDlpWithTimestamps(videoId string, language string) (ret string, err error) {
|
||||
return o.tryMethodYtDlpInternal(videoId, language, o.readAndFormatVTTWithTimestamps)
|
||||
func (o *YouTube) tryMethodYtDlpWithTimestamps(videoId string, language string, additionalArgs string) (ret string, err error) {
|
||||
return o.tryMethodYtDlpInternal(videoId, language, additionalArgs, o.readAndFormatVTTWithTimestamps)
|
||||
}
|
||||
|
||||
func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {
|
||||
@@ -303,8 +388,7 @@ func isTimeStamp(s string) bool {
|
||||
|
||||
func removeVTTTags(s string) string {
|
||||
// Remove VTT tags like <c.colorE5E5E5>, </c>, etc.
|
||||
tagRegex := regexp.MustCompile(`<[^>]*>`)
|
||||
return tagRegex.ReplaceAllString(s, "")
|
||||
return vttTagRegex.ReplaceAllString(s, "")
|
||||
}
|
||||
|
||||
// shouldIncludeRepeat determines if repeated content should be included based on time gap
|
||||
@@ -428,7 +512,7 @@ func (o *YouTube) GrabDuration(videoId string) (ret int, err error) {
|
||||
|
||||
durationStr := videoResponse.Items[0].ContentDetails.Duration
|
||||
|
||||
matches := regexp.MustCompile(`(?i)PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?`).FindStringSubmatch(durationStr)
|
||||
matches := durationRegex.FindStringSubmatch(durationStr)
|
||||
if len(matches) == 0 {
|
||||
return 0, fmt.Errorf("invalid duration string: %s", durationStr)
|
||||
}
|
||||
@@ -588,8 +672,9 @@ func (o *YouTube) normalizeFileName(name string) string {
|
||||
|
||||
}
|
||||
|
||||
// findVTTFiles searches for VTT files in a directory using cross-platform approach
|
||||
func (o *YouTube) findVTTFiles(dir, language string) ([]string, error) {
|
||||
// findVTTFilesWithFallback searches for VTT files, handling fallback scenarios
|
||||
// where the requested language might not be available
|
||||
func (o *YouTube) findVTTFilesWithFallback(dir, requestedLanguage string) ([]string, error) {
|
||||
var vttFiles []string
|
||||
|
||||
// Walk through the directory to find VTT files
|
||||
@@ -612,14 +697,28 @@ func (o *YouTube) findVTTFiles(dir, language string) ([]string, error) {
|
||||
return nil, fmt.Errorf("no VTT files found in directory")
|
||||
}
|
||||
|
||||
// Prefer files with the specified language
|
||||
// If no specific language requested, return the first file
|
||||
if requestedLanguage == "" {
|
||||
return []string{vttFiles[0]}, nil
|
||||
}
|
||||
|
||||
// First, try to find files with the requested language
|
||||
for _, file := range vttFiles {
|
||||
if strings.Contains(file, "."+language+".vtt") {
|
||||
if strings.Contains(file, "."+requestedLanguage+".vtt") {
|
||||
return []string{file}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Return the first VTT file found if no language-specific file exists
|
||||
// If requested language not found, check if we have any language-specific files
|
||||
// This handles the fallback case where yt-dlp downloaded a different language
|
||||
for _, file := range vttFiles {
|
||||
// Look for any language pattern (e.g., .en.vtt, .es.vtt, etc.)
|
||||
if languageFileRegex.MatchString(file) {
|
||||
return []string{file}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// If no language-specific files found, return the first VTT file
|
||||
return []string{vttFiles[0]}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -199,6 +199,9 @@ schema = 3
|
||||
[mod."github.com/json-iterator/go"]
|
||||
version = "v1.1.12"
|
||||
hash = "sha256-To8A0h+lbfZ/6zM+2PpRpY3+L6725OPC66lffq6fUoM="
|
||||
[mod."github.com/kballard/go-shellquote"]
|
||||
version = "v0.0.0-20180428030007-95032a82bc51"
|
||||
hash = "sha256-AOEdKETBMUC39ln6jBJ9NYdJWp++jV5lSbjNqG3dV+c="
|
||||
[mod."github.com/kevinburke/ssh_config"]
|
||||
version = "v1.2.0"
|
||||
hash = "sha256-Ta7ZOmyX8gG5tzWbY2oES70EJPfI90U7CIJS9EAce0s="
|
||||
|
||||
@@ -1 +1 @@
|
||||
"1.4.277"
|
||||
"1.4.280"
|
||||
|
||||
Reference in New Issue
Block a user