From f3a1982e30211da69fdbc289aa2c827cdc07b3bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20=C5=81uczak?= Date: Fri, 7 Feb 2025 15:25:22 +0100 Subject: [PATCH] Add the ability to grab YouTube video transcript with timestamps This commit adds the ability to grab the transcript of a YouTube video with timestamps. Each transcript line is prefixed with its start and end timestamps, formatted as `[HH:MM:SS - HH:MM:SS]`. The feature is enabled by the new `--transcript-with-timestamps` flag, which works like the existing `--transcript` flag. Example future use case: providing a summary of a video that includes timestamps for quick navigation to specific parts of the video. --- cli/cli.go | 12 +++++-- cli/flags.go | 1 + plugins/tools/youtube/youtube.go | 58 +++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/cli/cli.go b/cli/cli.go index 699ee762..7836231c 100644 --- a/cli/cli.go +++ b/cli/cli.go @@ -287,7 +287,7 @@ func Cli(version string) (err error) { func processYoutubeVideo( flags *Flags, registry *core.PluginRegistry, videoId string) (message string, err error) { - if (!flags.YouTubeComments && !flags.YouTubeMetadata) || flags.YouTubeTranscript { + if (!flags.YouTubeComments && !flags.YouTubeMetadata) || flags.YouTubeTranscript || flags.YouTubeTranscriptWithTimestamps { var transcript string var language = "en" if flags.Language != "" || registry.Language.DefaultLanguage.Value != "" { @@ -297,8 +297,14 @@ func processYoutubeVideo( language = registry.Language.DefaultLanguage.Value } } - if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil { - return + if flags.YouTubeTranscriptWithTimestamps { + if transcript, err = registry.YouTube.GrabTranscriptWithTimestamps(videoId, language); err != nil { + return + } + } else { + if transcript, err = registry.YouTube.GrabTranscript(videoId, language); err != nil { + return + } } message = AppendMessage(message, transcript) } diff --git a/cli/flags.go 
b/cli/flags.go index 2dafce6d..70426be6 100644 --- a/cli/flags.go +++ b/cli/flags.go @@ -48,6 +48,7 @@ type Flags struct { YouTube string `short:"y" long:"youtube" description:"YouTube video or play list \"URL\" to grab transcript, comments from it and send to chat or print it put to the console and store it in the output file"` YouTubePlaylist bool `long:"playlist" description:"Prefer playlist over video if both ids are present in the URL"` YouTubeTranscript bool `long:"transcript" description:"Grab transcript from YouTube video and send to chat (it is used per default)."` + YouTubeTranscriptWithTimestamps bool `long:"transcript-with-timestamps" description:"Grab transcript from YouTube video with timestamps and send to chat"` YouTubeComments bool `long:"comments" description:"Grab comments from YouTube video and send to chat"` YouTubeMetadata bool `long:"metadata" description:"Output video metadata"` Language string `short:"g" long:"language" description:"Specify the Language Code for the chat, e.g. -g=en -g=zh" default:""` diff --git a/plugins/tools/youtube/youtube.go b/plugins/tools/youtube/youtube.go index e1405b98..bb1a48e5 100644 --- a/plugins/tools/youtube/youtube.go +++ b/plugins/tools/youtube/youtube.go @@ -113,6 +113,45 @@ func (o *YouTube) GrabTranscript(videoId string, language string) (ret string, e return } +func (o *YouTube) GrabTranscriptWithTimestamps(videoId string, language string) (ret string, err error) { + var transcript string + if transcript, err = o.GrabTranscriptBase(videoId, language); err != nil { + err = fmt.Errorf("transcript not available. 
(%v)", err) + return + } + + // Parse the XML transcript + doc := soup.HTMLParse(transcript) + // Extract the text content from the tags + textTags := doc.FindAll("text") + var textBuilder strings.Builder + for _, textTag := range textTags { + // Extract the start and duration attributes + start := textTag.Attrs()["start"] + dur := textTag.Attrs()["dur"] + end := fmt.Sprintf("%f", parseFloat(start)+parseFloat(dur)) + // Format the timestamps + startFormatted := formatTimestamp(parseFloat(start)) + endFormatted := formatTimestamp(parseFloat(end)) + text := strings.ReplaceAll(textTag.Text(), "&#39;", "'") + textBuilder.WriteString(fmt.Sprintf("[%s - %s] %s\n", startFormatted, endFormatted, text)) + } + ret = textBuilder.String() + return +} + +func parseFloat(s string) float64 { + f, _ := strconv.ParseFloat(s, 64) + return f +} + +func formatTimestamp(seconds float64) string { + hours := int(seconds) / 3600 + minutes := (int(seconds) % 3600) / 60 + secs := int(seconds) % 60 + return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs) +} + func (o *YouTube) GrabTranscriptBase(videoId string, language string) (ret string, err error) { if err = o.initService(); err != nil { return @@ -265,6 +304,13 @@ func (o *YouTube) Grab(url string, options *Options) (ret *VideoInfo, err error) return } } + + if options.TranscriptWithTimestamps { + if ret.Transcript, err = o.GrabTranscriptWithTimestamps(videoId, "en"); err != nil { + return + } + } + return } @@ -372,11 +418,12 @@ type VideoMeta struct { } type Options struct { - Duration bool - Transcript bool - Comments bool - Lang string - Metadata bool + Duration bool + Transcript bool + TranscriptWithTimestamps bool + Comments bool + Lang string + Metadata bool } type VideoInfo struct { @@ -437,6 +484,7 @@ func (o *YouTube) GrabByFlags() (ret *VideoInfo, err error) { options := &Options{} flag.BoolVar(&options.Duration, "duration", false, "Output only the duration") flag.BoolVar(&options.Transcript, "transcript", false, "Output only 
the transcript") + flag.BoolVar(&options.TranscriptWithTimestamps, "transcriptWithTimestamps", false, "Output only the transcript with timestamps") flag.BoolVar(&options.Comments, "comments", false, "Output the comments on the video") flag.StringVar(&options.Lang, "lang", "en", "Language for the transcript (default: English)") flag.BoolVar(&options.Metadata, "metadata", false, "Output video metadata")