mirror of
https://github.com/danielmiessler/Fabric.git
synced 2026-01-09 22:38:10 -05:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
29cb3796bf | ||
|
|
f51f9e75a9 | ||
|
|
63475784c7 | ||
|
|
1a7bb27370 | ||
|
|
4badaa4c85 | ||
|
|
bf6be964fd | ||
|
|
cdbcb0a512 | ||
|
|
f81cf193a2 | ||
|
|
cba56fcde6 | ||
|
|
72cbd13917 | ||
|
|
dc722f9724 | ||
|
|
1a35f32a48 | ||
|
|
65bd2753c2 | ||
|
|
570c9a9404 | ||
|
|
15151fe9ee | ||
|
|
2aad4caf9b | ||
|
|
289fda8c74 |
7
.vscode/settings.json
vendored
7
.vscode/settings.json
vendored
@@ -53,6 +53,7 @@
|
||||
"hasura",
|
||||
"hormozi",
|
||||
"Hormozi's",
|
||||
"horts",
|
||||
"HTMLURL",
|
||||
"jaredmontoya",
|
||||
"jessevdk",
|
||||
@@ -74,10 +75,12 @@
|
||||
"markmap",
|
||||
"matplotlib",
|
||||
"mattn",
|
||||
"mbed",
|
||||
"Miessler",
|
||||
"nometa",
|
||||
"numpy",
|
||||
"ollama",
|
||||
"openaiapi",
|
||||
"opencode",
|
||||
"openrouter",
|
||||
"otiai",
|
||||
@@ -114,11 +117,13 @@
|
||||
"updatepatterns",
|
||||
"videoid",
|
||||
"webp",
|
||||
"WEBVTT",
|
||||
"wipecontext",
|
||||
"wipesession",
|
||||
"writeups",
|
||||
"xclip",
|
||||
"yourpatternname"
|
||||
"yourpatternname",
|
||||
"youtu"
|
||||
],
|
||||
"cSpell.ignorePaths": ["go.mod", ".gitignore", "CHANGELOG.md"],
|
||||
"markdownlint.config": {
|
||||
|
||||
42
CHANGELOG.md
42
CHANGELOG.md
@@ -1,5 +1,47 @@
|
||||
# Changelog
|
||||
|
||||
## v1.4.260 (2025-07-18)
|
||||
|
||||
### PR [#1634](https://github.com/danielmiessler/Fabric/pull/1634) by [ksylvan](https://github.com/ksylvan): Fix abort in Exo-Labs provider plugin; with credit to @sakithahSenid
|
||||
|
||||
- Fix abort issue in Exo-Labs provider plugin
|
||||
- Add API key setup question to Exolab AI plugin configuration
|
||||
- Include API key setup question in Exolab client with required field validation
|
||||
- Add "openaiapi" to VSCode spell check dictionary
|
||||
- Maintain existing API base URL configuration order
|
||||
|
||||
### Direct commits
|
||||
|
||||
- Update CHANGELOG after v1.4.259
|
||||
|
||||
## v1.4.259 (2025-07-18)
|
||||
|
||||
### PR [#1633](https://github.com/danielmiessler/Fabric/pull/1633) by [ksylvan](https://github.com/ksylvan): YouTube VTT Processing Enhancement
|
||||
|
||||
- Fix: prevent duplicate segments in VTT file processing by adding deduplication map to track seen segments
|
||||
- Feat: enhance VTT duplicate filtering to allow legitimate repeated content with configurable time gap detection
|
||||
- Feat: improve timestamp parsing to handle fractional seconds and optional seconds/milliseconds formats
|
||||
- Chore: refactor timestamp regex to global scope and improve performance by avoiding repeated compilation
|
||||
- Fix: YouTube VTT parsing gap test; extract seconds-parsing logic into a reusable function
|
||||
|
||||
### Direct commits
|
||||
|
||||
- Docs: Update CHANGELOG after v1.4.258
|
||||
|
||||
## v1.4.258 (2025-07-17)
|
||||
|
||||
### PR [#1629](https://github.com/danielmiessler/Fabric/pull/1629) by [ksylvan](https://github.com/ksylvan): Create Default (empty) .env in ~/.config/fabric on Demand
|
||||
|
||||
- Add startup check to initialize config and .env file automatically
|
||||
- Introduce ensureEnvFile function to create ~/.config/fabric/.env if missing
|
||||
- Add directory creation for config path in ensureEnvFile
|
||||
- Integrate setup flag in CLI to call ensureEnvFile on demand
|
||||
- Improve error handling and permissions in ensureEnvFile function
|
||||
|
||||
### Direct commits
|
||||
|
||||
- Update README and CHANGELOG after v1.4.257
|
||||
|
||||
## v1.4.257 (2025-07-17)
|
||||
|
||||
### PR [#1628](https://github.com/danielmiessler/Fabric/pull/1628) by [ksylvan](https://github.com/ksylvan): Introduce CLI Flag to Disable OpenAI Responses API
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
package main
|
||||
|
||||
var version = "v1.4.258"
|
||||
var version = "v1.4.261"
|
||||
|
||||
Binary file not shown.
@@ -13,6 +13,7 @@ func NewClient() (ret *Client) {
|
||||
ret = &Client{}
|
||||
ret.Client = openai.NewClientCompatibleNoSetupQuestions("Exolab", ret.configure)
|
||||
|
||||
ret.ApiKey = ret.AddSetupQuestion("API Key", false)
|
||||
ret.ApiBaseURL = ret.AddSetupQuestion("API Base URL", true)
|
||||
ret.ApiBaseURL.Value = "http://localhost:52415"
|
||||
|
||||
|
||||
@@ -160,6 +160,7 @@ func (o *Client) NeedsRawMode(modelName string) bool {
|
||||
ollamaPrefixes := []string{
|
||||
"llama3",
|
||||
"llama2",
|
||||
"mistral",
|
||||
}
|
||||
for _, prefix := range ollamaPrefixes {
|
||||
if strings.HasPrefix(modelName, prefix) {
|
||||
|
||||
61
internal/tools/youtube/timestamp_test.go
Normal file
61
internal/tools/youtube/timestamp_test.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package youtube
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseTimestampToSeconds(t *testing.T) {
|
||||
tests := []struct {
|
||||
timestamp string
|
||||
expected int
|
||||
shouldErr bool
|
||||
}{
|
||||
{"00:30", 30, false},
|
||||
{"01:30", 90, false},
|
||||
{"01:05:30", 3930, false}, // 1 hour 5 minutes 30 seconds
|
||||
{"10:00", 600, false},
|
||||
{"invalid", 0, true},
|
||||
{"1:2:3:4", 0, true}, // too many parts
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result, err := parseTimestampToSeconds(test.timestamp)
|
||||
|
||||
if test.shouldErr {
|
||||
if err == nil {
|
||||
t.Errorf("Expected error for timestamp %s, but got none", test.timestamp)
|
||||
}
|
||||
} else {
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error for timestamp %s: %v", test.timestamp, err)
|
||||
}
|
||||
if result != test.expected {
|
||||
t.Errorf("For timestamp %s, expected %d seconds, got %d", test.timestamp, test.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldIncludeRepeat(t *testing.T) {
|
||||
tests := []struct {
|
||||
lastTimestamp string
|
||||
currentTimestamp string
|
||||
expected bool
|
||||
description string
|
||||
}{
|
||||
{"00:30", "01:30", true, "60 second gap should allow repeat"},
|
||||
{"00:30", "00:45", true, "15 second gap should allow repeat"},
|
||||
{"01:00", "01:10", true, "10 second gap should allow repeat (boundary case)"},
|
||||
{"01:00", "01:09", false, "9 second gap should not allow repeat"},
|
||||
{"00:30", "00:35", false, "5 second gap should not allow repeat"},
|
||||
{"invalid", "01:30", true, "invalid timestamp should err on side of inclusion"},
|
||||
{"01:30", "invalid", true, "invalid timestamp should err on side of inclusion"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result := shouldIncludeRepeat(test.lastTimestamp, test.currentTimestamp)
|
||||
if result != test.expected {
|
||||
t.Errorf("%s: expected %v, got %v", test.description, test.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -29,6 +29,15 @@ import (
|
||||
"google.golang.org/api/youtube/v3"
|
||||
)
|
||||
|
||||
// timestampRegex matches a line that consists solely of digits (such as a cue
// sequence number), or a timestamp of the form M:SS / MM:SS, optionally
// followed by a third ":SS" field and/or a ".mmm" millisecond suffix
// (for example "01:30", "00:00:01" or "00:00:01.234").
//
// It is compiled once at package initialization so callers do not pay the
// compilation cost on every match.
var timestampRegex = regexp.MustCompile(`^\d+$|^\d{1,2}:\d{2}(:\d{2})?(\.\d{3})?$`)

// TimeGapForRepeats is the minimum gap, in seconds, between two occurrences of
// the same segment text for the later occurrence to be kept as a repeat.
const TimeGapForRepeats = 10
|
||||
|
||||
func NewYouTube() (ret *YouTube) {
|
||||
|
||||
label := "YouTube"
|
||||
@@ -180,6 +189,7 @@ func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {
|
||||
// Convert VTT to plain text
|
||||
lines := strings.Split(string(content), "\n")
|
||||
var textBuilder strings.Builder
|
||||
seenSegments := make(map[string]struct{})
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
@@ -193,8 +203,11 @@ func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {
|
||||
// Remove VTT formatting tags
|
||||
line = removeVTTTags(line)
|
||||
if line != "" {
|
||||
textBuilder.WriteString(line)
|
||||
textBuilder.WriteString(" ")
|
||||
if _, exists := seenSegments[line]; !exists {
|
||||
textBuilder.WriteString(line)
|
||||
textBuilder.WriteString(" ")
|
||||
seenSegments[line] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,6 +228,10 @@ func (o *YouTube) readAndFormatVTTWithTimestamps(filename string) (ret string, e
|
||||
lines := strings.Split(string(content), "\n")
|
||||
var textBuilder strings.Builder
|
||||
var currentTimestamp string
|
||||
// Track content with timestamps to allow repeats after significant time gaps
|
||||
// This preserves legitimate repeated content (choruses, recurring phrases, etc.)
|
||||
// while still filtering out immediate duplicates from VTT formatting issues
|
||||
seenSegments := make(map[string]string) // text -> last timestamp seen
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
@@ -246,7 +263,20 @@ func (o *YouTube) readAndFormatVTTWithTimestamps(filename string) (ret string, e
|
||||
// Remove VTT formatting tags
|
||||
cleanText := removeVTTTags(line)
|
||||
if cleanText != "" && currentTimestamp != "" {
|
||||
textBuilder.WriteString(fmt.Sprintf("[%s] %s\n", currentTimestamp, cleanText))
|
||||
// Check if we should include this segment
|
||||
shouldInclude := true
|
||||
if lastTimestamp, exists := seenSegments[cleanText]; exists {
|
||||
// Calculate time difference to determine if this is a legitimate repeat
|
||||
if !shouldIncludeRepeat(lastTimestamp, currentTimestamp) {
|
||||
shouldInclude = false
|
||||
}
|
||||
}
|
||||
|
||||
if shouldInclude {
|
||||
timestampedLine := fmt.Sprintf("[%s] %s", currentTimestamp, cleanText)
|
||||
textBuilder.WriteString(timestampedLine + "\n")
|
||||
seenSegments[cleanText] = currentTimestamp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -268,8 +298,6 @@ func formatVTTTimestamp(vttTime string) string {
|
||||
}
|
||||
|
||||
func isTimeStamp(s string) bool {
|
||||
// Match timestamps like "00:00:01.234" or just numbers
|
||||
timestampRegex := regexp.MustCompile(`^\d+$|^\d{2}:\d{2}:\d{2}`)
|
||||
return timestampRegex.MatchString(s)
|
||||
}
|
||||
|
||||
@@ -279,6 +307,76 @@ func removeVTTTags(s string) string {
|
||||
return tagRegex.ReplaceAllString(s, "")
|
||||
}
|
||||
|
||||
// shouldIncludeRepeat determines if repeated content should be included based on time gap
|
||||
func shouldIncludeRepeat(lastTimestamp, currentTimestamp string) bool {
|
||||
// Parse timestamps to calculate time difference
|
||||
lastSeconds, err1 := parseTimestampToSeconds(lastTimestamp)
|
||||
currentSeconds, err2 := parseTimestampToSeconds(currentTimestamp)
|
||||
|
||||
if err1 != nil || err2 != nil {
|
||||
// If we can't parse timestamps, err on the side of inclusion
|
||||
return true
|
||||
}
|
||||
|
||||
// Allow repeats if there's at least a TimeGapForRepeats gap
|
||||
// This threshold can be adjusted based on use case:
|
||||
// - 10 seconds works well for most content
|
||||
// - Could be made configurable in the future
|
||||
timeDiffSeconds := currentSeconds - lastSeconds
|
||||
return timeDiffSeconds >= TimeGapForRepeats
|
||||
}
|
||||
|
||||
// parseTimestampToSeconds converts a timestamp string in HH:MM:SS or MM:SS
// form into a whole number of seconds. The seconds field may carry a
// fractional part (e.g. "01:05:30.250"), which is truncated.
//
// It returns an error when the timestamp does not have two or three
// colon-separated fields, or when any field is non-numeric or negative.
func parseTimestampToSeconds(timestamp string) (int, error) {
	parts := strings.Split(timestamp, ":")
	if len(parts) < 2 || len(parts) > 3 {
		return 0, fmt.Errorf("invalid timestamp format: %s", timestamp)
	}

	// The last field is always seconds; the fields before it are
	// [hours,] minutes, so fold them together base-60.
	last := len(parts) - 1
	total := 0
	for _, field := range parts[:last] {
		n, err := strconv.Atoi(field)
		if err != nil {
			return 0, fmt.Errorf("invalid timestamp %q: %w", timestamp, err)
		}
		if n < 0 {
			return 0, fmt.Errorf("invalid timestamp %q: negative component %q", timestamp, field)
		}
		total = total*60 + n
	}

	seconds, err := parseSeconds(parts[last])
	if err != nil {
		return 0, fmt.Errorf("invalid timestamp %q: %w", timestamp, err)
	}

	return total*60 + seconds, nil
}

// parseSeconds parses the seconds field of a timestamp, discarding anything
// after the first '.' (so "30.500" yields 30).
//
// It returns an error if the whole-second part is not an integer or is
// negative.
func parseSeconds(secondsStr string) (int, error) {
	whole := secondsStr
	if dot := strings.IndexByte(secondsStr, '.'); dot >= 0 {
		// Only whole seconds are needed; drop the fractional part.
		whole = secondsStr[:dot]
	}

	seconds, err := strconv.Atoi(whole)
	if err != nil {
		return 0, fmt.Errorf("invalid seconds %q: %w", secondsStr, err)
	}
	if seconds < 0 {
		return 0, fmt.Errorf("invalid seconds %q: must not be negative", secondsStr)
	}
	return seconds, nil
}
|
||||
|
||||
func (o *YouTube) GrabComments(videoId string) (ret []string, err error) {
|
||||
if err = o.initService(); err != nil {
|
||||
return
|
||||
|
||||
@@ -1 +1 @@
|
||||
"1.4.258"
|
||||
"1.4.261"
|
||||
|
||||
Reference in New Issue
Block a user