Compare commits

...

17 Commits

Author SHA1 Message Date
github-actions[bot]
29cb3796bf Update version to v1.4.261 and commit 2025-07-19 14:20:50 +00:00
Kayvan Sylvan
f51f9e75a9 Merge pull request #1637 from ksylvan/0719-add-mistral-to-list-of-raw-mode-models
chore: update `NeedsRawMode` to include `mistral` prefix for Ollama
2025-07-19 07:19:13 -07:00
Kayvan Sylvan
63475784c7 chore: update NeedsRawMode to include mistral prefix
### CHANGES

- Add `mistral` to `ollamaPrefixes` list.
2025-07-19 07:13:23 -07:00
Kayvan Sylvan
1a7bb27370 docs: Update CHANGELOG after v1.4.260 2025-07-18 13:01:15 -07:00
github-actions[bot]
4badaa4c85 Update version to v1.4.260 and commit 2025-07-18 19:57:27 +00:00
Kayvan Sylvan
bf6be964fd Merge pull request #1634 from ksylvan/0718-fix-exo-labs-client
Fix abort in Exo-Labs provider plugin; with credit to @sakithahSenid
2025-07-18 12:55:58 -07:00
Kayvan Sylvan
cdbcb0a512 chore: add API key setup question to Exolab AI plugin configuration
## CHANGES

- Add "openaiapi" to VSCode spell check dictionary
- Include API key setup question in Exolab client
- Configure API key as required field for setup
- Maintain existing API base URL configuration order
2025-07-18 12:40:55 -07:00
Kayvan Sylvan
f81cf193a2 docs: Update CHANGELOG after v1.4.259 2025-07-18 12:01:19 -07:00
github-actions[bot]
cba56fcde6 Update version to v1.4.259 and commit 2025-07-18 18:57:13 +00:00
Kayvan Sylvan
72cbd13917 Merge pull request #1633 from ksylvan/0718-youtube-vtt-transcript-duplication-bug
YouTube VTT Processing Enhancement
2025-07-18 11:55:44 -07:00
Kayvan Sylvan
dc722f9724 feat: improve timestamp parsing to handle fractional seconds in YouTube tool
## CHANGES

- Move timestamp regex initialization to init function
- Add parseSeconds helper function for fractional seconds
- Replace direct strconv.Atoi calls with parseSeconds function
- Support decimal seconds in timestamp format parsing
- Extract seconds parsing logic into reusable function
2025-07-18 11:53:28 -07:00
Kayvan Sylvan
1a35f32a48 fix: Youtube VTT parsing gap test 2025-07-18 11:27:23 -07:00
Kayvan Sylvan
65bd2753c2 feat: enhance VTT duplicate filtering to allow legitimate repeated content
## CHANGES

- Fix regex escape sequence for timestamp parsing
- Add configurable time gap constant for repeat detection
- Track content with timestamps instead of simple deduplication
- Implement time-based repeat inclusion logic for choruses
- Add timestamp parsing helper functions for calculations
- Allow repeated content after significant time gaps
- Preserve legitimate recurring phrases while filtering duplicates
2025-07-18 11:08:37 -07:00
Kayvan Sylvan
570c9a9404 chore: refactor timestamp regex and seenSegments logic
### CHANGES

- Update `timestampRegex` to support optional seconds/milliseconds
- Change `seenSegments` to use `struct{}` for memory efficiency
- Refactor duplicate check using `struct{}` pattern
- Improve readability by restructuring timestamp logic
2025-07-18 09:44:31 -07:00
Kayvan Sylvan
15151fe9ee chore: refactor timestamp regex to global scope and add spell check words
## CHANGES

- Move timestamp regex to global package scope
- Remove duplicate regex compilation from isTimeStamp function
- Add "horts", "mbed", "WEBVTT", "youtu" to spell checker
- Improve regex performance by avoiding repeated compilation
- Clean up code organization in YouTube module
2025-07-18 09:33:46 -07:00
Kayvan Sylvan
2aad4caf9b fix: prevent duplicate segments in VTT file processing
- Add deduplication map to track seen segments
- Skip duplicate text segments in plain VTT processing
- Skip duplicate segments in timestamped VTT processing
- Improve timestamp regex to handle more formats
- Use clean text as deduplication key consistently
2025-07-18 09:17:03 -07:00
Kayvan Sylvan
289fda8c74 docs: Update CHANGELOG after v1.4.258 2025-07-17 15:30:50 -07:00
9 changed files with 216 additions and 8 deletions

View File

@@ -53,6 +53,7 @@
"hasura",
"hormozi",
"Hormozi's",
"horts",
"HTMLURL",
"jaredmontoya",
"jessevdk",
@@ -74,10 +75,12 @@
"markmap",
"matplotlib",
"mattn",
"mbed",
"Miessler",
"nometa",
"numpy",
"ollama",
"openaiapi",
"opencode",
"openrouter",
"otiai",
@@ -114,11 +117,13 @@
"updatepatterns",
"videoid",
"webp",
"WEBVTT",
"wipecontext",
"wipesession",
"writeups",
"xclip",
"yourpatternname"
"yourpatternname",
"youtu"
],
"cSpell.ignorePaths": ["go.mod", ".gitignore", "CHANGELOG.md"],
"markdownlint.config": {

View File

@@ -1,5 +1,47 @@
# Changelog
## v1.4.260 (2025-07-18)
### PR [#1634](https://github.com/danielmiessler/Fabric/pull/1634) by [ksylvan](https://github.com/ksylvan): Fix abort in Exo-Labs provider plugin; with credit to @sakithahSenid
- Fix abort issue in Exo-Labs provider plugin
- Add API key setup question to Exolab AI plugin configuration
- Include API key setup question in Exolab client with required field validation
- Add "openaiapi" to VSCode spell check dictionary
- Maintain existing API base URL configuration order
### Direct commits
- Update CHANGELOG after v1.4.259
## v1.4.259 (2025-07-18)
### PR [#1633](https://github.com/danielmiessler/Fabric/pull/1633) by [ksylvan](https://github.com/ksylvan): YouTube VTT Processing Enhancement
- Fix: prevent duplicate segments in VTT file processing by adding deduplication map to track seen segments
- Feat: enhance VTT duplicate filtering to allow legitimate repeated content with configurable time gap detection
- Feat: improve timestamp parsing to handle fractional seconds and optional seconds/milliseconds formats
- Chore: refactor timestamp regex to global scope and improve performance by avoiding repeated compilation
- Fix: Youtube VTT parsing gap test and extract seconds parsing logic into reusable function
### Direct commits
- Docs: Update CHANGELOG after v1.4.258
## v1.4.258 (2025-07-17)
### PR [#1629](https://github.com/danielmiessler/Fabric/pull/1629) by [ksylvan](https://github.com/ksylvan): Create Default (empty) .env in ~/.config/fabric on Demand
- Add startup check to initialize config and .env file automatically
- Introduce ensureEnvFile function to create ~/.config/fabric/.env if missing
- Add directory creation for config path in ensureEnvFile
- Integrate setup flag in CLI to call ensureEnvFile on demand
- Improve error handling and permissions in ensureEnvFile function
### Direct commits
- Update README and CHANGELOG after v1.4.257
## v1.4.257 (2025-07-17)
### PR [#1628](https://github.com/danielmiessler/Fabric/pull/1628) by [ksylvan](https://github.com/ksylvan): Introduce CLI Flag to Disable OpenAI Responses API

View File

@@ -1,3 +1,3 @@
package main
var version = "v1.4.258"
var version = "v1.4.261"

Binary file not shown.

View File

@@ -13,6 +13,7 @@ func NewClient() (ret *Client) {
ret = &Client{}
ret.Client = openai.NewClientCompatibleNoSetupQuestions("Exolab", ret.configure)
ret.ApiKey = ret.AddSetupQuestion("API Key", false)
ret.ApiBaseURL = ret.AddSetupQuestion("API Base URL", true)
ret.ApiBaseURL.Value = "http://localhost:52415"

View File

@@ -160,6 +160,7 @@ func (o *Client) NeedsRawMode(modelName string) bool {
ollamaPrefixes := []string{
"llama3",
"llama2",
"mistral",
}
for _, prefix := range ollamaPrefixes {
if strings.HasPrefix(modelName, prefix) {

View File

@@ -0,0 +1,61 @@
package youtube
import (
"testing"
)
func TestParseTimestampToSeconds(t *testing.T) {
tests := []struct {
timestamp string
expected int
shouldErr bool
}{
{"00:30", 30, false},
{"01:30", 90, false},
{"01:05:30", 3930, false}, // 1 hour 5 minutes 30 seconds
{"10:00", 600, false},
{"invalid", 0, true},
{"1:2:3:4", 0, true}, // too many parts
}
for _, test := range tests {
result, err := parseTimestampToSeconds(test.timestamp)
if test.shouldErr {
if err == nil {
t.Errorf("Expected error for timestamp %s, but got none", test.timestamp)
}
} else {
if err != nil {
t.Errorf("Unexpected error for timestamp %s: %v", test.timestamp, err)
}
if result != test.expected {
t.Errorf("For timestamp %s, expected %d seconds, got %d", test.timestamp, test.expected, result)
}
}
}
}
func TestShouldIncludeRepeat(t *testing.T) {
tests := []struct {
lastTimestamp string
currentTimestamp string
expected bool
description string
}{
{"00:30", "01:30", true, "60 second gap should allow repeat"},
{"00:30", "00:45", true, "15 second gap should allow repeat"},
{"01:00", "01:10", true, "10 second gap should allow repeat (boundary case)"},
{"01:00", "01:09", false, "9 second gap should not allow repeat"},
{"00:30", "00:35", false, "5 second gap should not allow repeat"},
{"invalid", "01:30", true, "invalid timestamp should err on side of inclusion"},
{"01:30", "invalid", true, "invalid timestamp should err on side of inclusion"},
}
for _, test := range tests {
result := shouldIncludeRepeat(test.lastTimestamp, test.currentTimestamp)
if result != test.expected {
t.Errorf("%s: expected %v, got %v", test.description, test.expected, result)
}
}
}

View File

@@ -29,6 +29,15 @@ import (
"google.golang.org/api/youtube/v3"
)
var timestampRegex *regexp.Regexp
const TimeGapForRepeats = 10 // seconds
func init() {
// Match timestamps like "00:00:01.234" or just numbers or sequence numbers
timestampRegex = regexp.MustCompile(`^\d+$|^\d{1,2}:\d{2}(:\d{2})?(\.\d{3})?$`)
}
func NewYouTube() (ret *YouTube) {
label := "YouTube"
@@ -180,6 +189,7 @@ func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {
// Convert VTT to plain text
lines := strings.Split(string(content), "\n")
var textBuilder strings.Builder
seenSegments := make(map[string]struct{})
for _, line := range lines {
line = strings.TrimSpace(line)
@@ -193,8 +203,11 @@ func (o *YouTube) readAndCleanVTTFile(filename string) (ret string, err error) {
// Remove VTT formatting tags
line = removeVTTTags(line)
if line != "" {
textBuilder.WriteString(line)
textBuilder.WriteString(" ")
if _, exists := seenSegments[line]; !exists {
textBuilder.WriteString(line)
textBuilder.WriteString(" ")
seenSegments[line] = struct{}{}
}
}
}
@@ -215,6 +228,10 @@ func (o *YouTube) readAndFormatVTTWithTimestamps(filename string) (ret string, e
lines := strings.Split(string(content), "\n")
var textBuilder strings.Builder
var currentTimestamp string
// Track content with timestamps to allow repeats after significant time gaps
// This preserves legitimate repeated content (choruses, recurring phrases, etc.)
// while still filtering out immediate duplicates from VTT formatting issues
seenSegments := make(map[string]string) // text -> last timestamp seen
for _, line := range lines {
line = strings.TrimSpace(line)
@@ -246,7 +263,20 @@ func (o *YouTube) readAndFormatVTTWithTimestamps(filename string) (ret string, e
// Remove VTT formatting tags
cleanText := removeVTTTags(line)
if cleanText != "" && currentTimestamp != "" {
textBuilder.WriteString(fmt.Sprintf("[%s] %s\n", currentTimestamp, cleanText))
// Check if we should include this segment
shouldInclude := true
if lastTimestamp, exists := seenSegments[cleanText]; exists {
// Calculate time difference to determine if this is a legitimate repeat
if !shouldIncludeRepeat(lastTimestamp, currentTimestamp) {
shouldInclude = false
}
}
if shouldInclude {
timestampedLine := fmt.Sprintf("[%s] %s", currentTimestamp, cleanText)
textBuilder.WriteString(timestampedLine + "\n")
seenSegments[cleanText] = currentTimestamp
}
}
}
}
@@ -268,8 +298,6 @@ func formatVTTTimestamp(vttTime string) string {
}
func isTimeStamp(s string) bool {
// Match timestamps like "00:00:01.234" or just numbers
timestampRegex := regexp.MustCompile(`^\d+$|^\d{2}:\d{2}:\d{2}`)
return timestampRegex.MatchString(s)
}
@@ -279,6 +307,76 @@ func removeVTTTags(s string) string {
return tagRegex.ReplaceAllString(s, "")
}
// shouldIncludeRepeat determines if repeated content should be included based on time gap
func shouldIncludeRepeat(lastTimestamp, currentTimestamp string) bool {
// Parse timestamps to calculate time difference
lastSeconds, err1 := parseTimestampToSeconds(lastTimestamp)
currentSeconds, err2 := parseTimestampToSeconds(currentTimestamp)
if err1 != nil || err2 != nil {
// If we can't parse timestamps, err on the side of inclusion
return true
}
// Allow repeats if there's at least a TimeGapForRepeats gap
// This threshold can be adjusted based on use case:
// - 10 seconds works well for most content
// - Could be made configurable in the future
timeDiffSeconds := currentSeconds - lastSeconds
return timeDiffSeconds >= TimeGapForRepeats
}
// parseTimestampToSeconds converts timestamp string (HH:MM:SS or MM:SS) to total seconds
func parseTimestampToSeconds(timestamp string) (int, error) {
parts := strings.Split(timestamp, ":")
if len(parts) < 2 || len(parts) > 3 {
return 0, fmt.Errorf("invalid timestamp format: %s", timestamp)
}
var hours, minutes, seconds int
var err error
if len(parts) == 3 {
// HH:MM:SS format
if hours, err = strconv.Atoi(parts[0]); err != nil {
return 0, err
}
if minutes, err = strconv.Atoi(parts[1]); err != nil {
return 0, err
}
if seconds, err = parseSeconds(parts[2]); err != nil {
return 0, err
}
} else {
// MM:SS format
if minutes, err = strconv.Atoi(parts[0]); err != nil {
return 0, err
}
if seconds, err = parseSeconds(parts[1]); err != nil {
return 0, err
}
}
return hours*3600 + minutes*60 + seconds, nil
}
func parseSeconds(seconds_str string) (int, error) {
var seconds int
var err error
if strings.Contains(seconds_str, ".") {
// Handle fractional seconds
second_parts := strings.Split(seconds_str, ".")
if seconds, err = strconv.Atoi(second_parts[0]); err != nil {
return 0, err
}
} else {
if seconds, err = strconv.Atoi(seconds_str); err != nil {
return 0, err
}
}
return seconds, nil
}
func (o *YouTube) GrabComments(videoId string) (ret []string, err error) {
if err = o.initService(); err != nil {
return

View File

@@ -1 +1 @@
"1.4.258"
"1.4.261"