Merge pull request #1915 from majiayu000/fix-1842-feature-request-parallelize-au-0101-2335

feat: parallelize audio chunk transcription for improved performance
Kayvan Sylvan, 2026-01-04 13:04:56 -08:00 (committed by GitHub)
2 changed files with 56 additions and 16 deletions


@@ -0,0 +1,3 @@
+### PR [#1915](https://github.com/danielmiessler/Fabric/pull/1915) by [majiayu000](https://github.com/majiayu000): feat: parallelize audio chunk transcription for improved performance
+
+- Parallelize audio chunk transcription using goroutines for improved performance
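The change is the standard Go fan-out/collect pattern: one goroutine per chunk, results funneled through a buffered channel, then sorted back into chunk order before concatenation. Below is a minimal standalone sketch of that pattern; the `transcribe` helper and the file names are hypothetical stand-ins for the real OpenAI call, not part of the PR.

```go
package main

import (
	"fmt"
	"sort"
	"sync"
)

// indexed pairs a chunk's position with its transcript so chunk order
// can be restored after goroutines finish in arbitrary order.
type indexed struct {
	index int
	text  string
	err   error
}

// transcribe is a hypothetical stand-in for the real transcription call.
func transcribe(path string) (string, error) {
	return "transcript of " + path, nil
}

func main() {
	files := []string{"chunk0.mp3", "chunk1.mp3", "chunk2.mp3"} // hypothetical chunks

	// Buffer the channel to len(files) so no sender ever blocks.
	results := make(chan indexed, len(files))
	var wg sync.WaitGroup
	for i, f := range files {
		wg.Add(1)
		go func(i int, f string) { // pass loop vars explicitly (safe pre-Go 1.22)
			defer wg.Done()
			text, err := transcribe(f)
			results <- indexed{index: i, text: text, err: err}
		}(i, f)
	}
	wg.Wait()
	close(results)

	// Drain the channel, fail on the first error, then restore chunk order.
	collected := make([]indexed, 0, len(files))
	for r := range results {
		if r.err != nil {
			fmt.Println("error:", r.err)
			return
		}
		collected = append(collected, r)
	}
	sort.Slice(collected, func(a, b int) bool { return collected[a].index < collected[b].index })
	for _, r := range collected {
		fmt.Println(r.text)
	}
}
```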


@@ -10,12 +10,20 @@ import (
 	"slices"
 	"sort"
 	"strings"
+	"sync"
 
 	debuglog "github.com/danielmiessler/fabric/internal/log"
 	openai "github.com/openai/openai-go"
 )
 
+// transcriptionResult holds the result of a single chunk transcription.
+type transcriptionResult struct {
+	index int
+	text  string
+	err   error
+}
+
 // MaxAudioFileSize defines the maximum allowed size for audio uploads (25MB).
 const MaxAudioFileSize int64 = 25 * 1024 * 1024
@@ -73,27 +81,56 @@ func (o *Client) TranscribeFile(ctx context.Context, filePath, model string, spl
 		files = []string{filePath}
 	}
 
-	var builder strings.Builder
+	resultsChan := make(chan transcriptionResult, len(files))
+	var wg sync.WaitGroup
 	for i, f := range files {
-		debuglog.Log("Using model %s to transcribe part %d (file name: %s)...\n", model, i+1, f)
-		var chunk *os.File
-		if chunk, err = os.Open(f); err != nil {
-			return "", err
-		}
-		params := openai.AudioTranscriptionNewParams{
-			File:  chunk,
-			Model: openai.AudioModel(model),
-		}
-		var resp *openai.Transcription
-		resp, err = o.ApiClient.Audio.Transcriptions.New(ctx, params)
-		chunk.Close()
-		if err != nil {
-			return "", err
-		}
+		wg.Add(1)
+		go func(index int, filePath string) {
+			defer wg.Done()
+			debuglog.Log("Using model %s to transcribe part %d (file name: %s)...\n", model, index+1, filePath)
+			chunk, openErr := os.Open(filePath)
+			if openErr != nil {
+				resultsChan <- transcriptionResult{index: index, err: openErr}
+				return
+			}
+			defer chunk.Close()
+			params := openai.AudioTranscriptionNewParams{
+				File:  chunk,
+				Model: openai.AudioModel(model),
+			}
+			resp, transcribeErr := o.ApiClient.Audio.Transcriptions.New(ctx, params)
+			if transcribeErr != nil {
+				resultsChan <- transcriptionResult{index: index, err: transcribeErr}
+				return
+			}
+			resultsChan <- transcriptionResult{index: index, text: resp.Text}
+		}(i, f)
+	}
+	wg.Wait()
+	close(resultsChan)
+
+	results := make([]transcriptionResult, 0, len(files))
+	for result := range resultsChan {
+		if result.err != nil {
+			return "", result.err
+		}
+		results = append(results, result)
+	}
+	sort.Slice(results, func(i, j int) bool {
+		return results[i].index < results[j].index
+	})
+
+	var builder strings.Builder
+	for i, result := range results {
 		if i > 0 {
 			builder.WriteString(" ")
 		}
-		builder.WriteString(resp.Text)
+		builder.WriteString(result.text)
 	}
 	return builder.String(), nil
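One design note: the PR launches one goroutine per chunk, so concurrency grows with the number of chunks, and a long recording split at the 25MB cap can mean many simultaneous API calls. If that ever needed bounding, a semaphore channel is the usual variant. A hedged sketch of that pattern, assuming an arbitrary limit of 4 and print statements in place of the real transcription call (neither is part of the PR):

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	files := []string{"a.mp3", "b.mp3", "c.mp3", "d.mp3", "e.mp3"} // hypothetical chunks

	sem := make(chan struct{}, 4) // at most 4 workers run at once
	var wg sync.WaitGroup
	for i, f := range files {
		wg.Add(1)
		go func(index int, path string) {
			defer wg.Done()
			sem <- struct{}{}        // acquire a slot
			defer func() { <-sem }() // release it on return
			fmt.Printf("transcribing part %d (%s)\n", index+1, path)
		}(i, f)
	}
	wg.Wait()
}
```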