fix: Use correct seek value in output, fix word timestamps when the initial timestamp is not zero (#1141)

Co-authored-by: Mahmoud Ashraf <hassouna97.ma@gmail.com>
This commit is contained in:
黑墨水鱼
2024-11-15 19:57:38 +08:00
committed by GitHub
parent 85e61ea111
commit 53bbe54016

View File

@@ -174,6 +174,9 @@ class BatchedInferencePipeline:
compression_ratio=get_compression_ratio(
self.tokenizer.decode(subsegment["tokens"])
),
+seek=int(
+chunk_metadata["start_time"] * self.model.frames_per_second
+),
)
for subsegment in subsegments
]
@@ -496,7 +499,7 @@ class BatchedInferencePipeline:
for segment in result:
seg_idx += 1
yield Segment(
-seek=int(result[-1]["end"] * self.model.frames_per_second),
+seek=segment["seek"],
id=seg_idx,
text=segment["text"],
start=round(segment["start"], 3),
@@ -1318,7 +1321,7 @@ class WhisperModel:
yield Segment(
id=idx,
-seek=seek,
+seek=previous_seek,
start=segment["start"],
end=segment["end"],
text=text,
@@ -1585,7 +1588,7 @@ class WhisperModel:
for segment_idx, segment in enumerate(segments):
word_index = 0
-time_offset = segment[0]["start"]
+time_offset = segment[0]["seek"] / self.frames_per_second
median_duration, max_duration = median_max_durations[segment_idx]
for subsegment_idx, subsegment in enumerate(segment):
saved_tokens = 0