mirror of
https://github.com/SYSTRAN/faster-whisper.git
synced 2026-01-08 13:14:00 -05:00
fix: Use correct seek value in output, fix word timestamps when the initial timestamp is not zero (#1141)
Co-authored-by: Mahmoud Ashraf <hassouna97.ma@gmail.com>
This commit is contained in:
@@ -174,6 +174,9 @@ class BatchedInferencePipeline:
|
||||
compression_ratio=get_compression_ratio(
|
||||
self.tokenizer.decode(subsegment["tokens"])
|
||||
),
|
||||
+ seek=int(
|
||||
+ chunk_metadata["start_time"] * self.model.frames_per_second
|
||||
+ ),
|
||||
)
|
||||
for subsegment in subsegments
|
||||
]
|
||||
@@ -496,7 +499,7 @@ class BatchedInferencePipeline:
|
||||
for segment in result:
|
||||
seg_idx += 1
|
||||
yield Segment(
|
||||
- seek=int(result[-1]["end"] * self.model.frames_per_second),
|
||||
+ seek=segment["seek"],
|
||||
id=seg_idx,
|
||||
text=segment["text"],
|
||||
start=round(segment["start"], 3),
|
||||
@@ -1318,7 +1321,7 @@ class WhisperModel:
|
||||
|
||||
yield Segment(
|
||||
id=idx,
|
||||
- seek=seek,
|
||||
+ seek=previous_seek,
|
||||
start=segment["start"],
|
||||
end=segment["end"],
|
||||
text=text,
|
||||
@@ -1585,7 +1588,7 @@ class WhisperModel:
|
||||
|
||||
for segment_idx, segment in enumerate(segments):
|
||||
word_index = 0
|
||||
- time_offset = segment[0]["start"]
|
||||
+ time_offset = segment[0]["seek"] / self.frames_per_second
|
||||
median_duration, max_duration = median_max_durations[segment_idx]
|
||||
for subsegment_idx, subsegment in enumerate(segment):
|
||||
saved_tokens = 0
|
||||
|
||||
Reference in New Issue
Block a user