fix: Use correct seek value in output, fix word timestamps when the initial timestamp is not zero (#1141)

Co-authored-by: Mahmoud Ashraf <hassouna97.ma@gmail.com>
2026-01-08 13:14:00 -05:00 · 2024-11-15 19:57:38 +08:00
parent 85e61ea111
commit 53bbe54016
1 changed files with 6 additions and 3 deletions
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -174,6 +174,9 @@ class BatchedInferencePipeline:
                        compression_ratio=get_compression_ratio(
                            self.tokenizer.decode(subsegment["tokens"])
                        ),
+                        seek=int(
+                            chunk_metadata["start_time"] * self.model.frames_per_second
+                        ),
                    )
                    for subsegment in subsegments
                ]
@@ -496,7 +499,7 @@ class BatchedInferencePipeline:
                for segment in result:
                    seg_idx += 1
                    yield Segment(
-                        seek=int(result[-1]["end"] * self.model.frames_per_second),
+                        seek=segment["seek"],
                        id=seg_idx,
                        text=segment["text"],
                        start=round(segment["start"], 3),
@@ -1318,7 +1321,7 @@ class WhisperModel:

                yield Segment(
                    id=idx,
-                    seek=seek,
+                    seek=previous_seek,
                    start=segment["start"],
                    end=segment["end"],
                    text=text,
@@ -1585,7 +1588,7 @@ class WhisperModel:

        for segment_idx, segment in enumerate(segments):
            word_index = 0
-            time_offset = segment[0]["start"]
+            time_offset = segment[0]["seek"] / self.frames_per_second
            median_duration, max_duration = median_max_durations[segment_idx]
            for subsegment_idx, subsegment in enumerate(segment):
                saved_tokens = 0