Mirror of https://github.com/SYSTRAN/faster-whisper.git (synced 2026-01-09 21:48:08 -05:00)
fix: Use correct seek value in output, fix word timestamps when the initial timestamp is not zero (#1141)
Co-authored-by: Mahmoud Ashraf <hassouna97.ma@gmail.com>
This commit is contained in:
@@ -174,6 +174,9 @@ class BatchedInferencePipeline:
|
|||||||
compression_ratio=get_compression_ratio(
|
compression_ratio=get_compression_ratio(
|
||||||
self.tokenizer.decode(subsegment["tokens"])
|
self.tokenizer.decode(subsegment["tokens"])
|
||||||
),
|
),
|
||||||
|
seek=int(
|
||||||
|
chunk_metadata["start_time"] * self.model.frames_per_second
|
||||||
|
),
|
||||||
)
|
)
|
||||||
for subsegment in subsegments
|
for subsegment in subsegments
|
||||||
]
|
]
|
||||||
@@ -496,7 +499,7 @@ class BatchedInferencePipeline:
|
|||||||
for segment in result:
|
for segment in result:
|
||||||
seg_idx += 1
|
seg_idx += 1
|
||||||
yield Segment(
|
yield Segment(
|
||||||
seek=int(result[-1]["end"] * self.model.frames_per_second),
|
seek=segment["seek"],
|
||||||
id=seg_idx,
|
id=seg_idx,
|
||||||
text=segment["text"],
|
text=segment["text"],
|
||||||
start=round(segment["start"], 3),
|
start=round(segment["start"], 3),
|
||||||
@@ -1318,7 +1321,7 @@ class WhisperModel:
|
|||||||
|
|
||||||
yield Segment(
|
yield Segment(
|
||||||
id=idx,
|
id=idx,
|
||||||
seek=seek,
|
seek=previous_seek,
|
||||||
start=segment["start"],
|
start=segment["start"],
|
||||||
end=segment["end"],
|
end=segment["end"],
|
||||||
text=text,
|
text=text,
|
||||||
@@ -1585,7 +1588,7 @@ class WhisperModel:
|
|||||||
|
|
||||||
for segment_idx, segment in enumerate(segments):
|
for segment_idx, segment in enumerate(segments):
|
||||||
word_index = 0
|
word_index = 0
|
||||||
time_offset = segment[0]["start"]
|
time_offset = segment[0]["seek"] / self.frames_per_second
|
||||||
median_duration, max_duration = median_max_durations[segment_idx]
|
median_duration, max_duration = median_max_durations[segment_idx]
|
||||||
for subsegment_idx, subsegment in enumerate(segment):
|
for subsegment_idx, subsegment in enumerate(segment):
|
||||||
saved_tokens = 0
|
saved_tokens = 0
|
||||||
|
|||||||
Reference in New Issue
Block a user