diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index cddd1da..6bd268d 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -1832,7 +1832,7 @@ def restore_speech_timestamps( else: segment.start = ts_map.get_original_time(segment.start) - segment.end = ts_map.get_original_time(segment.end) + segment.end = ts_map.get_original_time(segment.end, is_end=True) yield segment diff --git a/faster_whisper/vad.py b/faster_whisper/vad.py index 87e898c..7b6a4de 100644 --- a/faster_whisper/vad.py +++ b/faster_whisper/vad.py @@ -229,15 +229,19 @@ class SpeechTimestampsMap: self, time: float, chunk_index: Optional[int] = None, + is_end: bool = False, ) -> float: if chunk_index is None: - chunk_index = self.get_chunk_index(time) + chunk_index = self.get_chunk_index(time, is_end) total_silence_before = self.total_silence_before[chunk_index] return round(total_silence_before + time, self.time_precision) - def get_chunk_index(self, time: float) -> int: + def get_chunk_index(self, time: float, is_end: bool = False) -> int: sample = int(time * self.sampling_rate) + if sample in self.chunk_end_sample and is_end: + return self.chunk_end_sample.index(sample) + return min( bisect.bisect(self.chunk_end_sample, sample), len(self.chunk_end_sample) - 1,