get correct index for samples (#1336)

This commit is contained in:
Mahmoud Ashraf
2025-08-06 03:17:45 +03:00
committed by GitHub
parent d3bfd0a305
commit fbeb1ba731
2 changed files with 7 additions and 3 deletions

View File

@@ -1832,7 +1832,7 @@ def restore_speech_timestamps(
 else:
 segment.start = ts_map.get_original_time(segment.start)
-segment.end = ts_map.get_original_time(segment.end)
+segment.end = ts_map.get_original_time(segment.end, is_end=True)
 yield segment

View File

@@ -229,15 +229,19 @@ class SpeechTimestampsMap:
 self,
 time: float,
 chunk_index: Optional[int] = None,
+is_end: bool = False,
 ) -> float:
 if chunk_index is None:
-chunk_index = self.get_chunk_index(time)
+chunk_index = self.get_chunk_index(time, is_end)
 total_silence_before = self.total_silence_before[chunk_index]
 return round(total_silence_before + time, self.time_precision)
-def get_chunk_index(self, time: float) -> int:
+def get_chunk_index(self, time: float, is_end: bool = False) -> int:
 sample = int(time * self.sampling_rate)
+if sample in self.chunk_end_sample and is_end:
+return self.chunk_end_sample.index(sample)
 return min(
 bisect.bisect(self.chunk_end_sample, sample),
 len(self.chunk_end_sample) - 1,