get correct index for samples (#1336)

This commit is contained in:
Mahmoud Ashraf
2025-08-06 03:17:45 +03:00
committed by GitHub
parent d3bfd0a305
commit fbeb1ba731
2 changed files with 7 additions and 3 deletions

View File

@@ -1832,7 +1832,7 @@ def restore_speech_timestamps(
         else:
             segment.start = ts_map.get_original_time(segment.start)
-            segment.end = ts_map.get_original_time(segment.end)
+            segment.end = ts_map.get_original_time(segment.end, is_end=True)
         yield segment

View File

@@ -229,15 +229,19 @@ class SpeechTimestampsMap:
         self,
         time: float,
         chunk_index: Optional[int] = None,
+        is_end: bool = False,
     ) -> float:
         if chunk_index is None:
-            chunk_index = self.get_chunk_index(time)
+            chunk_index = self.get_chunk_index(time, is_end)
         total_silence_before = self.total_silence_before[chunk_index]
         return round(total_silence_before + time, self.time_precision)
-    def get_chunk_index(self, time: float) -> int:
+    def get_chunk_index(self, time: float, is_end: bool = False) -> int:
         sample = int(time * self.sampling_rate)
+        if sample in self.chunk_end_sample and is_end:
+            return self.chunk_end_sample.index(sample)
         return min(
             bisect.bisect(self.chunk_end_sample, sample),
             len(self.chunk_end_sample) - 1,