mirror of
https://github.com/SYSTRAN/faster-whisper.git
synced 2026-01-08 13:14:00 -05:00
Only merge when clip_timestamps are not provided (#1345)
Fixes #1340 and allows batching multiple audio files that are each shorter than 30s.
This commit is contained in:
@@ -417,15 +417,27 @@ class BatchedInferencePipeline:
|
||||
"No clip timestamps found. "
|
||||
"Set 'vad_filter' to True or provide 'clip_timestamps'."
|
||||
)
|
||||
|
||||
audio_chunks, chunks_metadata = collect_chunks(
|
||||
audio, clip_timestamps, max_duration=chunk_length
|
||||
)
|
||||
|
||||
else:
|
||||
clip_timestamps = [
|
||||
{k: int(v * sampling_rate) for k, v in segment.items()}
|
||||
for segment in clip_timestamps
|
||||
]
|
||||
|
||||
audio_chunks, chunks_metadata = collect_chunks(
|
||||
audio, clip_timestamps, max_duration=chunk_length
|
||||
)
|
||||
audio_chunks, chunks_metadata = [], []
|
||||
for clip in clip_timestamps:
|
||||
audio_chunks.append(audio[clip["start"] : clip["end"]])
|
||||
chunks_metadata.append(
|
||||
{
|
||||
"offset": clip["start"] / sampling_rate,
|
||||
"duration": (clip["end"] - clip["start"]) / sampling_rate,
|
||||
"segments": [clip],
|
||||
}
|
||||
)
|
||||
|
||||
duration_after_vad = (
|
||||
sum((segment["end"] - segment["start"]) for segment in clip_timestamps)
|
||||
|
||||
@@ -269,3 +269,24 @@ def test_monotonic_timestamps(physcisworks_path):
|
||||
assert word.start <= word.end
|
||||
assert word.end <= segments[i].end
|
||||
assert segments[-1].end <= info.duration
|
||||
|
||||
|
||||
def test_cliptimestamps_segments(jfk_path):
    """Check that user-supplied clip timestamps map one-to-one onto segments.

    The decoded sample is concatenated with itself and transcribed through the
    batched pipeline with two back-to-back clips (0-11 and 11-22). Exactly two
    segments must come back, each carrying its clip's start/end values and the
    expected transcript.
    """
    batched = BatchedInferencePipeline(model=WhisperModel("tiny"))

    single = decode_audio(jfk_path)
    doubled = np.concatenate([single, single])

    expected_text = (
        " And so my fellow Americans ask not what your country can do for you, "
        "ask what you can do for your country."
    )
    clip_timestamps = [{"start": 0.0, "end": 11.0}, {"start": 11.0, "end": 22.0}]

    # transcribe() hands back a lazy iterable of segments; materialise it so
    # the count can be checked before walking the results.
    produced, _info = batched.transcribe(doubled, clip_timestamps=clip_timestamps)
    produced = list(produced)

    assert len(produced) == 2
    for clip, segment in zip(clip_timestamps, produced):
        assert segment.start == clip["start"]
        assert segment.end == clip["end"]
        assert segment.text == expected_text
|
||||
|
||||
Reference in New Issue
Block a user