Add duration of audio and VAD removed duration to BatchedInferencePipeline (#1186)

Co-authored-by: MahmoudAshraf97 <hassouna97.ma@gmail.com>
This commit is contained in:
Dragoș Bălan
2024-12-23 16:23:40 +01:00
committed by GitHub
parent 1b24f284c9
commit 95164297ff

View File

@@ -388,6 +388,10 @@ class BatchedInferencePipeline:
audio = decode_audio(audio, sampling_rate=sampling_rate)
duration = audio.shape[0] / sampling_rate
self.model.logger.info(
"Processing audio with duration %s", format_timestamp(duration)
)
chunk_length = chunk_length or self.model.feature_extractor.chunk_length
# if no segment split is provided, use vad_model and generate segments
if not clip_timestamps:
@@ -421,6 +425,11 @@ class BatchedInferencePipeline:
/ sampling_rate
)
self.model.logger.info(
"VAD filter removed %s of audio",
format_timestamp(duration - duration_after_vad),
)
audio_chunks, chunks_metadata = collect_chunks(audio, clip_timestamps)
features = (
[self.model.feature_extractor(chunk)[..., :-1] for chunk in audio_chunks]