mirror of
https://github.com/AtHeartEngineering/local_transcription.git
synced 2026-01-08 20:07:59 -05:00
44 lines
1.5 KiB
Python
44 lines
1.5 KiB
Python
import numpy as np
|
|
from pyannote.core import Annotation, SlidingWindowFeature, SlidingWindow
|
|
|
|
def concat(chunks, collar=0.05):
    """
    Stitch a sequence of chunk results into one prediction and one waveform.

    `chunks` is a non-empty list of `(diarization, waveform)` pairs. Every
    diarization is folded into a single annotation (which inherits the `uri`
    of the first one), and the result is passed through `support(collar)` so
    that contiguous single-speaker regions separated by pauses shorter than
    `collar` seconds are merged. The waveform samples are concatenated along
    axis 0 and re-wrapped using the sliding window parameters (duration,
    step, start) of the first waveform.

    Returns a `(annotation, waveform)` tuple.
    """
    head_annotation = chunks[0][0]
    head_waveform = chunks[0][1]

    merged = Annotation(uri=head_annotation.uri)
    buffers = []
    for chunk_annotation, chunk_waveform in chunks:
        merged.update(chunk_annotation)
        buffers.append(chunk_waveform.data)

    # Fuse same-speaker segments that are separated by short pauses.
    merged = merged.support(collar)

    # The concatenated signal keeps the timing of the very first chunk.
    reference = head_waveform.sliding_window
    resolution = SlidingWindow(reference.duration, reference.step, reference.start)
    samples = np.concatenate(buffers, axis=0)
    return merged, SlidingWindowFeature(samples, resolution)
|
|
|
|
def colorize_transcription(transcription):
    """
    Unify a speaker-aware transcription into a single colored text.

    `transcription` is an iterable of `(speaker: int, text: str)` pairs.
    Each text becomes one line of the output. Lines whose speaker is `-1`
    are emitted unchanged; every other line is prefixed with a
    square-bracket color tag (presumably console markup consumed by the
    caller's renderer -- confirm against the caller).

    Colors are assigned by speaker index and cycle through the palette, so
    any number of speakers is supported: speaker `k` and speaker
    `k + len(palette)` share a color.

    Returns the lines joined with newlines (an empty string for an empty
    transcription).
    """
    palette = (
        "bright_red", "bright_blue", "bright_green", "orange3", "deep_pink1",
        "yellow2", "magenta", "cyan", "bright_magenta", "dodger_blue2",
    )
    lines = []
    for speaker, text in transcription:
        if speaker == -1:
            # No speaker found for this text, use default terminal color
            lines.append(text)
        else:
            # Wrap around instead of raising IndexError once there are more
            # speakers than palette entries.
            lines.append(f"[{palette[speaker % len(palette)]}]{text}")
    return "\n".join(lines)