added srt format

This commit is contained in:
AtHeartEngineer
2024-03-11 08:07:18 -04:00
parent 7395eb1dc4
commit 8759621569
3 changed files with 19 additions and 4 deletions

Binary file not shown.

16
format.py Normal file
View File

@@ -0,0 +1,16 @@
def format_srt(data):
    """Render transcript segments as SubRip (SRT) subtitle text.

    Args:
        data: Iterable of segment mappings. Each segment must provide
            ``'start'`` and ``'end'`` (seconds, handed to ``format_time``)
            and ``'text'``. ``'speaker'`` is optional.

    Returns:
        A single string holding one numbered cue per segment (numbering
        starts at 1), each cue terminated by a blank line. Empty input
        yields ``''``.
    """
    cues = []
    for i, segment in enumerate(data, start=1):
        start_time = format_time(segment['start'])
        end_time = format_time(segment['end'])
        # A segment without a speaker label must not abort the whole export
        # with a KeyError; emit its text without the "<speaker>: " prefix.
        speaker = segment.get('speaker')
        if speaker is None:
            text = segment['text']
        else:
            text = f"{speaker}: {segment['text']}"
        cues.append(f"{i}\n{start_time} --> {end_time}\n{text}\n\n")
    # Join once instead of growing a string with += on every iteration.
    return ''.join(cues)
def format_time(seconds):
    """Convert seconds to SRT time format HH:MM:SS,mmm.

    Args:
        seconds: Offset in seconds as an int or float.

    Returns:
        The offset formatted as zero-padded ``HH:MM:SS,mmm``.
    """
    # Round to whole milliseconds first and stay in integer arithmetic.
    # Truncating the float fraction (int(seconds % 1 * 1000)) loses a
    # millisecond on values such as 1.001, whose binary representation is
    # slightly below the decimal literal.
    total_ms = int(round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"

View File

@@ -5,6 +5,7 @@ import torch
from werkzeug.utils import secure_filename
import os
import toml
from format import format_srt
settings = toml.load('settings.toml')
@@ -39,12 +40,10 @@ def transcribe_audio():
model = whisperx.load_model("large-v2", device, language=language, compute_type=compute_type, download_root=model_dir)
audio = whisperx.load_audio(file_path)
result = model.transcribe(audio, batch_size=batch_size)
print(result["segments"]) # before alignment
# Align Whisper output
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
print(result["segments"]) # after alignment
# Assign speaker labels
diarize_model = whisperx.DiarizationPipeline(use_auth_token=HF_TOKEN, device=device)
@@ -57,8 +56,8 @@ def transcribe_audio():
del model, model_a, diarize_model
os.remove(file_path) # Remove the uploaded file after processing
return jsonify(result["segments"])
srt = format_srt(result['segments'])
return jsonify(srt)
return jsonify(error="Invalid file type"), 400