mirror of
https://github.com/AtHeartEngineering/local_transcription.git
synced 2026-01-09 07:27:56 -05:00
added srt format
This commit is contained in:
BIN
__pycache__/format.cpython-311.pyc
Normal file
BIN
__pycache__/format.cpython-311.pyc
Normal file
Binary file not shown.
16
format.py
Normal file
16
format.py
Normal file
@@ -0,0 +1,16 @@
|
||||
def format_srt(data):
    """Render transcription segments as a SubRip (SRT) subtitle string.

    Args:
        data: Iterable of segment mappings. Each segment must provide
            ``'start'`` and ``'end'`` (passed to ``format_time``, which
            expects seconds) and ``'text'``. ``'speaker'`` is optional.

    Returns:
        A string with one SRT cue per segment, numbered from 1, each cue
        terminated by a blank line. An empty string for empty input.

    Raises:
        KeyError: If a segment lacks ``'start'``, ``'end'`` or ``'text'``.
    """
    cues = []
    for index, segment in enumerate(data, start=1):
        start_time = format_time(segment['start'])
        end_time = format_time(segment['end'])
        # A segment may carry no speaker label (presumably when diarization
        # could not attribute it -- verify against the caller); emit the bare
        # text instead of failing the whole export with a KeyError.
        speaker = segment.get('speaker')
        if speaker:
            text = f"{speaker}: {segment['text']}"
        else:
            text = f"{segment['text']}"
        cues.append(f"{index}\n{start_time} --> {end_time}\n{text}\n\n")
    # Join once rather than growing a string with += on every segment.
    return ''.join(cues)
|
||||
|
||||
def format_time(seconds):
    """Convert seconds to SRT time format HH:MM:SS,mmm.

    Args:
        seconds: Offset in seconds as an int or float. Negative values are
            clamped to zero, since SRT timestamps cannot be negative.

    Returns:
        The timestamp as a string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Round to whole milliseconds first and do the rest in integer arithmetic.
    # Truncating the float fraction (int(seconds % 1 * 1000)) loses a
    # millisecond to representation error, e.g. 1.001 -> ",000".
    total_ms = max(0, round(seconds * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
|
||||
7
host.py
7
host.py
@@ -5,6 +5,7 @@ import torch
|
||||
from werkzeug.utils import secure_filename
|
||||
import os
|
||||
import toml
|
||||
from format import format_srt
|
||||
|
||||
settings = toml.load('settings.toml')
|
||||
|
||||
@@ -39,12 +40,10 @@ def transcribe_audio():
|
||||
model = whisperx.load_model("large-v2", device, language=language, compute_type=compute_type, download_root=model_dir)
|
||||
audio = whisperx.load_audio(file_path)
|
||||
result = model.transcribe(audio, batch_size=batch_size)
|
||||
print(result["segments"]) # before alignment
|
||||
|
||||
# Align Whisper output
|
||||
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
|
||||
result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
|
||||
print(result["segments"]) # after alignment
|
||||
|
||||
# Assign speaker labels
|
||||
diarize_model = whisperx.DiarizationPipeline(use_auth_token=HF_TOKEN, device=device)
|
||||
@@ -57,8 +56,8 @@ def transcribe_audio():
|
||||
del model, model_a, diarize_model
|
||||
|
||||
os.remove(file_path) # Remove the uploaded file after processing
|
||||
|
||||
return jsonify(result["segments"])
|
||||
srt = format_srt(result['segments'])
|
||||
return jsonify(srt)
|
||||
|
||||
return jsonify(error="Invalid file type"), 400
|
||||
|
||||
|
||||
Reference in New Issue
Block a user