refactor: Enable cloud transcription in transcription filter

This commit enables the option for cloud transcription in the transcription filter. The `transcription_filter_data` struct now includes a boolean flag `cloud_transcription` and an instance of the `CloudTranscription` class. The `transcription_filter_properties` function has been updated to add the group properties for cloud transcription. The `transcription_filter_update` function now updates the `cloud_transcription` flag based on the user settings and starts or shuts down the cloud transcription accordingly.
This commit is contained in:
Roy Shilkrot
2024-08-13 11:28:53 -04:00
parent 6cc88b1ead
commit 648b74d053
4 changed files with 58 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
#ifndef CLOUD_TRANSCRIPTION_H
#define CLOUD_TRANSCRIPTION_H
#include <string>
#include <thread>
#include "plugin-support.h"
#include <obs.h>
/**
 * Controller object for cloud-based transcription.
 *
 * Holds a worker thread handle, a "currently transcribing" flag and the
 * language selected for transcription. The constructor, destructor,
 * startTranscription(), stopTranscription() and transcriptionWorker() are
 * only declared here; their definitions live outside this header.
 */
class CloudTranscription {
public:
	CloudTranscription();
	~CloudTranscription();

	/**
	 * Begin transcription.
	 *
	 * @param audioFile  Audio file to hand to the transcription worker.
	 *                   Defaults to an empty string so the argument-less call
	 *                   in transcription_filter_update() is well-formed.
	 *                   NOTE(review): what an empty path should mean is up to
	 *                   the implementation - confirm.
	 */
	void startTranscription(const std::string &audioFile = std::string());

	/** Stop a transcription previously started with startTranscription(). */
	void stopTranscription();

	/**
	 * Alias for stopTranscription().
	 *
	 * transcription_filter_update() calls shutdown() when cloud transcription
	 * is disabled; this forwarder gives that call a target.
	 * NOTE(review): presumably no extra teardown is needed - confirm.
	 */
	void shutdown() { stopTranscription(); }

	/**
	 * Select the transcription language.
	 *
	 * @param language  Language identifier; logged and stored in the member
	 *                  of the same name.
	 */
	void setLanguage(const std::string &language)
	{
		obs_log(LOG_INFO, "Setting language to %s", language.c_str());
		// The parameter shadows the member, so qualify with this-> to
		// actually persist the value instead of only logging it.
		this->language = language;
	}

private:
	std::thread transcriptionThread;
	// Initialized in-class so the flag is never read while indeterminate.
	bool isTranscribing = false;
	std::string language;
	void transcriptionWorker(const std::string &audioFile);
};
#endif // CLOUD_TRANSCRIPTION_H

View File

@@ -19,6 +19,7 @@
#include "whisper-utils/silero-vad-onnx.h"
#include "whisper-utils/whisper-processing.h"
#include "whisper-utils/token-buffer-thread.h"
#include "cloud-transcription/CloudTranscription.h"
#define MAX_PREPROC_CHANNELS 10
@@ -119,6 +120,10 @@ struct transcription_filter_data {
TokenBufferSegmentation buffered_output_output_type =
TokenBufferSegmentation::SEGMENTATION_TOKEN;
// Cloud transcription and translation
bool cloud_transcription = false;
CloudTranscription cloudTranscription;
// ctor
transcription_filter_data() : whisper_buf_mutex(), whisper_ctx_mutex(), wshiper_thread_cv()
{

View File

@@ -472,6 +472,17 @@ void add_partial_group_properties(obs_properties_t *ppts)
3000, 50);
}
/**
 * Add the checkable "cloud transcription" group to the filter's properties.
 *
 * @param ppts  Property list the group is appended to.
 *
 * The group currently has no child options; it only exposes the enable
 * checkbox.
 */
void add_cloud_transcription_group_properties(obs_properties_t *ppts)
{
	// Container for the (future) cloud transcription options
	obs_properties_t *cloud_group = obs_properties_create();
	// The name of a checkable group doubles as the bool settings key for its
	// checkbox. transcription_filter_update() reads "cloud_transcription_enable",
	// so the group must be registered under that exact key or the checkbox
	// state never reaches the filter.
	obs_properties_add_group(ppts, "cloud_transcription_enable",
				 MT_("cloud_transcription_enable"), OBS_GROUP_CHECKABLE,
				 cloud_group);
	// TODO: add cloud transcription options
}
obs_properties_t *transcription_filter_properties(void *data)
{
struct transcription_filter_data *gf =
@@ -490,6 +501,7 @@ obs_properties_t *transcription_filter_properties(void *data)
add_general_group_properties(ppts);
add_transcription_group_properties(ppts, gf);
add_cloud_transcription_group_properties(ppts);
add_translation_group_properties(ppts);
add_file_output_group_properties(ppts);
add_buffered_output_group_properties(ppts);

View File

@@ -416,6 +416,15 @@ void transcription_filter_update(void *data, obs_data_t *s)
}
}
}
// Update cloud transcription and translation options
gf->cloud_transcription = obs_data_get_bool(s, "cloud_transcription_enable");
if (gf->cloud_transcription) {
gf->cloudTranscription.setLanguage(gf->whisper_params.language);
gf->cloudTranscription.startTranscription();
} else {
gf->cloudTranscription.shutdown();
}
}
void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)