refactor: Enable cloud transcription in transcription filter

This commit enables the option for cloud transcription in the transcription filter. The `transcription_filter_data` struct now includes a boolean flag `cloud_transcription` and an instance of the `CloudTranscription` class. The `transcription_filter_properties` function has been updated to add the group properties for cloud transcription. The `transcription_filter_update` function now updates the `cloud_transcription` flag based on the user settings and starts or shuts down the cloud transcription accordingly.
2026-01-09 12:28:05 -05:00 · 2024-08-13 11:28:53 -04:00
parent 6cc88b1ead
commit 648b74d053
4 changed files with 58 additions and 0 deletions
--- a/src/cloud-transcription/CloudTranscription.h
+++ b/src/cloud-transcription/CloudTranscription.h
@@ -0,0 +1,32 @@
+#ifndef CLOUD_TRANSCRIPTION_H
+#define CLOUD_TRANSCRIPTION_H
+
+#include <string>
+#include <thread>
+
+#include "plugin-support.h"
+
+#include <obs.h>
+
+// Interface for a cloud transcription job.
+//
+// Only declarations live here: the constructor, destructor, start/stop and the
+// worker are defined out of line. The class holds a std::thread member, which
+// presumably runs transcriptionWorker() - confirm against the implementation.
+class CloudTranscription {
+public:
+	CloudTranscription();
+	~CloudTranscription();
+
+	// Begin transcribing `audioFile`.
+	// The argument defaults to an empty string so that the zero-argument call
+	// made from transcription_filter_update() is well-formed.
+	// NOTE(review): confirm the worker treats an empty path as "no file".
+	void startTranscription(const std::string &audioFile = std::string());
+	void stopTranscription();
+
+	// Name used by transcription_filter_update() when the feature is switched
+	// off; it simply forwards to stopTranscription().
+	void shutdown() { stopTranscription(); }
+
+	// Record the language to use and log the change.
+	void setLanguage(const std::string &language)
+	{
+		obs_log(LOG_INFO, "Setting language to %s", language.c_str());
+		// The parameter shadows the member of the same name, so the member
+		// has to be qualified explicitly for the value to be kept.
+		this->language = language;
+	}
+
+private:
+	std::thread transcriptionThread;
+	// Starts out false so the flag never holds an indeterminate value.
+	bool isTranscribing = false;
+	std::string language;
+
+	void transcriptionWorker(const std::string &audioFile);
+};
+
+#endif // CLOUD_TRANSCRIPTION_H
--- a/src/transcription-filter-data.h
+++ b/src/transcription-filter-data.h
@@ -19,6 +19,7 @@
 #include "whisper-utils/silero-vad-onnx.h"
 #include "whisper-utils/whisper-processing.h"
 #include "whisper-utils/token-buffer-thread.h"
+#include "cloud-transcription/CloudTranscription.h"

 #define MAX_PREPROC_CHANNELS 10

@@ -119,6 +120,10 @@ struct transcription_filter_data {
 	TokenBufferSegmentation buffered_output_output_type =
 		TokenBufferSegmentation::SEGMENTATION_TOKEN;

+	// Cloud transcription and translation
+	bool cloud_transcription = false;
+	CloudTranscription cloudTranscription;
+
 	// ctor
 	transcription_filter_data() : whisper_buf_mutex(), whisper_ctx_mutex(), wshiper_thread_cv()
 	{
--- a/src/transcription-filter-properties.cpp
+++ b/src/transcription-filter-properties.cpp
@@ -472,6 +472,17 @@ void add_partial_group_properties(obs_properties_t *ppts)
 				      3000, 50);
 }

+// Add the checkable cloud-transcription group to the filter's property set.
+//
+// ppts: parent property set that receives the new group.
+void add_cloud_transcription_group_properties(obs_properties_t *ppts)
+{
+	// Child property set that will hold the cloud options; it is handed to
+	// the group created below.
+	obs_properties_t *cloud_group = obs_properties_create();
+
+	// A checkable group's on/off state is saved under the property name, so
+	// the group is registered under "cloud_transcription_enable" - the key
+	// transcription_filter_update() reads with obs_data_get_bool().
+	obs_properties_add_group(ppts, "cloud_transcription_enable",
+				 MT_("cloud_transcription_enable"), OBS_GROUP_CHECKABLE,
+				 cloud_group);
+
+	// TODO: add cloud transcription options
+}
+
 obs_properties_t *transcription_filter_properties(void *data)
 {
 	struct transcription_filter_data *gf =
@@ -490,6 +501,7 @@ obs_properties_t *transcription_filter_properties(void *data)

 	add_general_group_properties(ppts);
 	add_transcription_group_properties(ppts, gf);
+	add_cloud_transcription_group_properties(ppts);
 	add_translation_group_properties(ppts);
 	add_file_output_group_properties(ppts);
 	add_buffered_output_group_properties(ppts);
--- a/src/transcription-filter.cpp
+++ b/src/transcription-filter.cpp
@@ -416,6 +416,15 @@ void transcription_filter_update(void *data, obs_data_t *s)
 			}
 		}
 	}
+
+	// Update cloud transcription and translation options
+	gf->cloud_transcription = obs_data_get_bool(s, "cloud_transcription_enable");
+	if (gf->cloud_transcription) {
+		gf->cloudTranscription.setLanguage(gf->whisper_params.language);
+		gf->cloudTranscription.startTranscription();
+	} else {
+		gf->cloudTranscription.shutdown();
+	}
 }

 void *transcription_filter_create(obs_data_t *settings, obs_source_t *filter)