refactor: Update whisper-processing.cpp to add cleared_last_sub flag

This commit is contained in:
Roy Shilkrot
2024-07-16 06:12:54 -04:00
parent f30503e3fa
commit f2ef058dd2
5 changed files with 65 additions and 43 deletions

View File

@@ -38,7 +38,7 @@
},
"name": "obs-localvocal",
"displayName": "OBS Localvocal",
"version": "0.3.2",
"version": "0.3.3",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-localvocal",
"email": "roy.shil@gmail.com",

View File

@@ -7,8 +7,8 @@ elseif(APPLE)
FetchContent_Declare(
openssl-macos-fetch
URL "https://www.openssl.org/source/openssl-1.1.1k.tar.gz"
URL_HASH SHA256=3f
URL "https://github.com/occ-ai/occ-ai-dep-openssl/releases/download/0.0.1/openssl-3.3.1-macos.tar.gz"
URL_HASH SHA256=d578921b7168e21451f0b6e4ac4cb989c17abc6829c8c43f32136c1c2544ffde
)
FetchContent_MakeAvailable(openssl-macos-fetch)

View File

@@ -51,6 +51,7 @@ struct transcription_filter_data {
/* whisper */
std::string whisper_model_path;
bool whisper_model_loaded_new;
struct whisper_context *whisper_context;
whisper_full_params whisper_params;

View File

@@ -564,9 +564,9 @@ void transcription_filter_defaults(obs_data_t *s)
obs_data_set_default_bool(s, "buffered_output", false);
obs_data_set_default_int(s, "buffer_num_lines", 2);
obs_data_set_default_int(s, "buffer_num_chars_per_line", 30);
obs_data_set_default_int(s, "buffer_num_chars_per_line", 8);
obs_data_set_default_int(s, "buffer_output_type",
(int)TokenBufferSegmentation::SEGMENTATION_TOKEN);
(int)TokenBufferSegmentation::SEGMENTATION_WORD);
obs_data_set_default_bool(s, "vad_enabled", true);
obs_data_set_default_double(s, "vad_threshold", 0.65);
@@ -740,45 +740,64 @@ obs_properties_t *transcription_filter_properties(void *data)
obs_property_set_visible(obs_properties_get(ppts, "whisper_model_path_external"), false);
// Add a callback to the model list to handle the external model file selection
obs_property_set_modified_callback(whisper_models_list, [](obs_properties_t *props,
obs_property_t *property,
obs_data_t *settings) {
UNUSED_PARAMETER(property);
// If the selected model is the external model, show the external model file selection
// input
const char *new_model_path = obs_data_get_string(settings, "whisper_model_path");
const bool is_external = strcmp(new_model_path, "!!!external!!!") == 0;
if (is_external) {
obs_property_set_visible(
obs_properties_get(props, "whisper_model_path_external"), true);
} else {
obs_property_set_visible(
obs_properties_get(props, "whisper_model_path_external"), false);
}
const std::string model_name = new_model_path;
// if the model is english-only -> hide all the languages but english
const bool is_english_only_internal =
(model_name.find("English") != std::string::npos) && !is_external;
// clear the language selection list ("whisper_language_select")
obs_property_t *prop_lang = obs_properties_get(props, "whisper_language_select");
obs_property_list_clear(prop_lang);
if (is_english_only_internal) {
// add only the english language
obs_property_list_add_string(prop_lang, "English", "en");
// set the language to english
obs_data_set_string(settings, "whisper_language_select", "en");
} else {
// add all the languages
for (const auto &lang : whisper_available_lang) {
obs_property_list_add_string(prop_lang, lang.second.c_str(),
lang.first.c_str());
// Register the modified-callback for the model list. The filter data pointer `gf`
// is supplied as the final argument and is received by the lambda as `data_`.
obs_property_set_modified_callback2(
whisper_models_list,
[](void *data_, obs_properties_t *props, obs_property_t *property,
obs_data_t *settings) {
UNUSED_PARAMETER(property);
// NOTE(review): gf_ is dereferenced further down without a null check —
// confirm that `gf` can never be null when the properties are built.
struct transcription_filter_data *gf_ =
static_cast<struct transcription_filter_data *>(data_);
// If the selected model is the external model, show the external model file selection
// input
// A nullptr result from the settings lookup is replaced by an empty string.
const char *new_model_path_cstr =
obs_data_get_string(settings, "whisper_model_path") != nullptr
? obs_data_get_string(settings, "whisper_model_path")
: "";
const std::string new_model_path = new_model_path_cstr;
// Substring match: any path that contains the marker is treated as external.
const bool is_external =
(new_model_path.find("!!!external!!!") != std::string::npos);
if (is_external) {
obs_property_set_visible(
obs_properties_get(props, "whisper_model_path_external"),
true);
} else {
obs_property_set_visible(
obs_properties_get(props, "whisper_model_path_external"),
false);
}
// NOTE(review): the following five lines appear to be the tail of the previous
// (removed) callback as rendered by the diff view — confirm against the real file.
// set the language to auto (default)
obs_data_set_string(settings, "whisper_language_select", "auto");
}
return true;
});
// check if this is a new model selection
// (whisper_model_loaded_new is set in update_whisper_model when the model path
// changes, and is reset at the end of this branch)
if (gf_->whisper_model_loaded_new) {
// if the model is english-only -> hide all the languages but english
// "English-only" here means: the path contains "English" and is not external.
const bool is_english_only_internal =
(new_model_path.find("English") != std::string::npos) &&
!is_external;
// clear the language selection list ("whisper_language_select")
obs_property_t *prop_lang =
obs_properties_get(props, "whisper_language_select");
obs_property_list_clear(prop_lang);
if (is_english_only_internal) {
// add only the english language
obs_property_list_add_string(prop_lang, "English", "en");
// set the language to english
obs_data_set_string(settings, "whisper_language_select",
"en");
} else {
// add all the languages
// (display name is lang.second, stored value is lang.first)
for (const auto &lang : whisper_available_lang) {
obs_property_list_add_string(prop_lang,
lang.second.c_str(),
lang.first.c_str());
}
// set the language to auto (default)
obs_data_set_string(settings, "whisper_language_select",
"auto");
}
// Clear the flag so the language list is not rebuilt again until it is set anew.
gf_->whisper_model_loaded_new = false;
}
return true;
},
gf);
// add translation option group
obs_properties_t *translation_group = obs_properties_create();

View File

@@ -62,6 +62,8 @@ void update_whisper_model(struct transcription_filter_data *gf)
// model path changed
obs_log(gf->log_level, "model path changed from %s to %s",
gf->whisper_model_path.c_str(), new_model_path.c_str());
gf->whisper_model_loaded_new = true;
}
// check if the new model is external file