From 34560901175de1b2bd3d7b855c52876b099b59c2 Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 04:21:23 +0200 Subject: [PATCH 1/8] replace gtts with macos tts --- scripts/speak.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/scripts/speak.py b/scripts/speak.py index 13517d366f..eab7b6601f 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -3,11 +3,12 @@ from playsound import playsound import requests from config import Config cfg = Config() -import gtts +# Remove the import of gtts +# import gtts -# TODO: Nicer names for these ids -voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] +# Change voices to macOS voice identifiers +voices = ["com.apple.speech.synthesis.voice.siri_female", "com.apple.speech.synthesis.voice.siri_male"] tts_headers = { "Content-Type": "application/json", @@ -32,17 +33,14 @@ def eleven_labs_speech(text, voice_index=0): print("Response content:", response.content) return False -def gtts_speech(text): - tts = gtts.gTTS(text) - tts.save("speech.mp3") - playsound("speech.mp3") - os.remove("speech.mp3") +# Use macOS built-in TTS instead of gtts +def macos_tts_speech(text, voice_index=1): + os.system(f'say "{text}"') def say_text(text, voice_index=0): if not cfg.elevenlabs_api_key: - gtts_speech(text) + macos_tts_speech(text, voice_index) else: success = eleven_labs_speech(text, voice_index) if not success: - gtts_speech(text) - + macos_tts_speech(text, voice_index) From 2facc3e2cb65e72f21e329eb691f445be8599295 Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 11:11:03 +0200 Subject: [PATCH 2/8] add config for mac os tts --- .env.template | 3 ++- scripts/speak.py | 23 ++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.env.template b/.env.template index 525cd61c5f..f9a74c9ca4 100644 --- a/.env.template +++ b/.env.template @@ -11,4 +11,5 @@ OPENAI_API_BASE=your-base-url-for-azure OPENAI_API_VERSION=api-version-for-azure OPENAI_DEPLOYMENT_ID=deployment-id-for-azure IMAGE_PROVIDER=dalle -HUGGINGFACE_API_TOKEN= \ No newline at end of file +HUGGINGFACE_API_TOKEN= +USE_MAC_OS_TTS=False diff --git a/scripts/speak.py b/scripts/speak.py index eab7b6601f..1ffad84641 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -3,12 +3,10 @@ from playsound import playsound import requests from config import Config cfg = Config() +import gtts -# Remove the import of gtts -# import gtts - -# Change voices to macOS voice identifiers -voices = ["com.apple.speech.synthesis.voice.siri_female", "com.apple.speech.synthesis.voice.siri_male"] +# TODO: Nicer names for these ids +voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] tts_headers = { "Content-Type": "application/json", @@ -33,14 +31,21 @@ def eleven_labs_speech(text, voice_index=0): print("Response content:", response.content) return False -# Use macOS built-in TTS instead of gtts -def macos_tts_speech(text, voice_index=1): +def gtts_speech(text): + tts = gtts.gTTS(text) + tts.save("speech.mp3") + playsound("speech.mp3") + os.remove("speech.mp3") + +def macos_tts_speech(text): os.system(f'say "{text}"') def say_text(text, voice_index=0): if not cfg.elevenlabs_api_key: - macos_tts_speech(text, voice_index) + if cfg.use_mac_os_tts == 'True': + macos_tts_speech(text) + gtts(text) else: success = eleven_labs_speech(text, voice_index) if not success: - macos_tts_speech(text, voice_index) + gtts_speech()(text) From 205a0c84cf0ba8690a0b49a0a498ceedb5ac987d Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 11:18:17 +0200 Subject: [PATCH 3/8] fix config read --- scripts/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/config.py b/scripts/config.py index 4d7adec1c0..b15d6f909a 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -52,6 +52,8 @@ class Config(metaclass=Singleton): openai.api_version = self.openai_api_version self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") + + self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS") self.google_api_key = os.getenv("GOOGLE_API_KEY") self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID") From 64eb882947b09f4ad6c0e024fa327051c9c308be Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 11:20:58 +0200 Subject: [PATCH 4/8] fix code messup of assistant --- scripts/config.py | 1 + scripts/speak.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/config.py b/scripts/config.py index b15d6f909a..e9168efe5d 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -53,6 +53,7 @@ class Config(metaclass=Singleton): self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") + self.use_mac_os_tts = False self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS") self.google_api_key = os.getenv("GOOGLE_API_KEY") diff --git a/scripts/speak.py b/scripts/speak.py index 1ffad84641..cdb92e0786 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -44,7 +44,8 @@ def say_text(text, voice_index=0): if not cfg.elevenlabs_api_key: if cfg.use_mac_os_tts == 'True': macos_tts_speech(text) - gtts(text) + else: + gtts(text) else: success = eleven_labs_speech(text, voice_index) if not success: From 1946b564a1d870029c2164914fd815356b780062 Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 11:22:24 +0200 Subject: [PATCH 5/8] fix gtts_speech --- scripts/speak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/speak.py b/scripts/speak.py index cdb92e0786..e5b839fdf1 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -45,7 +45,7 @@ def say_text(text, voice_index=0): if cfg.use_mac_os_tts == 'True': macos_tts_speech(text) else: - gtts(text) + gtts_speech(text) else: success = eleven_labs_speech(text, voice_index) if not success: From ed16bba0ca0ae57c8d9ad1fcaae9555b3d7ed264 Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 11:22:47 +0200 Subject: [PATCH 6/8] fix gtts_speech --- scripts/speak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/speak.py b/scripts/speak.py index e5b839fdf1..485381dc5c 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -49,4 +49,4 @@ def say_text(text, voice_index=0): else: success = eleven_labs_speech(text, voice_index) if not success: - gtts_speech()(text) + gtts_speech(text) From ec239734c59dd7b0feb9261fc0d4a8008308c72b Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 14:15:30 +0200 Subject: [PATCH 7/8] fix whitespace --- scripts/speak.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/speak.py b/scripts/speak.py index ed52a433ab..c48d090bd1 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -47,6 +47,7 @@ def say_text(text, voice_index=0): macos_tts_speech(text) else: gtts_speech(text) + else: success = eleven_labs_speech(text, voice_index) if not success: From 7e9941e5b1b560f33e2ad4ed7d05c47af961ec75 Mon Sep 17 00:00:00 2001 From: Wlad Date: Mon, 10 Apr 2023 14:17:18 +0200 Subject: [PATCH 8/8] fix whitespace --- scripts/speak.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/speak.py b/scripts/speak.py index c48d090bd1..c47a9f7527 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -47,8 +47,8 @@ def say_text(text, voice_index=0): macos_tts_speech(text) else: gtts_speech(text) - else: success = eleven_labs_speech(text, voice_index) if not success: gtts_speech(text) +