From 68e4af8685dac029fc86ab9c43dcd451c1d96758 Mon Sep 17 00:00:00 2001 From: Master Blood Date: Mon, 3 Apr 2023 22:38:01 -0700 Subject: [PATCH] Added a default (free) library for text to speech Adds the gTTS (Google Text-to-Speech) Python library as a fallback for text-to-speech conversion in the speak.py file. The changes were made to ensure that users can still convert text to speech even if the ElevenLabs API key is not set or if the API encounters an error. Additionally, the requirements.txt file has been updated to include the new gTTS dependency. --- requirements.txt | 1 + scripts/speak.py | 23 ++++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 063931a951..7961106b6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ pyyaml==6.0 readability-lxml==0.8.1 requests tiktoken==0.3.3 +gTTS==2.3.1 docker googlesearch-python google-api-python-client #(https://developers.google.com/custom-search/v1/overview) diff --git a/scripts/speak.py b/scripts/speak.py index 2fbcbed26f..6fef41f83e 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -3,6 +3,8 @@ from playsound import playsound import requests from config import Config cfg = Config() +import gtts + # TODO: Nicer names for these ids voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] @@ -12,10 +14,9 @@ tts_headers = { "xi-api-key": cfg.elevenlabs_api_key } -def say_text(text, voice_index=0): +def eleven_labs_speech(text, voice_index=0): tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) - formatted_message = {"text": text} response = requests.post( tts_url, headers=tts_headers, json=formatted_message) @@ -24,8 +25,24 @@ def say_text(text, voice_index=0): with open("speech.mpeg", "wb") as f: f.write(response.content) playsound("speech.mpeg") - # Delete audio file os.remove("speech.mpeg") + return True else: print("Request failed with status code:", response.status_code) print("Response content:", response.content) + return False + +def gtts_speech(text): + tts = gtts.gTTS(text) + tts.save("speech.mp3") + playsound("speech.mp3") + os.remove("speech.mp3") + +def say_text(text, voice_index=0): + if not cfg.elevenlabs_api_key: + gtts_speech(text) + else: + success = eleven_labs_speech() + if not success: + gtts_speech(text) +