diff --git a/requirements.txt b/requirements.txt index 063931a951..7961106b6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ pyyaml==6.0 readability-lxml==0.8.1 requests tiktoken==0.3.3 +gTTS==2.3.1 docker googlesearch-python google-api-python-client #(https://developers.google.com/custom-search/v1/overview) diff --git a/scripts/speak.py b/scripts/speak.py index 2fbcbed26f..6fef41f83e 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -3,6 +3,8 @@ from playsound import playsound import requests from config import Config cfg = Config() +import gtts + # TODO: Nicer names for these ids voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] @@ -12,10 +14,9 @@ tts_headers = { "xi-api-key": cfg.elevenlabs_api_key } -def say_text(text, voice_index=0): +def eleven_labs_speech(text, voice_index=0): tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) - formatted_message = {"text": text} response = requests.post( tts_url, headers=tts_headers, json=formatted_message) @@ -24,8 +25,24 @@ def say_text(text, voice_index=0): with open("speech.mpeg", "wb") as f: f.write(response.content) playsound("speech.mpeg") - # Delete audio file os.remove("speech.mpeg") + return True else: print("Request failed with status code:", response.status_code) print("Response content:", response.content) + return False + +def gtts_speech(text): + tts = gtts.gTTS(text) + tts.save("speech.mp3") + playsound("speech.mp3") + os.remove("speech.mp3") + +def say_text(text, voice_index=0): + if not cfg.elevenlabs_api_key: + gtts_speech(text) + else: + success = eleven_labs_speech() + if not success: + gtts_speech(text) +