diff --git a/.env.template b/.env.template
index 525cd61c5f..f9a74c9ca4 100644
--- a/.env.template
+++ b/.env.template
@@ -11,4 +11,5 @@ OPENAI_API_BASE=your-base-url-for-azure
 OPENAI_API_VERSION=api-version-for-azure
 OPENAI_DEPLOYMENT_ID=deployment-id-for-azure
 IMAGE_PROVIDER=dalle
-HUGGINGFACE_API_TOKEN=
\ No newline at end of file
+HUGGINGFACE_API_TOKEN=
+USE_MAC_OS_TTS=False
diff --git a/scripts/speak.py b/scripts/speak.py
index eab7b6601f..1ffad84641 100644
--- a/scripts/speak.py
+++ b/scripts/speak.py
@@ -3,12 +3,10 @@ from playsound import playsound
 import requests
 from config import Config
 cfg = Config()
+import gtts
 
-# Remove the import of gtts
-# import gtts
-
-# Change voices to macOS voice identifiers
-voices = ["com.apple.speech.synthesis.voice.siri_female", "com.apple.speech.synthesis.voice.siri_male"]
+# TODO: Nicer names for these ids
+voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"]
 
 tts_headers = {
     "Content-Type": "application/json",
@@ -33,14 +31,38 @@ def eleven_labs_speech(text, voice_index=0):
         print("Response content:", response.content)
         return False
 
-# Use macOS built-in TTS instead of gtts
-def macos_tts_speech(text, voice_index=1):
-    os.system(f'say "{text}"')
+def gtts_speech(text):
+    """Speak ``text`` using gTTS via a temporary ``speech.mp3`` file."""
+    tts = gtts.gTTS(text)
+    tts.save("speech.mp3")
+    try:
+        playsound("speech.mp3")
+    finally:
+        # Remove the temporary file even if playback fails.
+        os.remove("speech.mp3")
+
+def macos_tts_speech(text):
+    """Speak ``text`` with the macOS built-in ``say`` command."""
+    import shlex
+
+    # Quote the text so quotes, backticks or ``$(...)`` inside it are
+    # spoken literally instead of being interpreted by the shell.
+    os.system(f'say {shlex.quote(text)}')
 
 def say_text(text, voice_index=0):
+    """Speak ``text`` aloud with the first available TTS backend.
+
+    Without an ElevenLabs API key, use macOS ``say`` when
+    ``cfg.use_mac_os_tts`` is the string ``'True'`` and gTTS otherwise.
+    With a key, try ElevenLabs using ``voice_index`` and fall back to
+    gTTS if that request reports failure.
+    """
     if not cfg.elevenlabs_api_key:
-        macos_tts_speech(text, voice_index)
+        if cfg.use_mac_os_tts == 'True':
+            macos_tts_speech(text)
+        else:
+            gtts_speech(text)
     else:
         success = eleven_labs_speech(text, voice_index)
         if not success:
-            macos_tts_speech(text, voice_index)
+            gtts_speech(text)