deleted lots of random mp3 files

added audio to text under --transcribe
2026-01-09 22:38:10 -05:00 · 2024-02-21 20:22:07 -05:00 · 2024-02-21 20:00:20 -05:00
4 changed files with 113 additions and 2 deletions
--- a/installer/client/cli/fabric.py
+++ b/installer/client/cli/fabric.py
@@ -1,4 +1,4 @@
-from .utils import Standalone, Update, Setup, Alias
+from .utils import Standalone, Update, Setup, Alias, Whisper
 import argparse
 import sys
 import time
@@ -30,6 +30,8 @@ def main():
        help="Use this option if you want to see the results in realtime. NOTE: You will not be able to pipe the output into another command.",
        action="store_true",
    )
+    parser.add_argument('--transcribe', '-T',
+                        help="transcribe audio, please enter the path to the audio file, or a url with the audio file")
    parser.add_argument(
        "--list", "-l", help="List available patterns", action="store_true"
    )
@@ -88,6 +90,10 @@ def main():
    if args.listmodels:
        standalone.fetch_available_models()
        sys.exit()
+    if args.transcribe:
+        whisper = Whisper()
+        whisper.process_file(args.transcribe)
+        sys.exit()
    if args.text is not None:
        text = args.text
    else:
--- a/installer/client/cli/utils.py
+++ b/installer/client/cli/utils.py
@@ -10,6 +10,7 @@ from tqdm import tqdm
 import zipfile
 import tempfile
 import shutil
+from pydub import AudioSegment

 current_directory = os.path.dirname(os.path.realpath(__file__))
 config_directory = os.path.expanduser("~/.config/fabric")
@@ -215,6 +216,98 @@ class Standalone:
            return sys.stdin.read()


+class Whisper:
+    """Transcribe audio through the OpenAI ``whisper-1`` model, chunk by chunk."""
+
+    def __init__(self):
+        """Load the API key from ~/.config/fabric/.env and create the client.
+
+        ``self.client`` stays ``None`` when ``OPENAI_API_KEY`` is not set, so
+        later calls can report the problem instead of failing on a missing
+        attribute.
+        """
+        env_file = os.path.expanduser("~/.config/fabric/.env")
+        load_dotenv(env_file)
+        self.client = None
+        # Transcript text of every segment processed so far, in order.
+        self.whole_response = []
+        try:
+            apikey = os.environ["OPENAI_API_KEY"]
+        except KeyError:
+            print("OPENAI_API_KEY not found in environment variables.")
+            return
+        self.client = OpenAI()
+        self.client.api_key = apikey
+
+    def split_audio(self, file_path, segment_length_ms=10 * 60 * 1000):
+        """Split an audio file into consecutive fixed-length segments.
+
+        Args:
+            file_path: The path to the audio file.
+            segment_length_ms: Length of each segment in milliseconds
+                (defaults to 10 minutes). The last segment may be shorter.
+
+        Returns:
+            A list of pydub ``AudioSegment`` objects.
+        """
+        audio = AudioSegment.from_file(file_path)
+        # pydub clamps an out-of-range slice end, so the last chunk is shorter.
+        return [
+            audio[start_ms:start_ms + segment_length_ms]
+            for start_ms in range(0, len(audio), segment_length_ms)
+        ]
+
+    def process_segment(self, segment):
+        """Transcribe one audio file and append its text to ``whole_response``.
+
+        Args:
+            segment (str): Path to the audio file to send for transcription.
+
+        Returns:
+            None. Errors are printed rather than raised.
+        """
+        try:
+            # ``with`` closes the handle even if the API call raises.
+            with open(segment, "rb") as audio_file:
+                response = self.client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file
+                )
+            self.whole_response.append(response.text)
+        except Exception as e:
+            print(f"Error: {e}")
+
+    def process_file(self, audio_file):
+        """Transcribe an audio file and print the transcript.
+
+        The file is split into segments; each one is exported as an mp3 into
+        a temporary directory (removed afterwards, so nothing is left behind
+        in the working directory) and transcribed in turn.
+
+        Args:
+            audio_file (str): The path to the audio file to be transcribed.
+
+        Returns:
+            None
+        """
+        if self.client is None:
+            print("Error: no OpenAI client available; set OPENAI_API_KEY.")
+            return
+        try:
+            # TODO: downloading ``audio_file`` when it is a URL is not implemented.
+            segments = self.split_audio(audio_file)
+            with tempfile.TemporaryDirectory() as workdir:
+                for i, segment in enumerate(segments):
+                    segment_file_path = os.path.join(workdir, f"segment_{i}.mp3")
+                    # export() returns the open output file; close it so the
+                    # handle is not leaked and the directory can be removed.
+                    segment.export(segment_file_path, format="mp3").close()
+                    self.process_segment(segment_file_path)
+            print(' '.join(self.whole_response))
+        except Exception as e:
+            print(f"Error: {e}")
+
+
 class Update:
    def __init__(self):
        """Initialize the object with default values."""
--- a/poetry.lock
+++ b/poetry.lock
@@ -933,6 +933,17 @@ files = [
 [package.dependencies]
 typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"

+[[package]]
+name = "pydub"
+version = "0.25.1"
+description = "Manipulate audio with an simple and easy high level interface"
+optional = false
+python-versions = "*"
+files = [
+    {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"},
+    {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"},
+]
+
 [[package]]
 name = "pyjwt"
 version = "2.8.0"
@@ -1394,4 +1405,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "8aa1e3fe70b9d326a7809abd70f2d78fee286d5106ab40f7d2d61a7feaf359ef"
+content-hash = "b8025aa005b3ad74c5e76f766c9311f2fa0592d4672e7f6e328d5f27001554aa"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ gunicorn = "^21.2.0"
 gevent = "^23.9.1"
 httpx = "^0.26.0"
 tqdm = "^4.66.1"
+pydub = "^0.25.1"


 [tool.poetry.group.server.dependencies]
Author	SHA1	Message	Date
jad2121	fbfea93b6c	deleted lots of random mp3 files	2024-02-21 20:22:07 -05:00
jad2121	c4332c9ee0	added audio to text under --transcribe	2024-02-21 20:00:20 -05:00