...

2025-08-20 04:15:43 +02:00
parent 6b9f0cf291
commit e4bb201181
95 changed files with 194 additions and 907 deletions
--- a/herolib/clients/assemblyai/client.py
+++ b/herolib/clients/assemblyai/client.py
@@ -0,0 +1,69 @@
+import os
+
+from pydub import AudioSegment
+import assemblyai as aai
+
+
+class Client:
+    """Small wrapper around the AssemblyAI SDK for transcribing .mp4 recordings.
+
+    The API key is taken from the ``ASSEMBLYAI`` environment variable when the
+    client is constructed.
+    """
+
+    def __init__(self) -> None:
+        """Configure the AssemblyAI SDK and create a transcriber.
+
+        Raises:
+            EnvironmentError: If ``ASSEMBLYAI`` is unset or empty.
+        """
+        api_key = os.getenv("ASSEMBLYAI")
+
+        if not api_key:
+            raise EnvironmentError(
+                "Please set the ASSEMBLYAI environment variable with your AssemblyAI API key."
+            )
+
+        self.api_key = api_key
+        # NOTE: this assigns the key on the SDK's module-level settings object,
+        # so it is shared by everything in the process that uses `aai`.
+        aai.settings.api_key = self.api_key
+        self.transcriber = aai.Transcriber()
+
+    def convert_to_ogg_mono(
+        self, input_path: str, output_path: str, *, mono: bool = False
+    ) -> None:
+        """Convert an .mp4 file to .ogg, optionally downmixing to one channel.
+
+        Despite the method name, the channel layout is left untouched unless
+        ``mono=True`` is passed; the default preserves the output existing
+        callers already get.
+
+        Args:
+            input_path: Path of the source file, decoded as mp4.
+            output_path: Path the .ogg file is written to.
+            mono: When true, downmix the audio to a single channel first.
+        """
+        audio = AudioSegment.from_file(input_path, format="mp4")
+        if mono:
+            audio = audio.set_channels(1)
+        audio.export(output_path, format="ogg")
+        print(f"Converted to .ogg in {output_path}")
+
+    def transcribe_audio(self, audio_path: str, output_path: str) -> None:
+        """Transcribe an audio file and write the utterances to a Markdown file.
+
+        Speaker labels are requested, and one section is written per utterance.
+
+        Args:
+            audio_path: Path of the audio file handed to the transcriber.
+            output_path: Path of the Markdown file to write (UTF-8, overwritten).
+
+        Raises:
+            RuntimeError: If AssemblyAI reports that the transcription failed.
+        """
+        config = aai.TranscriptionConfig(speaker_labels=True)
+        transcript = self.transcriber.transcribe(audio_path, config)
+
+        # Check for failure before touching the output file: a failed job has
+        # no utterances, and iterating over None would raise an opaque
+        # TypeError after the output had already been truncated.
+        if transcript.status == aai.TranscriptStatus.error:
+            raise RuntimeError(f"AssemblyAI transcription failed: {transcript.error}")
+
+        # Guard against a transcript that carries no utterance list at all.
+        utterances = transcript.utterances or []
+
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.writelines(
+                f"** Speaker {utterance.speaker}:\n{utterance.text}\n-------------\n"
+                for utterance in utterances
+            )
+
+        print(f"Transcription saved to {output_path}")
+
+    def transcribe_audio_file(
+        self, input_path: str, output_transcription_path: str
+    ) -> None:
+        """Convert an .mp4 to a temporary .ogg, transcribe it, then clean up.
+
+        Args:
+            input_path: Path of the source .mp4 file.
+            output_transcription_path: Path of the Markdown file to write.
+        """
+        # Swap only the final extension. A plain str.replace would also rewrite
+        # ".mp4" inside directory names, and would leave the path unchanged for
+        # e.g. ".MP4", making the temporary file the input itself.
+        stem, _ = os.path.splitext(input_path)
+        converted_audio_path = stem + ".ogg"
+        if converted_audio_path == input_path:
+            # Never let the temporary file alias the input: it is overwritten
+            # by the conversion and deleted during cleanup.
+            converted_audio_path = stem + ".converted.ogg"
+
+        try:
+            # Convert .mp4 to .ogg
+            self.convert_to_ogg_mono(input_path, converted_audio_path)
+
+            # Perform the transcription
+            self.transcribe_audio(converted_audio_path, output_transcription_path)
+        finally:
+            # Remove the temporary file even when a step above failed; it may
+            # not exist if the conversion itself raised.
+            if os.path.exists(converted_audio_path):
+                os.remove(converted_audio_path)
+                print(f"Removed temporary file {converted_audio_path}")
+
+
+# Example usage:
+if __name__ == "__main__":
+    # Client() reads the API key from the ASSEMBLYAI environment variable,
+    # so only the input and output locations are set here.
+    source_recording = "/tmp/475353425.mp4"
+    markdown_target = "/tmp/transcribe_475353425.md"
+
+    # Run the full convert -> transcribe -> cleanup pipeline.
+    Client().transcribe_audio_file(source_recording, markdown_target)