speech separation works, but the output ends up on either channel of the same audio

2025-01-28 23:19:52 +05:30
parent 88ebfb66f6
commit 9e7c01c19e
3 changed files with 53 additions and 1 deletions
@@ -1,4 +1,5 @@
 import soundfile as sf
+import numpy as np

 def export_audio(audio, output_path, sr):
    """
@@ -11,7 +12,25 @@ def export_audio(audio, output_path, sr):
    """

    try:
-        sf.write(output_path, audio, sr)
+        
+        print(f"Initial audio shape: {audio.shape}, dtype: {audio.dtype}")
+
+        if audio.ndim == 2 and audio.shape[0] == 2:
+            # Transpose stereo audio to match the expected shape
+            audio = audio.T  # From (2, num_samples) to (num_samples, 2)
+
+        # Ensure the audio data type is float32
+        audio = audio.astype('float32')
+        
+        # Normalize audio to avoid distortion
+        if np.max(np.abs(audio)) > 0:  # Avoid divide by zero
+            audio = audio / np.max(np.abs(audio))
+
+        # Verify final format
+        print(f"Final audio shape: {audio.shape}, dtype: {audio.dtype}, max: {np.max(audio)}, min: {np.min(audio)}")
+
+        
+        sf.write(output_path, audio, sr, format='wav')
        print(f"Audio saved to {output_path}")
    except Exception as e:
        print(f"Error saving audio: {e}")