Implement audio normalization and trimming functions

- Normalize audio to the range [-1, 1]
- Trim silence from audio
- Add test cases for both functions
2024-12-24 20:59:41 +05:30
parent 341d7fa11c
commit 0ff6a12829
3 changed files with 51 additions and 0 deletions
@@ -0,0 +1,14 @@
+import librosa
+import numpy as np
+
+def normalize_audio(audio: np.ndarray) -> np.ndarray:
+    """
+    Peak-normalize the audio to [-1, 1] along its last (sample) axis.
+
+    Args:
+    - audio (np.ndarray): Audio time series, assumed shaped (..., n_samples).
+
+    Returns:
+    - np.ndarray: The audio with each channel scaled by its peak magnitude.
+    """
+    return librosa.util.normalize(audio, axis=-1)  # axis 0 may be channels
@@ -0,0 +1,17 @@
+import librosa
+import numpy as np
+
+def trim_audio(audio: np.ndarray, sr: int) -> np.ndarray:
+    """
+    Strip the silent lead-in and tail from an audio signal.
+
+    Args:
+    - audio (np.ndarray): The audio time series to trim.
+    - sr (int): The sample rate; accepted but not used by the trimming.
+
+    Returns:
+    - np.ndarray: The non-silent portion, as found by librosa's defaults.
+    """
+    # librosa returns (trimmed signal, [start, end] index pair); the
+    # index pair is not needed here, so keep only the signal.
+    return librosa.effects.trim(audio)[0]
@@ -0,0 +1,20 @@
+import pytest
+import librosa
+from src.preprocessing.normalize import normalize_audio
+from src.preprocessing.trim import trim_audio
+from src.input.file_reader import read_audio
+
+def test_normalize_audio():
+    """Bounds alone hold for untouched in-range input, so pin the peak too."""
+    audio, _ = read_audio("samples/cafe_crowd_talk.aiff")
+    normalized_audio = normalize_audio(audio)
+    assert normalized_audio.max() <= 1.0
+    assert normalized_audio.min() >= -1.0
+    assert abs(normalized_audio).max() == pytest.approx(1.0)
+
+def test_trim_audio():
+    """Trimming must never make the signal longer than the input."""
+    signal, sample_rate = read_audio("samples/cafe_crowd_talk.aiff")
+    original_length = len(signal)
+
+    assert len(trim_audio(signal, sample_rate)) <= original_length