From 0ff6a128291e974c32994aadedb9836afa53162c Mon Sep 17 00:00:00 2001
From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com>
Date: Tue, 24 Dec 2024 20:59:41 +0530
Subject: [PATCH] Implement audio normalization and trimming functions

- Normalize audio to the range [-1, 1]
- Trim silence from audio
- Add test cases for both functions
---
 src/preprocessing/normalize.py | 14 ++++++++++++++
 src/preprocessing/trim.py      | 17 +++++++++++++++++
 tests/test_preprocessing.py    | 20 ++++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 tests/test_preprocessing.py

diff --git a/src/preprocessing/normalize.py b/src/preprocessing/normalize.py
index e69de29..f9e4ae3 100644
--- a/src/preprocessing/normalize.py
+++ b/src/preprocessing/normalize.py
@@ -0,0 +1,14 @@
+import librosa
+import numpy as np
+
+def normalize_audio(audio: np.ndarray) -> np.ndarray:
+    """
+    Rescale the audio into the range [-1, 1] via librosa.util.normalize.
+
+    Args:
+    - audio (np.ndarray): The audio time series to normalize.
+
+    Returns:
+    - np.ndarray: The normalized audio, as returned by librosa's defaults.
+    """
+    return librosa.util.normalize(audio)
diff --git a/src/preprocessing/trim.py b/src/preprocessing/trim.py
index e69de29..c6ecd79 100644
--- a/src/preprocessing/trim.py
+++ b/src/preprocessing/trim.py
@@ -0,0 +1,17 @@
+import librosa
+import numpy as np
+
+def trim_audio(audio:np.ndarray, sr:int) -> np.ndarray:
+    """
+    Strip leading and trailing silence from an audio signal.
+
+    Args:
+    - audio (np.ndarray): The audio time series.
+    - sr (int): The sample rate of the audio; accepted but not used here.
+
+    Returns:
+    - np.ndarray: The audio with its silent edges removed.
+    """
+    # The trim call yields a pair; only its first item (the signal) is kept.
+    trimmed_signal = librosa.effects.trim(audio)[0]
+    return trimmed_signal
\ No newline at end of file
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
new file mode 100644
index 0000000..208bb5b
--- /dev/null
+++ b/tests/test_preprocessing.py
@@ -0,0 +1,20 @@
+import pytest
+import librosa
+from src.preprocessing.normalize import normalize_audio
+from src.preprocessing.trim import trim_audio
+from src.input.file_reader import read_audio
+
+def test_normalize_audio():
+    """Normalized sample audio must stay within [-1, 1]."""
+    samples, _ = read_audio("samples/cafe_crowd_talk.aiff")
+    scaled = normalize_audio(samples)
+
+    assert scaled.max() <= 1.0
+    assert scaled.min() >= -1.0
+
+def test_trim_audio():
+    """Trimming must never make the sample audio longer."""
+    samples, rate = read_audio("samples/cafe_crowd_talk.aiff")
+    shortened = trim_audio(samples, rate)
+
+    assert len(shortened) <= len(samples)
\ No newline at end of file