From 0ff6a128291e974c32994aadedb9836afa53162c Mon Sep 17 00:00:00 2001
From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com>
Date: Tue, 24 Dec 2024 20:59:41 +0530
Subject: [PATCH] Implement audio normalization and trimming functions

- Normalize audio to the range [-1, 1]
- Trim silence from audio
- Add test cases for both functions
---
Review notes (free-form area after the '---' separator; not part of the
commit message):

- trim_audio() accepts `sr` and documents it, but never uses it: only
  `audio` is passed to librosa.effects.trim().
- normalize_audio() calls librosa.util.normalize() with its default
  arguments. NOTE(review): if multichannel input is expected, confirm
  that the default normalization axis matches the array layout that
  read_audio() returns.
- tests/test_preprocessing.py imports `pytest` and `librosa` but
  references neither name.
- Both tests read samples/cafe_crowd_talk.aiff through
  src.input.file_reader.read_audio; neither the sample nor that module
  is added by this patch, so they must already exist in the tree.
- test_trim_audio only asserts that the output is not longer than the
  input; it does not check that any silence was removed.
- src/preprocessing/trim.py and tests/test_preprocessing.py are added
  without a trailing newline (see the "\ No newline at end of file"
  markers below).

 src/preprocessing/normalize.py | 14 ++++++++++++++
 src/preprocessing/trim.py      | 17 +++++++++++++++++
 tests/test_preprocessing.py    | 20 ++++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 tests/test_preprocessing.py

diff --git a/src/preprocessing/normalize.py b/src/preprocessing/normalize.py
index e69de29..f9e4ae3 100644
--- a/src/preprocessing/normalize.py
+++ b/src/preprocessing/normalize.py
@@ -0,0 +1,14 @@
+import librosa
+import numpy as np
+
+def normalize_audio(audio: np.ndarray) -> np.ndarray:
+    """
+    Normalize the audio to a range of [-1, 1].
+
+    Args:
+    - audio (np.ndarray): The audio time series to normalize.
+
+    Returns:
+    - np.ndarray: The normalized audio time series.
+    """
+    return librosa.util.normalize(audio)
diff --git a/src/preprocessing/trim.py b/src/preprocessing/trim.py
index e69de29..c6ecd79 100644
--- a/src/preprocessing/trim.py
+++ b/src/preprocessing/trim.py
@@ -0,0 +1,17 @@
+import librosa
+import numpy as np
+
+def trim_audio(audio:np.ndarray, sr:int) -> np.ndarray:
+    """
+    Trim leading and trailing silence from the audio.
+
+    Args:
+    - audio (np.ndarray): The audio time series.
+    - sr (int): The sample rate of the audio.
+
+    Returns:
+    - np.ndarray: The trimmed audio time series.
+    """
+
+    audio_trimmed, _ = librosa.effects.trim(audio)
+    return audio_trimmed
\ No newline at end of file
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
new file mode 100644
index 0000000..208bb5b
--- /dev/null
+++ b/tests/test_preprocessing.py
@@ -0,0 +1,20 @@
+import pytest
+import librosa
+from src.preprocessing.normalize import normalize_audio
+from src.preprocessing.trim import trim_audio
+from src.input.file_reader import read_audio
+
+def test_normalize_audio():
+    file_path = "samples/cafe_crowd_talk.aiff"
+    audio, _ = read_audio(file_path)
+    normalized_audio = normalize_audio(audio)
+
+    assert normalized_audio.max() <= 1.0
+    assert normalized_audio.min() >= -1.0
+
+def test_trim_audio():
+    file_path = "samples/cafe_crowd_talk.aiff"
+    audio, sr = read_audio(file_path)
+    trimmed_audio = trim_audio(audio, sr)
+
+    assert len(trimmed_audio) <= len(audio)
\ No newline at end of file