Implement audio normalization and trimming functions

- Normalize audio to the range [-1, 1]
- Trim silence from audio
- Add test cases for both functions
This commit is contained in:
Joel Mathew Thomas
2024-12-24 20:59:41 +05:30
parent 341d7fa11c
commit 0ff6a12829
3 changed files with 51 additions and 0 deletions
+14
View File
@@ -0,0 +1,14 @@
import librosa
import numpy as np
def normalize_audio(audio: np.ndarray) -> np.ndarray:
    """
    Peak-normalize the audio to a range of [-1, 1].

    The signal is scaled by its maximum absolute value (librosa's default
    infinity norm), measured along the last (time) axis. For a 1-D mono
    signal this is the peak of the whole signal. Per librosa's documented
    defaults, a signal whose peak is below a tiny dtype-dependent threshold
    (e.g. pure digital silence) is returned unscaled rather than divided
    by ~0.

    Args:
    - audio (np.ndarray): The audio time series to normalize. Assumed to
      follow librosa's layout of shape (n,) or (..., n) with time as the
      last axis.
    Returns:
    - np.ndarray: The normalized audio time series, same shape as the input.
    """
    # librosa.util.normalize defaults to axis=0. That is the time axis only
    # for 1-D input; for (channels, samples) input it would normalize across
    # channels at every sample. axis=-1 always targets the time axis and is
    # identical to the default for mono signals.
    return librosa.util.normalize(audio, axis=-1)
+17
View File
@@ -0,0 +1,17 @@
import librosa
import numpy as np
def trim_audio(audio: np.ndarray, sr: int, top_db: float = 60) -> np.ndarray:
    """
    Trim leading and trailing silence from the audio.

    Silence detection is delegated to ``librosa.effects.trim``; only the
    trimmed signal is returned and the (start, end) sample indices that
    librosa also reports are discarded.

    Args:
    - audio (np.ndarray): The audio time series.
    - sr (int): The sample rate of the audio. Currently unused (the trim
      call does not take a sample rate); kept so existing callers that
      pass it keep working.
    - top_db (float): Threshold in decibels below the reference level under
      which audio is treated as silence. Defaults to 60, which is librosa's
      own default, so omitting it preserves the previous behavior.
    Returns:
    - np.ndarray: The trimmed audio time series.
    """
    trimmed, _interval = librosa.effects.trim(audio, top_db=top_db)
    return trimmed
+20
View File
@@ -0,0 +1,20 @@
import pytest
import librosa
from src.preprocessing.normalize import normalize_audio
from src.preprocessing.trim import trim_audio
from src.input.file_reader import read_audio
def test_normalize_audio():
    """Check that normalize_audio rescales the sample file to a unit peak."""
    file_path = "samples/cafe_crowd_talk.aiff"
    audio, _ = read_audio(file_path)
    normalized_audio = normalize_audio(audio)
    # Every sample must lie inside the target range.
    assert normalized_audio.max() <= 1.0
    assert normalized_audio.min() >= -1.0
    # The range checks alone would also pass for audio that was never
    # rescaled (presumably read_audio already yields floats within
    # [-1, 1] -- verify against its implementation), so additionally
    # require that the loudest sample actually reaches full scale.
    assert abs(normalized_audio).max() == pytest.approx(1.0)
def test_trim_audio():
    """Check that trim_audio never lengthens the signal or empties it."""
    file_path = "samples/cafe_crowd_talk.aiff"
    audio, sr = read_audio(file_path)
    trimmed_audio = trim_audio(audio, sr)
    # Trimming must not change the array layout (e.g. drop a channel axis).
    assert trimmed_audio.ndim == audio.ndim
    # Compare sample counts along the last (time) axis: len() would compare
    # the channel count for multichannel arrays. The sample recording is
    # expected to contain audible content, so the result must be non-empty.
    assert 0 < trimmed_audio.shape[-1] <= audio.shape[-1]