Merge pull request #2 from joelmathewthomas/feature/input-and-preprocessing

Input & Preprocessing Stage: Implement Audio File Reading and Format Validation
This commit is contained in:
Joel Mathew Thomas
2024-12-24 21:38:31 +05:30
committed by GitHub
13 changed files with 169 additions and 1 deletions
+15 -1
View File
@@ -1,2 +1,16 @@
# Ignore Python bytecode cache files
__pycache__/
# Ignore pytest cache
.pytest_cache/
# Ignore virtual environment folder
venv/
samples/
# Ignore other common files
*.pyc
*.pyo
*.pyd
# Ignore VSCode config
.vscode/
+2
View File
@@ -0,0 +1,2 @@
[pytest]
pythonpath = . src
+28
View File
@@ -0,0 +1,28 @@
audioread==3.0.1
certifi==2024.12.14
cffi==1.17.1
charset-normalizer==3.4.0
decorator==5.1.1
idna==3.10
iniconfig==2.0.0
joblib==1.4.2
lazy_loader==0.4
librosa==0.10.2.post1
llvmlite==0.43.0
msgpack==1.1.0
numba==0.60.0
numpy==2.0.2
packaging==24.2
platformdirs==4.3.6
pluggy==1.5.0
pooch==1.8.2
pycparser==2.22
pytest==8.3.4
requests==2.32.3
scikit-learn==1.6.0
scipy==1.14.1
soundfile==0.12.1
soxr==0.5.0.post1
threadpoolctl==3.5.0
typing_extensions==4.12.2
urllib3==2.3.0
Binary file not shown.
View File
+12
View File
@@ -0,0 +1,12 @@
# __init__.py
import logging
from datetime import datetime
# Set up root logging for the package: timestamped messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')

# Emit a marker record so the import of this package shows up in the log.
logging.info("freq-split-enhance/input package has been imported.")
+21
View File
@@ -0,0 +1,21 @@
import os
import librosa
def read_audio(file_path):
    """
    Read an audio file and return its samples and sampling rate.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        tuple: audio_time_series (numpy.ndarray), sampling_rate (int)

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        RuntimeError: If the path exists but loading it fails; the
            underlying exception is attached as ``__cause__``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    try:
        # sr=None loads with the file's original sampling rate.
        audio, sr = librosa.load(file_path, sr=None)
    except Exception as e:
        # Chain explicitly so the original loader error stays visible
        # in the traceback as the direct cause.
        raise RuntimeError(f"Error reading the audio file: {e}") from e
    return audio, sr
+15
View File
@@ -0,0 +1,15 @@
import mimetypes
def is_supported_format(file_path):
    """
    Check whether the audio file is in a supported format.

    The format is guessed from the file name via ``mimetypes.guess_type``;
    this function does not open the file.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        bool: True if supported, False otherwise (including when no MIME
        type can be guessed).
    """
    supported_formats = (
        "audio/mpeg",
        "audio/wav",
        "audio/x-aiff",
        "audio/x-wav",
        # NOTE(review): newer Python releases are believed to report WAV
        # files as "audio/vnd.wave"; accepted so ".wav" stays supported.
        "audio/vnd.wave",
    )
    mime_type, _ = mimetypes.guess_type(file_path)
    return mime_type in supported_formats
+12
View File
@@ -0,0 +1,12 @@
# __init__.py
import logging
from datetime import datetime
# Set up root logging for the package: timestamped messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')

# Emit a marker record so the import of this package shows up in the log.
logging.info("freq-split-enhance/preprocessing package has been imported.")
+14
View File
@@ -0,0 +1,14 @@
import librosa
import numpy as np
def normalize_audio(audio: np.ndarray) -> np.ndarray:
    """
    Scale the audio so that it lies within [-1, 1].

    Args:
    - audio (np.ndarray): The audio time series to normalize.

    Returns:
    - np.ndarray: The normalized audio time series.
    """
    # Delegate to librosa's normalizer, leaving all of its options at defaults.
    scaled = librosa.util.normalize(audio)
    return scaled
+17
View File
@@ -0,0 +1,17 @@
import librosa
import numpy as np
def trim_audio(audio:np.ndarray, sr:int) -> np.ndarray:
    """
    Strip leading and trailing silence from the audio.

    Args:
    - audio (np.ndarray): The audio time series.
    - sr (int): The sample rate of the audio. Accepted as part of the
      signature but not used by the current implementation.

    Returns:
    - np.ndarray: The trimmed audio time series.
    """
    # librosa returns a (trimmed_signal, interval) pair; only the signal is kept.
    trim_result = librosa.effects.trim(audio)
    return trim_result[0]
+13
View File
@@ -0,0 +1,13 @@
import pytest
from src.input.file_reader import read_audio
from src.input.format_checker import is_supported_format
def test_read_audio():
    """read_audio returns a non-empty signal and a positive sampling rate."""
    samples, rate = read_audio("samples/cafe_crowd_talk.aiff")
    assert len(samples) > 0
    assert rate > 0
def test_is_supported_format():
    """An AIFF file name is accepted; a plain-text file name is rejected."""
    # Assert on truthiness directly rather than comparing with == True/False.
    assert is_supported_format("samples/cafe_crowd_talk.aiff")
    assert not is_supported_format("samples/unsupported_file.txt")
+20
View File
@@ -0,0 +1,20 @@
import pytest
import librosa
from src.preprocessing.normalize import normalize_audio
from src.preprocessing.trim import trim_audio
from src.input.file_reader import read_audio
def test_normalize_audio():
    """Normalized samples must stay inside the closed interval [-1, 1]."""
    samples, _ = read_audio("samples/cafe_crowd_talk.aiff")
    result = normalize_audio(samples)
    assert result.max() <= 1.0
    assert result.min() >= -1.0
def test_trim_audio():
    """Trimming must never make the signal longer than the input."""
    samples, rate = read_audio("samples/cafe_crowd_talk.aiff")
    shortened = trim_audio(samples, rate)
    assert len(shortened) <= len(samples)