Merge pull request #2 from joelmathewthomas/feature/input-and-preprocessing

Input & Preprocessing Stage: Implement Audio File Reading and Format Validation
2024-12-24 21:38:31 +05:30
parent 41a50ffcbe 0ff6a12829
commit 05a17bf64b
13 changed files with 169 additions and 1 deletions
@@ -1,2 +1,16 @@
 # Ignore Python bytecode cache files
 __pycache__/
 # Ignore pytest cache
 .pytest_cache/
 # Ignore virtual environment folder
 venv/
-samples/
+
 # Ignore other common files
 *.pyc
 *.pyo
 *.pyd
 # Ignore VSCode config
 .vscode/
@@ -0,0 +1,2 @@
 [pytest]
 pythonpath = . src
@@ -0,0 +1,28 @@
 audioread==3.0.1
 certifi==2024.12.14
 cffi==1.17.1
 charset-normalizer==3.4.0
 decorator==5.1.1
 idna==3.10
 iniconfig==2.0.0
 joblib==1.4.2
 lazy_loader==0.4
 librosa==0.10.2.post1
 llvmlite==0.43.0
 msgpack==1.1.0
 numba==0.60.0
 numpy==2.0.2
 packaging==24.2
 platformdirs==4.3.6
 pluggy==1.5.0
 pooch==1.8.2
 pycparser==2.22
 pytest==8.3.4
 requests==2.32.3
 scikit-learn==1.6.0
 scipy==1.14.1
 soundfile==0.12.1
 soxr==0.5.0.post1
 threadpoolctl==3.5.0
 typing_extensions==4.12.2
 urllib3==2.3.0
@@ -0,0 +1,12 @@
 # __init__.py
 import logging
 from datetime import datetime
 # Set up logging when the package is imported: timestamped messages at INFO level.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')
 # Announce the import so it shows up in the log output.
 logging.info("freq-split-enhance/input package has been imported.")
@@ -0,0 +1,21 @@
 import os
 import librosa
 def read_audio(file_path):
    """
    Reads an audio file and returns the audio time series and sampling rate.
    The file is loaded with ``sr=None`` so that the original sampling rate
    of the file is kept.
    Args:
        file_path (str): Path to the audio file.
    Returns:
        tuple: audio_time_series (numpy.ndarray), sampling_rate (int)
    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        RuntimeError: If loading fails for any other reason; the underlying
            exception is attached as ``__cause__``.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    try:
        # Only the load call is guarded; sr=None loads with the original sampling rate.
        audio, sr = librosa.load(file_path, sr=None)
    except Exception as e:
        # Chain explicitly so the original failure stays attached to the wrapper.
        raise RuntimeError(f"Error reading the audio file: {e}") from e
    return audio, sr
@@ -0,0 +1,15 @@
 import mimetypes
 def is_supported_format(file_path):
    """
    Checks if the audio file is in a supported format.
    The decision is based only on the MIME type that ``mimetypes`` guesses
    from the file name; the file itself is not opened.
    Args:
        file_path (str): Path to the audio file.
    Returns:
        bool: True if supported, False otherwise.
    """
    # Fixed set of accepted MIME types (no placeholder elements).
    supported_formats = ("audio/mpeg", "audio/wav", "audio/x-aiff", "audio/x-wav")
    # guess_type returns (type, encoding); the type is None when it cannot be guessed.
    mime_type, _ = mimetypes.guess_type(file_path)
    return mime_type in supported_formats
@@ -0,0 +1,12 @@
 # __init__.py
 import logging
 from datetime import datetime
 # Set up logging when the package is imported: timestamped messages at INFO level.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')
 # Announce the import so it shows up in the log output.
 logging.info("freq-split-enhance/preprocessing package has been imported.")
@@ -0,0 +1,14 @@
 import librosa
 import numpy as np
 def normalize_audio(audio: np.ndarray) -> np.ndarray:
    """
    Scale the audio so that it fits within the range [-1, 1].
    Delegates to ``librosa.util.normalize`` with its default settings.
    Args:
    - audio (np.ndarray): The audio time series to normalize.
    Returns:
    - np.ndarray: The normalized audio time series.
    """
    # NOTE(review): librosa's defaults decide the norm and axis used here;
    # confirm the axis is the intended one before passing multi-channel audio.
    normalized = librosa.util.normalize(audio)
    return normalized
@@ -0,0 +1,17 @@
 import librosa
 import numpy as np
 def trim_audio(audio:np.ndarray, sr:int) -> np.ndarray:
    """
    Remove leading and trailing silence from the audio.
    Delegates to ``librosa.effects.trim`` with its default settings and
    keeps only the trimmed signal from its result.
    Args:
    - audio (np.ndarray): The audio time series.
    - sr (int): The sample rate of the audio. Accepted but not used by
      this function.
    Returns:
    - np.ndarray: The trimmed audio time series.
    """
    # The first element of trim's result is the trimmed signal; the rest is discarded.
    trimmed = librosa.effects.trim(audio)[0]
    return trimmed
@@ -0,0 +1,13 @@
 import pytest
 from  src.input.file_reader import read_audio
 from src.input.format_checker import is_supported_format
 def test_read_audio():
    """Reading the sample file yields a non-empty signal and a positive sampling rate."""
    samples, rate = read_audio("samples/cafe_crowd_talk.aiff")
    assert len(samples) > 0
    assert rate > 0
 def test_is_supported_format():
    """An AIFF path is reported as supported and a .txt path is not."""
    # Assert on truthiness directly instead of comparing against True/False.
    assert is_supported_format("samples/cafe_crowd_talk.aiff")
    assert not is_supported_format("samples/unsupported_file.txt")
@@ -0,0 +1,20 @@
 import pytest
 import librosa
 from src.preprocessing.normalize import normalize_audio
 from src.preprocessing.trim import trim_audio
 from src.input.file_reader import read_audio
 def test_normalize_audio():
    """Normalizing the sample file keeps every value within [-1, 1]."""
    samples, _ = read_audio("samples/cafe_crowd_talk.aiff")
    result = normalize_audio(samples)
    # Check both bounds of the normalized signal.
    assert result.max() <= 1.0
    assert result.min() >= -1.0
 def test_trim_audio():
    """Trimming the sample file never makes the signal longer."""
    samples, rate = read_audio("samples/cafe_crowd_talk.aiff")
    shortened = trim_audio(samples, rate)
    assert len(shortened) <= len(samples)