diff --git a/.gitignore b/.gitignore
index dbbe04d..06b7abc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,16 @@
+# Ignore Python bytecode cache files
+__pycache__/
+
+# Ignore pytest cache
+.pytest_cache/
+
+# Ignore virtual environment folder
 venv/
-samples/
+
+# Ignore other common files
+*.pyc
+*.pyo
+*.pyd
+
+# Ignore VSCode config
+.vscode/
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..ad5c7cc
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+pythonpath = . src
diff --git a/requirements.txt b/requirements.txt
index e69de29..5b77076 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,28 @@
+audioread==3.0.1
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.0
+decorator==5.1.1
+idna==3.10
+iniconfig==2.0.0
+joblib==1.4.2
+lazy_loader==0.4
+librosa==0.10.2.post1
+llvmlite==0.43.0
+msgpack==1.1.0
+numba==0.60.0
+numpy==2.0.2
+packaging==24.2
+platformdirs==4.3.6
+pluggy==1.5.0
+pooch==1.8.2
+pycparser==2.22
+pytest==8.3.4
+requests==2.32.3
+scikit-learn==1.6.0
+scipy==1.14.1
+soundfile==0.12.1
+soxr==0.5.0.post1
+threadpoolctl==3.5.0
+typing_extensions==4.12.2
+urllib3==2.3.0
diff --git a/samples/cafe_crowd_talk.aiff b/samples/cafe_crowd_talk.aiff
new file mode 100644
index 0000000..80b6507
Binary files /dev/null and b/samples/cafe_crowd_talk.aiff differ
diff --git a/samples/unsupported_file.txt b/samples/unsupported_file.txt
new file mode 100644
index 0000000..e69de29
diff --git a/src/input/__init__.py b/src/input/__init__.py
new file mode 100644
index 0000000..e604b16
--- /dev/null
+++ b/src/input/__init__.py
@@ -0,0 +1,12 @@
+# __init__.py
+
+import logging
+from datetime import datetime
+
+# Configure logging
+logging.basicConfig(
+    format='%(asctime)s : %(message)s',
+    level = logging.INFO
+)
+
+logging.info("freq-split-enhance/input package has been imported.")
\ No newline at end of file
diff --git a/src/input/file_reader.py b/src/input/file_reader.py
new file mode 100644
index 0000000..f18b7e1
--- /dev/null
+++ b/src/input/file_reader.py
@@ -0,0 +1,27 @@
+import os
+import librosa
+
+
+def read_audio(file_path):
+    """
+    Read an audio file and return its time series and sampling rate.
+
+    Args:
+        file_path (str): Path to the audio file.
+
+    Returns:
+        tuple: audio_time_series (numpy.ndarray), sampling_rate (int)
+
+    Raises:
+        FileNotFoundError: If nothing exists at file_path.
+        RuntimeError: If librosa fails to load the file; the original
+            exception is chained as the cause.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+    try:
+        # sr=None loads with the file's original sampling rate.
+        audio, sr = librosa.load(file_path, sr=None)
+    except Exception as e:
+        raise RuntimeError(f"Error reading the audio file: {e}") from e
+    return audio, sr
diff --git a/src/input/format_checker.py b/src/input/format_checker.py
new file mode 100644
index 0000000..65d4ebb
--- /dev/null
+++ b/src/input/format_checker.py
@@ -0,0 +1,25 @@
+import mimetypes
+
+# MIME types accepted as audio input. Matching is done on the type that
+# mimetypes guesses from the file name, not on the file's contents.
+SUPPORTED_FORMATS = frozenset({
+    "audio/mpeg",
+    "audio/wav",
+    "audio/x-aiff",
+    "audio/x-wav",
+})
+
+
+def is_supported_format(file_path):
+    """
+    Check whether a file name maps to a supported audio MIME type.
+
+    Args:
+        file_path (str): Path to the audio file.
+
+    Returns:
+        bool: True if supported, False otherwise (including when the
+            type cannot be guessed).
+    """
+    mime_type, _ = mimetypes.guess_type(file_path)
+    return mime_type in SUPPORTED_FORMATS
diff --git a/src/preprocessing/__init__.py b/src/preprocessing/__init__.py
new file mode 100644
index 0000000..b345187
--- /dev/null
+++ b/src/preprocessing/__init__.py
@@ -0,0 +1,12 @@
+# __init__.py
+
+import logging
+from datetime import datetime
+
+# Configure logging
+logging.basicConfig(
+    format='%(asctime)s : %(message)s',
+    level = logging.INFO
+)
+
+logging.info("freq-split-enhance/preprocessing package has been imported.")
\ No newline at end of file
diff --git a/src/preprocessing/normalize.py b/src/preprocessing/normalize.py
new file mode 100644
index 0000000..f9e4ae3
--- /dev/null
+++ b/src/preprocessing/normalize.py
@@ -0,0 +1,14 @@
+import librosa
+import numpy as np
+
+def normalize_audio(audio: np.ndarray) -> np.ndarray:
+    """
+    Normalize the audio to a range of [-1, 1].
+
+    Args:
+    - audio (np.ndarray): The audio time series to normalize.
+
+    Returns:
+    - np.ndarray: The normalized audio time series.
+    """
+    return librosa.util.normalize(audio)
diff --git a/src/preprocessing/trim.py b/src/preprocessing/trim.py
new file mode 100644
index 0000000..c6ecd79
--- /dev/null
+++ b/src/preprocessing/trim.py
@@ -0,0 +1,17 @@
+import librosa
+import numpy as np
+
+def trim_audio(audio: np.ndarray, sr: int) -> np.ndarray:
+    """
+    Trim leading and trailing silence from the audio.
+
+    Args:
+    - audio (np.ndarray): The audio time series.
+    - sr (int): The sample rate of the audio (accepted but currently unused).
+
+    Returns:
+    - np.ndarray: The trimmed audio time series.
+    """
+
+    audio_trimmed, _ = librosa.effects.trim(audio)
+    return audio_trimmed
diff --git a/tests/test_input.py b/tests/test_input.py
new file mode 100644
index 0000000..afb4ff0
--- /dev/null
+++ b/tests/test_input.py
@@ -0,0 +1,13 @@
+import pytest
+from src.input.file_reader import read_audio
+from src.input.format_checker import is_supported_format
+
+def test_read_audio():
+    file_path = "samples/cafe_crowd_talk.aiff"
+    audio, sr = read_audio(file_path)
+    assert len(audio) > 0
+    assert sr > 0
+
+def test_is_supported_format():
+    assert is_supported_format("samples/cafe_crowd_talk.aiff")
+    assert not is_supported_format("samples/unsupported_file.txt")
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
new file mode 100644
index 0000000..208bb5b
--- /dev/null
+++ b/tests/test_preprocessing.py
@@ -0,0 +1,20 @@
+import pytest
+import librosa
+from src.preprocessing.normalize import normalize_audio
+from src.preprocessing.trim import trim_audio
+from src.input.file_reader import read_audio
+
+def test_normalize_audio():
+    file_path = "samples/cafe_crowd_talk.aiff"
+    audio, _ = read_audio(file_path)
+    normalized_audio = normalize_audio(audio)
+
+    assert normalized_audio.max() <= 1.0
+    assert normalized_audio.min() >= -1.0
+
+def test_trim_audio():
+    file_path = "samples/cafe_crowd_talk.aiff"
+    audio, sr = read_audio(file_path)
+    trimmed_audio = trim_audio(audio, sr)
+
+    assert len(trimmed_audio) <= len(audio)
\ No newline at end of file