diff --git a/requirements.txt b/requirements.txt
index ac44dc3..27debed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,16 +1,23 @@
 absl-py==2.1.0
+antlr4-python3-runtime==4.9.3
 asttokens==3.0.0
 astunparse==1.6.3
 audioread==3.0.1
 certifi==2024.12.14
 cffi==1.17.1
 charset-normalizer==3.4.0
+cloudpickle==3.1.0
 contourpy==1.3.1
 cycler==0.12.1
 decorator==5.1.1
+demucs==4.0.1
+dora_search==0.1.12
+einops==0.8.0
 executing==2.1.0
+filelock==3.16.1
 flatbuffers==24.12.23
 fonttools==4.55.3
+fsspec==2024.12.0
 gast==0.6.0
 google-pasta==0.2.0
 grpcio==1.68.1
@@ -18,9 +25,12 @@ h5py==3.12.1
 idna==3.10
 iniconfig==2.0.0
 jedi==0.19.2
+Jinja2==3.1.5
 joblib==1.4.2
+julius==0.2.7
 keras==3.7.0
 kiwisolver==1.4.8
+lameenc==1.7.0
 lazy_loader==0.4
 libclang==18.1.1
 librosa==0.10.2.post1
@@ -31,10 +41,26 @@ MarkupSafe==3.0.2
 matplotlib-inline==0.1.7
 mdurl==0.1.2
 ml-dtypes==0.4.1
+mpmath==1.3.0
 msgpack==1.1.0
 namex==0.0.8
+networkx==3.4.2
 numba==0.60.0
 numpy==2.0.2
+nvidia-cublas-cu12==12.4.5.8
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.2.1.3
+nvidia-curand-cu12==10.3.5.147
+nvidia-cusolver-cu12==11.6.1.9
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-nccl-cu12==2.21.5
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvtx-cu12==12.4.127
+omegaconf==2.3.0
+openunmix==1.3.0
 opt_einsum==3.4.0
 optree==0.13.1
 packaging==24.2
@@ -53,7 +79,9 @@ Pygments==2.18.0
 pyparsing==3.2.0
 pytest==8.3.4
 python-dateutil==2.9.0.post0
+PyYAML==6.0.2
 requests==2.32.3
+retrying==1.3.4
 rich==13.9.4
 scikit-learn==1.6.0
 scipy==1.14.1
@@ -62,6 +90,8 @@ six==1.17.0
 soundfile==0.12.1
 soxr==0.5.0.post1
 stack-data==0.6.3
+submitit==1.5.2
+sympy==1.13.1
 tensorboard==2.18.0
 tensorboard-data-server==0.7.2
 tensorflow==2.18.0
@@ -69,7 +99,12 @@ tensorflow-hub==0.16.1
 termcolor==2.5.0
 tf_keras==2.18.0
 threadpoolctl==3.5.0
+torch==2.5.1
+torchaudio==2.5.1
+tqdm==4.67.1
 traitlets==5.14.3
+treetable==0.2.5
+triton==3.1.0
 typing_extensions==4.12.2
 urllib3==2.3.0
 wcwidth==0.2.13
diff --git a/samples/am_contra_heart_peripheral.wav b/samples/am_contra_heart_peripheral.wav
new file mode 100644
index 0000000..aa28b77
Binary files /dev/null and b/samples/am_contra_heart_peripheral.wav differ
diff --git a/src/__init__.py b/src/__init__.py
index e69de29..637f57f 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -0,0 +1,12 @@
+# __init__.py
+
+import logging
+from datetime import datetime
+
+# Configure logging
+logging.basicConfig(
+    format='%(asctime)s : %(message)s',
+    level = logging.INFO
+)
+
+logging.info("freq-split-enhance/separation package has been imported.")
\ No newline at end of file
diff --git a/src/preprocessing/classify.py b/src/preprocessing/classify.py
index 59eadd6..05363dc 100644
--- a/src/preprocessing/classify.py
+++ b/src/preprocessing/classify.py
@@ -3,6 +3,11 @@ import tensorflow_hub as hub
 import librosa
 import numpy as np
 import csv
+import os
+
+# Disable CUDA
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
 
 model = hub.load('https://tfhub.dev/google/yamnet/1')
 
diff --git a/src/preprocessing/resample.py b/src/preprocessing/resample.py
new file mode 100644
index 0000000..24352df
--- /dev/null
+++ b/src/preprocessing/resample.py
@@ -0,0 +1,24 @@
+import librosa
+
+def resample(waveform, org_samplerate, new_samplerate):
+    """
+    Reads a waveform and returns a waveform resampled to samplerate.
+
+    Args:
+        waveform: waveform of the target audio.
+        org_samplerate : original samplerate of the audio.
+        new_samplerate : samplerate to which the audio is to be resampled.
+
+    Returns:
+        A (waveform, samplerate) tuple: the resampled waveform and new_samplerate.
+
+    Raises:
+        RuntimeError: if librosa fails to resample the waveform.
+    """
+
+    try:
+        waveform = librosa.resample(waveform, orig_sr=org_samplerate, target_sr=new_samplerate)
+        return waveform, new_samplerate
+    except Exception as e:
+        # Chain the original exception so the librosa traceback is preserved.
+        raise RuntimeError(f"Error resampling the audio file: {e}") from e
diff --git a/src/separation/__init__.py b/src/separation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/separation/demucs_wrapper.py b/src/separation/demucs_wrapper.py
new file mode 100644
index 0000000..0b7fc5b
--- /dev/null
+++ b/src/separation/demucs_wrapper.py
@@ -0,0 +1,38 @@
+import subprocess
+import os
+import soundfile as sf
+import tempfile
+from pathlib import Path
+
+def separate_audio_with_demucs(input_file, sample_rate, output_dir: str):
+    """
+    Use subprocess to run Demucs separation on an input audio file.
+
+    The input file is treated as a temporary file and is deleted once the
+    Demucs run finishes, whether or not the separation succeeded.
+
+    Parameters:
+    input_file (str): Path to the input audio file to be separated.
+    sample_rate: Sample rate of the input audio. Currently unused.
+    output_dir (str): Directory where the separated output will be saved.
+    """
+
+    # Create the output directory if it does not exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Demucs command to separate the temp audio file
+    command = ['demucs', '--out', output_dir, input_file]
+
+    try:
+        # Run the command as a subprocess
+        subprocess.run(command, check=True)
+        print(f"Separation completed. Output saved to {output_dir}")
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred while running demucs : {e}")
+    except FileNotFoundError:
+        print("Demucs is not installed or not found in the system PATH")
+    finally:
+        # Cleanup the temporary file. It may already be gone (or may never
+        # have existed), so a missing file must not raise from here and
+        # mask the outcome reported above.
+        Path(input_file).unlink(missing_ok=True)
diff --git a/tests/test_input.py b/tests/test_input.py
index afb4ff0..6c1c9c6 100644
--- a/tests/test_input.py
+++ b/tests/test_input.py
@@ -1,5 +1,5 @@
 import pytest
-from src.input.file_reader import read_audio
+from src.input.file_reader import read_audio
 from src.input.format_checker import is_supported_format
 
 def test_read_audio():
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
index 0a57873..e5baffb 100644
--- a/tests/test_preprocessing.py
+++ b/tests/test_preprocessing.py
@@ -1,9 +1,11 @@
 import pytest
 import librosa
+import numpy as np
 from src.preprocessing.normalize import normalize_audio
 from src.preprocessing.trim import trim_audio
 from src.preprocessing.classify import classify_audio
 from src.input.file_reader import read_audio
+from src.preprocessing.resample import resample
 
 def test_normalize_audio():
     file_path = "samples/cafe_crowd_talk.aiff"
@@ -25,4 +27,29 @@ def test_classify():
     expected_class = "Speech"
     predicted_class = classify_audio(file_path)
 
-    assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
\ No newline at end of file
+    assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
+
+def test_resample():
+    """
+    Test the resample function to ensure it correctly resamples the waveform.
+    """
+
+    # Generate a synthetic sine wave for testing
+    org_samplerate = 22050  # Original sample rate
+    new_samplerate = 16000  # Target sample rate
+    duration = 1.0  # 1 second duration
+    t = np.linspace(0, duration, int(org_samplerate * duration), endpoint=False)
+    waveform = 0.5 * np.sin(2 * np.pi * 440 * t)  # 440 Hz sine wave
+
+    # Perform resampling
+    resampled_waveform, output_samplerate = resample(waveform, org_samplerate, new_samplerate)
+
+    # Assertions
+    assert output_samplerate == new_samplerate, "The output sample rate does not match the target sample rate."
+    assert len(resampled_waveform) == int(new_samplerate * duration), \
+        "The length of the resampled waveform does not match the expected length."
+    assert np.allclose(
+        np.max(resampled_waveform), np.max(waveform), atol=0.1
+    ), "The amplitude of the resampled waveform deviates significantly from the original."
+
+    print("Test passed: resample function works as expected.")
\ No newline at end of file
diff --git a/tests/test_separation.py b/tests/test_separation.py
new file mode 100644
index 0000000..a297292
--- /dev/null
+++ b/tests/test_separation.py
@@ -0,0 +1,52 @@
+import os
+import pytest
+import tempfile
+import soundfile as sf
+from pathlib import Path
+from src.input.file_reader import read_audio
+from src.preprocessing.trim import trim_audio
+from src.preprocessing.resample import resample
+from src.separation.demucs_wrapper import separate_audio_with_demucs
+
+# Sample rate Demucs models operate at (44.1 kHz).
+DEMUCS_SAMPLERATE = 44100
+
+
+def test_demucs_separation_with_preprocessing(tmp_path):
+    """
+    Test to ensure Demucs separation works with preprocessing and creates expected outputs.
+    """
+
+    input_file = "./samples/am_contra_heart_peripheral.wav"
+    file_name = Path(input_file).stem
+    # Use pytest's per-test temporary directory so runs never collide and
+    # stale output from an earlier run cannot satisfy the assertions below.
+    output_path = str(tmp_path / "demucs-out")
+    waveform, samplerate = read_audio(input_file)
+    waveform = trim_audio(waveform, samplerate)
+
+    # Resample to the rate Demucs expects
+    if samplerate != DEMUCS_SAMPLERATE:
+        print("Resampling audio to 44.1kHz")
+        waveform, samplerate = resample(waveform, samplerate, DEMUCS_SAMPLERATE)
+
+    # Save the processed audio under the original file name, since the
+    # assertions below look for an output folder named after that stem
+    processed_audio_path = str(tmp_path / f"{file_name}.wav")
+    sf.write(processed_audio_path, waveform, samplerate)
+
+    separate_audio_with_demucs(processed_audio_path, samplerate, output_path)
+
+    # Verify the htdemucs folder exists
+    demucs_dir = Path(output_path) / 'htdemucs'
+    assert demucs_dir.exists(), "htdemucs directory not found in output path."
+
+    # Verify the folder named after the file name (without extension) exists
+    file_folder = demucs_dir / file_name
+    assert file_folder.exists(), f"Folder {file_name} not found inside htdemucs directory."
+
+    # Verify the expected files exist inside the folder
+    expected_files = ['bass.wav', 'drums.wav', 'other.wav', 'vocals.wav']
+    for expected_file in expected_files:
+        file_path = file_folder / expected_file
+        assert file_path.exists(), f"Expected file {expected_file} not found in {file_name} folder."