Merge pull request #5 from joelmathewthomas/feature/separation-demucs

Add Demucs Wrapper and Preprocessing Enhancements with Tests
2024-12-26 20:59:52 +05:30
parent 53538230c4 a7176a4476
commit 9d80012cc9
10 changed files with 187 additions and 2 deletions
@@ -1,16 +1,23 @@
 absl-py==2.1.0
 antlr4-python3-runtime==4.9.3
 asttokens==3.0.0
 astunparse==1.6.3
 audioread==3.0.1
 certifi==2024.12.14
 cffi==1.17.1
 charset-normalizer==3.4.0
 cloudpickle==3.1.0
 contourpy==1.3.1
 cycler==0.12.1
 decorator==5.1.1
 demucs==4.0.1
 dora_search==0.1.12
 einops==0.8.0
 executing==2.1.0
 filelock==3.16.1
 flatbuffers==24.12.23
 fonttools==4.55.3
 fsspec==2024.12.0
 gast==0.6.0
 google-pasta==0.2.0
 grpcio==1.68.1
@@ -18,9 +25,12 @@ h5py==3.12.1
 idna==3.10
 iniconfig==2.0.0
 jedi==0.19.2
 Jinja2==3.1.5
 joblib==1.4.2
 julius==0.2.7
 keras==3.7.0
 kiwisolver==1.4.8
 lameenc==1.7.0
 lazy_loader==0.4
 libclang==18.1.1
 librosa==0.10.2.post1
@@ -31,10 +41,26 @@ MarkupSafe==3.0.2
 matplotlib-inline==0.1.7
 mdurl==0.1.2
 ml-dtypes==0.4.1
 mpmath==1.3.0
 msgpack==1.1.0
 namex==0.0.8
 networkx==3.4.2
 numba==0.60.0
 numpy==2.0.2
 nvidia-cublas-cu12==12.4.5.8
 nvidia-cuda-cupti-cu12==12.4.127
 nvidia-cuda-nvrtc-cu12==12.4.127
 nvidia-cuda-runtime-cu12==12.4.127
 nvidia-cudnn-cu12==9.1.0.70
 nvidia-cufft-cu12==11.2.1.3
 nvidia-curand-cu12==10.3.5.147
 nvidia-cusolver-cu12==11.6.1.9
 nvidia-cusparse-cu12==12.3.1.170
 nvidia-nccl-cu12==2.21.5
 nvidia-nvjitlink-cu12==12.4.127
 nvidia-nvtx-cu12==12.4.127
 omegaconf==2.3.0
 openunmix==1.3.0
 opt_einsum==3.4.0
 optree==0.13.1
 packaging==24.2
@@ -53,7 +79,9 @@ Pygments==2.18.0
 pyparsing==3.2.0
 pytest==8.3.4
 python-dateutil==2.9.0.post0
 PyYAML==6.0.2
 requests==2.32.3
 retrying==1.3.4
 rich==13.9.4
 scikit-learn==1.6.0
 scipy==1.14.1
@@ -62,6 +90,8 @@ six==1.17.0
 soundfile==0.12.1
 soxr==0.5.0.post1
 stack-data==0.6.3
 submitit==1.5.2
 sympy==1.13.1
 tensorboard==2.18.0
 tensorboard-data-server==0.7.2
 tensorflow==2.18.0
@@ -69,7 +99,12 @@ tensorflow-hub==0.16.1
 termcolor==2.5.0
 tf_keras==2.18.0
 threadpoolctl==3.5.0
 torch==2.5.1
 torchaudio==2.5.1
 tqdm==4.67.1
 traitlets==5.14.3
 treetable==0.2.5
 triton==3.1.0
 typing_extensions==4.12.2
 urllib3==2.3.0
 wcwidth==0.2.13
@@ -0,0 +1,12 @@
 # __init__.py
 import logging
 from datetime import datetime
 # Set up logging for the package: timestamped messages at INFO level and above
 logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')
 # Emit a one-time notice when the package is imported
 logging.info("freq-split-enhance/separation package has been imported.")
@@ -3,6 +3,11 @@ import tensorflow_hub as hub
 import librosa
 import numpy as np
 import csv
 import os
 # Disable CUDA
 # "-1" is not a valid device index, so this presumably hides every GPU from
 # CUDA-aware libraries and forces CPU execution.
 # NOTE(review): likely needs to run before the model below is loaded — confirm.
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 # Load the YAMNet model from TensorFlow Hub once, when this module is imported.
 model = hub.load('https://tfhub.dev/google/yamnet/1')
@@ -0,0 +1,18 @@
 import librosa
 def resample(waveform, org_samplerate, new_samplerate):
    """
    Resample a waveform to a new sample rate.

    Args:
        waveform: waveform of the target audio.
        org_samplerate : original samplerate of the audio.
        new_samplerate : samplerate to which the audio is to be resampled.

    Returns:
        A ``(waveform, new_samplerate)`` tuple: the value returned by
        ``librosa.resample`` and the requested target sample rate.

    Raises:
        RuntimeError: if resampling fails for any reason. The original
            exception is attached as ``__cause__``.
    """
    try:
        resampled = librosa.resample(
            waveform, orig_sr=org_samplerate, target_sr=new_samplerate
        )
    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise RuntimeError(f"Error resampling the audio file: {e}") from e
    return resampled, new_samplerate
@@ -0,0 +1,35 @@
 import subprocess
 import os
 import soundfile as sf
 import tempfile
 from pathlib import Path
 def separate_audio_with_demucs(input_file, sample_rate, output_dir: str):
    """
    Use subprocess to run Demucs separation on an input audio file.

    The command executed is ``demucs --out <output_dir> <input_file>``.
    Failures of the Demucs process are printed rather than raised.

    Parameters:
        input_file (str): Path to the input audio file to be separated.
            NOTE: this file is treated as temporary and is deleted when the
            call finishes, whether or not separation succeeded.
        sample_rate: Currently unused; kept so existing callers keep working.
        output_dir (str): Directory where the separated output will be saved.
            It is created if it does not already exist.

    Returns:
        bool: True if the Demucs process exited successfully, False if it
        exited with an error or the ``demucs`` executable could not be started.
    """
    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Argument list (no shell) so paths with spaces are passed through intact
    command = ['demucs', '--out', output_dir, input_file]

    succeeded = False
    try:
        # Run the command as a subprocess
        subprocess.run(command, check=True)
        succeeded = True
        print(f"Separation completed. Output saved to {output_dir}")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running demucs : {e}")
    except FileNotFoundError:
        print("Demucs is not installed or not found in the system PATH")
    finally:
        # Cleanup the temporary input file. A file that is already gone must
        # not turn a handled failure into an unhandled FileNotFoundError.
        try:
            os.remove(input_file)
        except FileNotFoundError:
            pass
    return succeeded
@@ -1,9 +1,11 @@
 import pytest
 import librosa
 import numpy as np
 from src.preprocessing.normalize import normalize_audio
 from src.preprocessing.trim import trim_audio
 from src.preprocessing.classify import classify_audio
 from src.input.file_reader import read_audio
 from src.preprocessing.resample import resample
 def test_normalize_audio():
    file_path = "samples/cafe_crowd_talk.aiff"
@@ -26,3 +28,28 @@ def test_classify():
    predicted_class = classify_audio(file_path)
    assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
 def test_resample():
    """
    Check that resample() converts a synthetic tone to the requested rate.
    """
    # Build one second of a 440 Hz sine tone at the source rate
    source_rate, target_rate = 22050, 16000
    seconds = 1.0
    sample_count = int(source_rate * seconds)
    timeline = np.linspace(0, seconds, sample_count, endpoint=False)
    tone = 0.5 * np.sin(2 * np.pi * 440 * timeline)

    # Run the function under test
    resampled, reported_rate = resample(tone, source_rate, target_rate)

    # The reported rate must be the one that was requested
    assert reported_rate == target_rate, "The output sample rate does not match the target sample rate."

    # One second of audio at the target rate has exactly target_rate samples
    expected_length = int(target_rate * seconds)
    assert len(resampled) == expected_length, \
        "The length of the resampled waveform does not match the expected length."

    # Peak amplitude should survive resampling to within the tolerance
    peak_before = np.max(tone)
    peak_after = np.max(resampled)
    assert np.allclose(peak_after, peak_before, atol=0.1), \
        "The amplitude of the resampled waveform deviates significantly from the original."

    print("Test passed: resample function works as expected.")
@@ -0,0 +1,53 @@
 import os
 import pytest
 import tempfile
 import soundfile as sf
 from pathlib import Path
 from src.input.file_reader import read_audio
 from src.preprocessing.trim import trim_audio
 from src.preprocessing.resample import resample
 from src.separation.demucs_wrapper import separate_audio_with_demucs
 def test_demucs_separation_with_preprocessing():
    """
    Test to ensure Demucs separation works with preprocessing and creates expected outputs.

    The sample is read, trimmed and (if needed) resampled, then written under
    its original file name into a private temporary directory and handed to
    the Demucs wrapper. Input and output both live in that directory, so
    results left behind by an earlier run cannot satisfy the assertions, and
    everything is removed when the test finishes.
    """
    input_file = "./samples/am_contra_heart_peripheral.wav"
    file_name = Path(input_file).stem
    # 44.1 kHz; the previous value of 41000 appears to have been a typo.
    # NOTE(review): 44100 is assumed to be the rate the Demucs models use — confirm.
    target_samplerate = 44100

    waveform, samplerate = read_audio(input_file)
    waveform = trim_audio(waveform, samplerate)

    # Resample to the target rate if the sample is not already there
    if samplerate != target_samplerate:
        print("Resampling audio to 44.1kHz")
        waveform, samplerate = resample(waveform, samplerate, target_samplerate)

    # A private directory replaces the deprecated, race-prone tempfile.mktemp
    # and the rename into the shared temp directory.
    with tempfile.TemporaryDirectory() as work_dir:
        work_path = Path(work_dir)

        # Save the processed audio under the original name; the assertions
        # below expect an output folder named after the input file's stem.
        processed_audio_path = str(work_path / f"{file_name}.wav")
        sf.write(processed_audio_path, waveform, samplerate)

        output_path = work_path / "demucs-out"
        separate_audio_with_demucs(processed_audio_path, samplerate, str(output_path))

        # Verify the htdemucs folder exists
        demucs_dir = output_path / 'htdemucs'
        assert demucs_dir.exists(), "htdemucs directory not found in output path."

        # Verify the folder named after the file name (without extension) exists
        file_folder = demucs_dir / file_name
        assert file_folder.exists(), f"Folder {file_name} not found inside htdemucs directory."

        # Verify the expected files exist inside the folder
        expected_files = ['bass.wav', 'drums.wav', 'other.wav', 'vocals.wav']
        for expected_file in expected_files:
            file_path = file_folder / expected_file
            assert file_path.exists(), f"Expected file {expected_file} not found in {file_name} folder."