Merge pull request #5 from joelmathewthomas/feature/separation-demucs

Add Demucs Wrapper and Preprocessing Enhancements with Tests
This commit is contained in:
Joel Mathew Thomas
2024-12-26 20:59:52 +05:30
committed by GitHub
10 changed files with 187 additions and 2 deletions
+35
View File
@@ -1,16 +1,23 @@
absl-py==2.1.0
antlr4-python3-runtime==4.9.3
asttokens==3.0.0
astunparse==1.6.3
audioread==3.0.1
certifi==2024.12.14
cffi==1.17.1
charset-normalizer==3.4.0
cloudpickle==3.1.0
contourpy==1.3.1
cycler==0.12.1
decorator==5.1.1
demucs==4.0.1
dora_search==0.1.12
einops==0.8.0
executing==2.1.0
filelock==3.16.1
flatbuffers==24.12.23
fonttools==4.55.3
fsspec==2024.12.0
gast==0.6.0
google-pasta==0.2.0
grpcio==1.68.1
@@ -18,9 +25,12 @@ h5py==3.12.1
idna==3.10
iniconfig==2.0.0
jedi==0.19.2
Jinja2==3.1.5
joblib==1.4.2
julius==0.2.7
keras==3.7.0
kiwisolver==1.4.8
lameenc==1.7.0
lazy_loader==0.4
libclang==18.1.1
librosa==0.10.2.post1
@@ -31,10 +41,26 @@ MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mdurl==0.1.2
ml-dtypes==0.4.1
mpmath==1.3.0
msgpack==1.1.0
namex==0.0.8
networkx==3.4.2
numba==0.60.0
numpy==2.0.2
nvidia-cublas-cu12==12.4.5.8
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
omegaconf==2.3.0
openunmix==1.3.0
opt_einsum==3.4.0
optree==0.13.1
packaging==24.2
@@ -53,7 +79,9 @@ Pygments==2.18.0
pyparsing==3.2.0
pytest==8.3.4
python-dateutil==2.9.0.post0
PyYAML==6.0.2
requests==2.32.3
retrying==1.3.4
rich==13.9.4
scikit-learn==1.6.0
scipy==1.14.1
@@ -62,6 +90,8 @@ six==1.17.0
soundfile==0.12.1
soxr==0.5.0.post1
stack-data==0.6.3
submitit==1.5.2
sympy==1.13.1
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tensorflow==2.18.0
@@ -69,7 +99,12 @@ tensorflow-hub==0.16.1
termcolor==2.5.0
tf_keras==2.18.0
threadpoolctl==3.5.0
torch==2.5.1
torchaudio==2.5.1
tqdm==4.67.1
traitlets==5.14.3
treetable==0.2.5
triton==3.1.0
typing_extensions==4.12.2
urllib3==2.3.0
wcwidth==0.2.13
Binary file not shown.
+12
View File
@@ -0,0 +1,12 @@
# __init__.py
import logging
from datetime import datetime
# Set up root logging for the package: records are formatted as
# "<timestamp> : <message>" and the root level is INFO.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')

# Leave a trace in the log whenever this package gets imported.
logging.info("freq-split-enhance/separation package has been imported.")
+5
View File
@@ -3,6 +3,11 @@ import tensorflow_hub as hub
import librosa
import numpy as np
import csv
import os
# Disable CUDA: CUDA_VISIBLE_DEVICES is set to "-1" for this process.
# NOTE(review): presumably this forces CPU-only inference; the assignment has
# to stay ahead of the model load below to be in effect by then — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Load the YAMNet model from TensorFlow Hub and bind it to the module-level
# name `model`.
model = hub.load('https://tfhub.dev/google/yamnet/1')
+18
View File
@@ -0,0 +1,18 @@
import librosa
def resample(waveform, org_samplerate, new_samplerate):
    """
    Resample a waveform from its original sample rate to a new sample rate.

    Args:
        waveform: waveform of the target audio, passed to ``librosa.resample``.
        org_samplerate: original samplerate of the audio.
        new_samplerate: samplerate to which the audio is to be resampled.

    Returns:
        A ``(waveform, samplerate)`` tuple holding the resampled waveform and
        ``new_samplerate``.

    Raises:
        RuntimeError: if resampling fails for any reason. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        resampled = librosa.resample(
            waveform, orig_sr=org_samplerate, target_sr=new_samplerate
        )
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise RuntimeError(f"Error resampling the audio file: {e}") from e
    return resampled, new_samplerate
View File
+35
View File
@@ -0,0 +1,35 @@
import subprocess
import os
import soundfile as sf
import tempfile
from pathlib import Path
def separate_audio_with_demucs(input_file, sample_rate, output_dir: str):
    """
    Use subprocess to run Demucs separation on an input audio file.

    The input file is treated as a temporary file and is deleted once the
    Demucs run has finished, whether it succeeded or failed. Failures are
    reported on stdout and are not raised to the caller.

    Parameters:
        input_file (str): Path to the input audio file to be separated.
            This file is removed before the function returns.
        sample_rate: Sample rate of the input audio. Currently unused by
            this function; kept so existing callers keep working.
        output_dir (str): Directory where the separated output will be saved.
            It is created if it does not exist.

    Returns:
        None
    """
    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Argument list (no shell) so paths with spaces are passed through intact
    command = ['demucs', '--out', output_dir, input_file]

    try:
        # Run the command as a subprocess; a non-zero exit raises
        subprocess.run(command, check=True)
        print(f"Separation completed. Output saved to {output_dir}")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running demucs : {e}")
    except FileNotFoundError:
        print("Demucs is not installed or not found in the system PATH")
    finally:
        # Cleanup the temporary file. A file that is already gone must not
        # raise here, otherwise the failure reported above would be masked
        # by an unrelated FileNotFoundError.
        Path(input_file).unlink(missing_ok=True)
+27
View File
@@ -1,9 +1,11 @@
import pytest
import librosa
import numpy as np
from src.preprocessing.normalize import normalize_audio
from src.preprocessing.trim import trim_audio
from src.preprocessing.classify import classify_audio
from src.input.file_reader import read_audio
from src.preprocessing.resample import resample
def test_normalize_audio():
file_path = "samples/cafe_crowd_talk.aiff"
@@ -26,3 +28,28 @@ def test_classify():
predicted_class = classify_audio(file_path)
assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
def test_resample():
    """
    Check that resample() converts a synthetic tone to the requested rate,
    with the expected length and roughly the same peak amplitude.
    """
    # One second of a 440 Hz sine at half amplitude, sampled at 22.05 kHz
    source_rate, target_rate = 22050, 16000
    seconds = 1.0
    sample_count = int(source_rate * seconds)
    timeline = np.linspace(0, seconds, sample_count, endpoint=False)
    tone = 0.5 * np.sin(2 * np.pi * 440 * timeline)

    # Run the function under test
    converted, reported_rate = resample(tone, source_rate, target_rate)

    # The reported rate must be the one that was asked for
    assert reported_rate == target_rate, "The output sample rate does not match the target sample rate."

    # One second of audio at the target rate
    expected_len = int(target_rate * seconds)
    assert len(converted) == expected_len, \
        "The length of the resampled waveform does not match the expected length."

    # Peak amplitude should survive resampling within a loose tolerance
    peaks_match = np.allclose(np.max(converted), np.max(tone), atol=0.1)
    assert peaks_match, "The amplitude of the resampled waveform deviates significantly from the original."

    print("Test passed: resample function works as expected.")
+53
View File
@@ -0,0 +1,53 @@
import os
import pytest
import tempfile
import soundfile as sf
from pathlib import Path
from src.input.file_reader import read_audio
from src.preprocessing.trim import trim_audio
from src.preprocessing.resample import resample
from src.separation.demucs_wrapper import separate_audio_with_demucs
def test_demucs_separation_with_preprocessing():
    """
    Test to ensure Demucs separation works with preprocessing and creates expected outputs.

    The sample is read, trimmed, resampled to 44.1 kHz if needed, written to a
    fresh temporary directory under its original file name, and separated.
    Both the processed input and the Demucs output live in that directory, so
    results left over from an earlier run cannot satisfy the assertions.
    """
    input_file = "./samples/am_contra_heart_peripheral.wav"
    file_name = Path(input_file).stem
    target_samplerate = 44100  # 44.1 kHz

    waveform, samplerate = read_audio(input_file)
    waveform = trim_audio(waveform, samplerate)

    # Resample to 44100 Hz
    if samplerate != target_samplerate:
        print("Resampling audio to 44.1 kHz")
        waveform, samplerate = resample(waveform, samplerate, target_samplerate)

    # A private directory avoids the name race of tempfile.mktemp and is
    # removed automatically when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        # Save the processed audio under the original file name, since the
        # output folder checked below is named after the input file's stem.
        processed_path = os.path.join(work_dir, f"{file_name}.wav")
        sf.write(processed_path, waveform, samplerate)

        output_path = os.path.join(work_dir, "separated")
        separate_audio_with_demucs(processed_path, samplerate, output_path)

        # Verify the htdemucs folder exists
        demucs_dir = Path(output_path) / 'htdemucs'
        assert demucs_dir.exists(), "htdemucs directory not found in output path."

        # Verify the folder named after the file name (without extension) exists
        file_folder = demucs_dir / file_name
        assert file_folder.exists(), f"Folder {file_name} not found inside htdemucs directory."

        # Verify the expected files exist inside the folder
        expected_files = ['bass.wav', 'drums.wav', 'other.wav', 'vocals.wav']
        for expected_file in expected_files:
            file_path = file_folder / expected_file
            assert file_path.exists(), f"Expected file {expected_file} not found in {file_name} folder."