Merge pull request #5 from joelmathewthomas/feature/separation-demucs
Add Demucs Wrapper and Preprocessing Enhancements with Tests
This commit is contained in:
@@ -1,16 +1,23 @@
|
|||||||
absl-py==2.1.0
|
absl-py==2.1.0
|
||||||
|
antlr4-python3-runtime==4.9.3
|
||||||
asttokens==3.0.0
|
asttokens==3.0.0
|
||||||
astunparse==1.6.3
|
astunparse==1.6.3
|
||||||
audioread==3.0.1
|
audioread==3.0.1
|
||||||
certifi==2024.12.14
|
certifi==2024.12.14
|
||||||
cffi==1.17.1
|
cffi==1.17.1
|
||||||
charset-normalizer==3.4.0
|
charset-normalizer==3.4.0
|
||||||
|
cloudpickle==3.1.0
|
||||||
contourpy==1.3.1
|
contourpy==1.3.1
|
||||||
cycler==0.12.1
|
cycler==0.12.1
|
||||||
decorator==5.1.1
|
decorator==5.1.1
|
||||||
|
demucs==4.0.1
|
||||||
|
dora_search==0.1.12
|
||||||
|
einops==0.8.0
|
||||||
executing==2.1.0
|
executing==2.1.0
|
||||||
|
filelock==3.16.1
|
||||||
flatbuffers==24.12.23
|
flatbuffers==24.12.23
|
||||||
fonttools==4.55.3
|
fonttools==4.55.3
|
||||||
|
fsspec==2024.12.0
|
||||||
gast==0.6.0
|
gast==0.6.0
|
||||||
google-pasta==0.2.0
|
google-pasta==0.2.0
|
||||||
grpcio==1.68.1
|
grpcio==1.68.1
|
||||||
@@ -18,9 +25,12 @@ h5py==3.12.1
|
|||||||
idna==3.10
|
idna==3.10
|
||||||
iniconfig==2.0.0
|
iniconfig==2.0.0
|
||||||
jedi==0.19.2
|
jedi==0.19.2
|
||||||
|
Jinja2==3.1.5
|
||||||
joblib==1.4.2
|
joblib==1.4.2
|
||||||
|
julius==0.2.7
|
||||||
keras==3.7.0
|
keras==3.7.0
|
||||||
kiwisolver==1.4.8
|
kiwisolver==1.4.8
|
||||||
|
lameenc==1.7.0
|
||||||
lazy_loader==0.4
|
lazy_loader==0.4
|
||||||
libclang==18.1.1
|
libclang==18.1.1
|
||||||
librosa==0.10.2.post1
|
librosa==0.10.2.post1
|
||||||
@@ -31,10 +41,26 @@ MarkupSafe==3.0.2
|
|||||||
matplotlib-inline==0.1.7
|
matplotlib-inline==0.1.7
|
||||||
mdurl==0.1.2
|
mdurl==0.1.2
|
||||||
ml-dtypes==0.4.1
|
ml-dtypes==0.4.1
|
||||||
|
mpmath==1.3.0
|
||||||
msgpack==1.1.0
|
msgpack==1.1.0
|
||||||
namex==0.0.8
|
namex==0.0.8
|
||||||
|
networkx==3.4.2
|
||||||
numba==0.60.0
|
numba==0.60.0
|
||||||
numpy==2.0.2
|
numpy==2.0.2
|
||||||
|
nvidia-cublas-cu12==12.4.5.8
|
||||||
|
nvidia-cuda-cupti-cu12==12.4.127
|
||||||
|
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||||
|
nvidia-cuda-runtime-cu12==12.4.127
|
||||||
|
nvidia-cudnn-cu12==9.1.0.70
|
||||||
|
nvidia-cufft-cu12==11.2.1.3
|
||||||
|
nvidia-curand-cu12==10.3.5.147
|
||||||
|
nvidia-cusolver-cu12==11.6.1.9
|
||||||
|
nvidia-cusparse-cu12==12.3.1.170
|
||||||
|
nvidia-nccl-cu12==2.21.5
|
||||||
|
nvidia-nvjitlink-cu12==12.4.127
|
||||||
|
nvidia-nvtx-cu12==12.4.127
|
||||||
|
omegaconf==2.3.0
|
||||||
|
openunmix==1.3.0
|
||||||
opt_einsum==3.4.0
|
opt_einsum==3.4.0
|
||||||
optree==0.13.1
|
optree==0.13.1
|
||||||
packaging==24.2
|
packaging==24.2
|
||||||
@@ -53,7 +79,9 @@ Pygments==2.18.0
|
|||||||
pyparsing==3.2.0
|
pyparsing==3.2.0
|
||||||
pytest==8.3.4
|
pytest==8.3.4
|
||||||
python-dateutil==2.9.0.post0
|
python-dateutil==2.9.0.post0
|
||||||
|
PyYAML==6.0.2
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
|
retrying==1.3.4
|
||||||
rich==13.9.4
|
rich==13.9.4
|
||||||
scikit-learn==1.6.0
|
scikit-learn==1.6.0
|
||||||
scipy==1.14.1
|
scipy==1.14.1
|
||||||
@@ -62,6 +90,8 @@ six==1.17.0
|
|||||||
soundfile==0.12.1
|
soundfile==0.12.1
|
||||||
soxr==0.5.0.post1
|
soxr==0.5.0.post1
|
||||||
stack-data==0.6.3
|
stack-data==0.6.3
|
||||||
|
submitit==1.5.2
|
||||||
|
sympy==1.13.1
|
||||||
tensorboard==2.18.0
|
tensorboard==2.18.0
|
||||||
tensorboard-data-server==0.7.2
|
tensorboard-data-server==0.7.2
|
||||||
tensorflow==2.18.0
|
tensorflow==2.18.0
|
||||||
@@ -69,7 +99,12 @@ tensorflow-hub==0.16.1
|
|||||||
termcolor==2.5.0
|
termcolor==2.5.0
|
||||||
tf_keras==2.18.0
|
tf_keras==2.18.0
|
||||||
threadpoolctl==3.5.0
|
threadpoolctl==3.5.0
|
||||||
|
torch==2.5.1
|
||||||
|
torchaudio==2.5.1
|
||||||
|
tqdm==4.67.1
|
||||||
traitlets==5.14.3
|
traitlets==5.14.3
|
||||||
|
treetable==0.2.5
|
||||||
|
triton==3.1.0
|
||||||
typing_extensions==4.12.2
|
typing_extensions==4.12.2
|
||||||
urllib3==2.3.0
|
urllib3==2.3.0
|
||||||
wcwidth==0.2.13
|
wcwidth==0.2.13
|
||||||
|
|||||||
Binary file not shown.
@@ -0,0 +1,12 @@
|
|||||||
|
# __init__.py
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# Set up root logging for the package: every record is emitted as
# "<timestamp> : <message>" and anything below INFO is dropped.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')

# Leave a trace in the log as soon as the package is imported.
logging.info("freq-split-enhance/separation package has been imported.")
|
||||||
@@ -3,6 +3,11 @@ import tensorflow_hub as hub
|
|||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import csv
|
import csv
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Disable CUDA
|
||||||
|
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
||||||
|
|
||||||
|
|
||||||
model = hub.load('https://tfhub.dev/google/yamnet/1')
|
model = hub.load('https://tfhub.dev/google/yamnet/1')
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
import librosa
|
||||||
|
|
||||||
|
def resample(waveform, org_samplerate, new_samplerate):
    """
    Resample a waveform to a new sample rate using librosa.

    Args:
        waveform: waveform of the target audio (passed straight to librosa.resample).
        org_samplerate: original samplerate of the audio.
        new_samplerate: samplerate to which the audio is to be resampled.

    Returns:
        A tuple ``(resampled_waveform, new_samplerate)``.

    Raises:
        RuntimeError: if resampling fails for any reason. The underlying
            exception is chained as ``__cause__`` so the original traceback
            is preserved.
    """
    try:
        resampled = librosa.resample(
            waveform, orig_sr=org_samplerate, target_sr=new_samplerate
        )
    except Exception as e:
        # Wrap every failure in a single exception type for callers, but keep
        # the root cause attached instead of discarding it.
        raise RuntimeError(f"Error resampling the audio file: {e}") from e

    return resampled, new_samplerate
|
||||||
|
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import soundfile as sf
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def separate_audio_with_demucs(input_file, sample_rate, output_dir: str):
    """
    Use subprocess to run Demucs separation on an input audio file.

    The input file is treated as a temporary artifact: it is deleted once the
    Demucs run finishes, whether the run succeeded or failed.

    Failures to run Demucs are reported on stdout and are not raised, so
    callers must inspect ``output_dir`` to find out whether separation
    actually produced output.

    Parameters:
        input_file (str): Path to the input audio file to be separated.
            This file is removed before the function returns.
        sample_rate: Sample rate of the input audio. Currently unused by this
            function; kept so existing callers keep working.
        output_dir (str): Directory where the separated output will be saved.
            Created if it does not exist.
    """
    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Demucs command to separate the temp audio file. A list (not a shell
    # string) is used so paths with spaces or shell metacharacters are safe.
    command = ['demucs', '--out', output_dir, input_file]

    try:
        # Run the command as a subprocess; check=True turns a non-zero exit
        # status into CalledProcessError.
        subprocess.run(command, check=True)
        print(f"Separation completed. Output saved to {output_dir}")
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running demucs : {e}")
    except FileNotFoundError:
        # Raised by subprocess when the 'demucs' executable cannot be found.
        print("Demucs is not installed or not found in the system PATH")
    finally:
        # Cleanup the temporary file. A file that is already gone must not
        # raise from the finally block and mask the outcome reported above.
        try:
            os.remove(input_file)
        except FileNotFoundError:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,9 +1,11 @@
|
|||||||
import pytest
|
import pytest
|
||||||
import librosa
|
import librosa
|
||||||
|
import numpy as np
|
||||||
from src.preprocessing.normalize import normalize_audio
|
from src.preprocessing.normalize import normalize_audio
|
||||||
from src.preprocessing.trim import trim_audio
|
from src.preprocessing.trim import trim_audio
|
||||||
from src.preprocessing.classify import classify_audio
|
from src.preprocessing.classify import classify_audio
|
||||||
from src.input.file_reader import read_audio
|
from src.input.file_reader import read_audio
|
||||||
|
from src.preprocessing.resample import resample
|
||||||
|
|
||||||
def test_normalize_audio():
|
def test_normalize_audio():
|
||||||
file_path = "samples/cafe_crowd_talk.aiff"
|
file_path = "samples/cafe_crowd_talk.aiff"
|
||||||
@@ -26,3 +28,28 @@ def test_classify():
|
|||||||
predicted_class = classify_audio(file_path)
|
predicted_class = classify_audio(file_path)
|
||||||
|
|
||||||
assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
|
assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"
|
||||||
|
|
||||||
|
def test_resample():
    """
    Verify that resample() converts a synthetic tone to the requested rate.
    """
    # Build one second of a 440 Hz sine at half amplitude, sampled at 22.05 kHz.
    source_rate = 22050
    target_rate = 16000
    seconds = 1.0
    sample_count = int(source_rate * seconds)
    timeline = np.linspace(0, seconds, sample_count, endpoint=False)
    tone = 0.5 * np.sin(2 * np.pi * 440 * timeline)

    # Run the function under test.
    converted, reported_rate = resample(tone, source_rate, target_rate)

    # The reported rate must be the one that was asked for.
    assert reported_rate == target_rate, "The output sample rate does not match the target sample rate."

    # One second of audio at the target rate has exactly target_rate samples.
    expected_length = int(target_rate * seconds)
    assert len(converted) == expected_length, \
        "The length of the resampled waveform does not match the expected length."

    # The peak level should survive resampling to within a loose tolerance.
    peaks_match = np.allclose(np.max(converted), np.max(tone), atol=0.1)
    assert peaks_match, "The amplitude of the resampled waveform deviates significantly from the original."

    print("Test passed: resample function works as expected.")
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
import tempfile
|
||||||
|
import soundfile as sf
|
||||||
|
from pathlib import Path
|
||||||
|
from src.input.file_reader import read_audio
|
||||||
|
from src.preprocessing.trim import trim_audio
|
||||||
|
from src.preprocessing.resample import resample
|
||||||
|
from src.separation.demucs_wrapper import separate_audio_with_demucs
|
||||||
|
|
||||||
|
|
||||||
|
def test_demucs_separation_with_preprocessing():
    """
    Test to ensure Demucs separation works with preprocessing and creates expected outputs.

    The sample is read, trimmed, resampled to 44.1 kHz if needed, written to a
    private temporary directory under its original file name and passed to
    separate_audio_with_demucs. The test then checks that the htdemucs output
    tree contains one file per expected stem.
    """
    input_file = "./samples/am_contra_heart_peripheral.wav"
    file_name = Path(input_file).stem
    target_samplerate = 44100

    waveform, samplerate = read_audio(input_file)
    waveform = trim_audio(waveform, samplerate)

    # Resample to 44100 Hz
    if samplerate != target_samplerate:
        print("Resampling audio to 44.1kHz")
        waveform, samplerate = resample(waveform, samplerate, target_samplerate)

    # Work inside a private temporary directory: this avoids the race-prone
    # tempfile.mktemp, avoids name collisions in the shared temp dir, and
    # guarantees that no output from a previous run can satisfy the asserts.
    with tempfile.TemporaryDirectory() as work_dir:
        # Save the processed audio under the original file name, since the
        # output folder checked below is named after the input file's stem.
        audio_path = os.path.join(work_dir, f"{file_name}.wav")
        sf.write(audio_path, waveform, samplerate)

        output_path = os.path.join(work_dir, "demucs-test")
        separate_audio_with_demucs(audio_path, samplerate, output_path)

        # Verify the htdemucs folder exists
        demucs_dir = Path(output_path) / 'htdemucs'
        assert demucs_dir.exists(), "htdemucs directory not found in output path."

        # Verify the folder named after the file name (without extension) exists
        file_folder = demucs_dir / file_name
        assert file_folder.exists(), f"Folder {file_name} not found inside htdemucs directory."

        # Verify the expected files exist inside the folder
        expected_files = ['bass.wav', 'drums.wav', 'other.wav', 'vocals.wav']
        for expected_file in expected_files:
            file_path = file_folder / expected_file
            assert file_path.exists(), f"Expected file {expected_file} not found in {file_name} folder."
|
||||||
Reference in New Issue
Block a user