Split audio into chunks to prevent OOM issues

This commit is contained in:
Joel Mathew Thomas
2025-02-26 18:45:41 +05:30
parent 1a2737d06f
commit 4da658d064
@@ -1,35 +1,83 @@
import os
import librosa
import torch
import shutil
import soundfile as sf
import numpy as np
from df.enhance import enhance, init_df, load_audio, save_audio
def split_audio(audio, sr, chunk_size=5):
    """Split an audio signal into consecutive chunks of `chunk_size` seconds.

    Args:
        audio (sequence or np.ndarray): 1-D audio samples.
        sr (int | float): Sample rate in Hz. Cast to int internally so a
            float sample rate cannot break slice arithmetic.
        chunk_size (int, optional): Chunk length in seconds. Defaults to 5.

    Returns:
        list: Slices of `audio`, each `sr * chunk_size` samples long except
        possibly the last (which holds the remainder). Empty input yields [].

    Raises:
        ValueError: If `chunk_size` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # int() guards against float sample rates; slice indices must be ints.
    samples_per_chunk = int(sr * chunk_size)
    return [audio[i:i + samples_per_chunk] for i in range(0, len(audio), samples_per_chunk)]
def noisereduce(input_audio_path, output_audio_path, model_path=None):
    """
    Apply noise reduction using DeepFilterNet, processing the audio in
    5-second chunks so long files do not exhaust memory (OOM).

    The input is split into temporary chunk WAV files next to the input,
    each chunk is enhanced independently, the enhanced chunks are
    concatenated back together, and the temporary files are removed.

    Args:
        input_audio_path (str): Path to the input noisy audio file.
        output_audio_path (str): Path to save the enhanced audio file.
        model_path (str, optional): Path to a custom DeepFilterNet model.
            Defaults to None (uses the pre-trained model).

    Returns:
        str: Path to the enhanced audio file (same as `output_audio_path`).

    Raises:
        FileNotFoundError: If `input_audio_path` does not exist.
    """
    if not os.path.exists(input_audio_path):
        raise FileNotFoundError(f"Input file {input_audio_path} not found")

    # Ensure the output directory exists; dirname may be "" when the
    # output path is a bare filename, and makedirs("") would raise.
    output_dir = os.path.dirname(output_audio_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Initialize DeepFilterNet model once; reused for every chunk.
    model, df_state, _ = init_df(model_path)

    # Load at the file's native sample rate (sr=None keeps it unchanged).
    audio, sr = librosa.load(input_audio_path, sr=None)

    # Temporary working directories live next to the input file.
    parent_dir = os.path.dirname(input_audio_path)
    chunk_dir = os.path.join(parent_dir, "chunks")
    output_chunk_dir = os.path.join(chunk_dir, "output")
    os.makedirs(chunk_dir, exist_ok=True)
    os.makedirs(output_chunk_dir, exist_ok=True)

    try:
        # Split audio into 5-second chunks and write each to disk.
        chunks = split_audio(audio, sr, chunk_size=5)
        chunk_paths = []
        for i, chunk in enumerate(chunks):
            chunk_path = os.path.join(chunk_dir, f"chunk_{i}.wav")
            sf.write(chunk_path, chunk, sr)
            chunk_paths.append(chunk_path)

        # Enhance each chunk sequentially to avoid OOM errors. The chunk is
        # reloaded with load_audio so it is resampled to the model's rate.
        enhanced_chunk_paths = []
        for chunk_path in chunk_paths:
            output_chunk_path = os.path.join(output_chunk_dir, os.path.basename(chunk_path))
            chunk_audio, _ = load_audio(chunk_path, sr=df_state.sr())
            enhanced_audio = enhance(model, df_state, chunk_audio)
            save_audio(output_chunk_path, enhanced_audio, df_state.sr())
            enhanced_chunk_paths.append(output_chunk_path)

        # Stitch the enhanced chunks back together at the original rate.
        final_audio = []
        for chunk_path in enhanced_chunk_paths:
            chunk_audio, _ = librosa.load(chunk_path, sr=sr)  # keep original sample rate
            final_audio.append(chunk_audio)

        if final_audio:
            combined = np.concatenate(final_audio, axis=0)
        else:
            # Empty input audio: emit an empty (but valid) output file
            # instead of letting np.concatenate raise on an empty list.
            combined = np.zeros(0, dtype=np.float32)

        sf.write(output_audio_path, combined, sr)
    finally:
        # Always remove the temporary chunk tree, even if enhancement failed.
        shutil.rmtree(chunk_dir, ignore_errors=True)

    return output_audio_path