Split audio into chunks to prevent OOM issues
This commit is contained in:
@@ -1,35 +1,83 @@
|
|||||||
import os
import shutil

import librosa
import numpy as np
import soundfile as sf
import torch

from df.enhance import enhance, init_df, load_audio, save_audio
def split_audio(audio, sr, chunk_size=5):
    """Split an audio signal into consecutive fixed-length chunks.

    Args:
        audio: 1-D sequence of audio samples (list or array-like).
        sr (int): Sample rate of the signal in Hz.
        chunk_size (int, optional): Length of each chunk in seconds.
            Defaults to 5.

    Returns:
        list: Slices of at most ``sr * chunk_size`` samples each; the final
        chunk may be shorter. An empty input yields an empty list.
    """
    step = sr * chunk_size
    chunks = []
    for start in range(0, len(audio), step):
        chunks.append(audio[start:start + step])
    return chunks
def noisereduce(input_audio_path, output_audio_path, model_path=None):
    """
    Apply noise reduction using DeepFilterNet, processing the audio in
    5-second chunks so long files do not exhaust memory (OOM).

    The input is split into chunk files on disk, each chunk is enhanced
    independently, and the enhanced chunks are concatenated and written to
    ``output_audio_path``. Temporary chunk files are always cleaned up,
    even if enhancement fails partway through.

    Args:
        input_audio_path (str): Path to the input noisy audio file.
        output_audio_path (str): Path to save the enhanced audio file.
        model_path (str, optional): Path to a custom DeepFilterNet model.
            Defaults to None (uses the pre-trained model).

    Returns:
        str: Path to the enhanced audio file (``output_audio_path``).

    Raises:
        FileNotFoundError: If ``input_audio_path`` does not exist.
        ValueError: If the input file contains no audio samples.
    """
    if not os.path.exists(input_audio_path):
        raise FileNotFoundError(f"Input file {input_audio_path} not found")

    # Ensure the output directory exists. Guard against a bare filename:
    # os.path.dirname() returns "" then, and os.makedirs("") raises.
    output_dir = os.path.dirname(output_audio_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Initialize DeepFilterNet model
    model, df_state, _ = init_df(model_path)

    # Load audio at its native sample rate (sr=None keeps the file's rate).
    audio, sr = librosa.load(input_audio_path, sr=None)
    if len(audio) == 0:
        # Fail early with a clear message instead of np.concatenate([]) below.
        raise ValueError(f"Input file {input_audio_path} contains no audio")

    # Temporary directories for raw and enhanced chunks, created next to
    # the input file.
    parent_dir = os.path.dirname(input_audio_path)
    chunk_dir = os.path.join(parent_dir, "chunks")
    output_chunk_dir = os.path.join(chunk_dir, "output")
    os.makedirs(chunk_dir, exist_ok=True)
    os.makedirs(output_chunk_dir, exist_ok=True)

    try:
        # Split audio into 5-second chunks and write each one to disk.
        chunks = split_audio(audio, sr, chunk_size=5)
        chunk_paths = []
        for i, chunk in enumerate(chunks):
            chunk_path = os.path.join(chunk_dir, f"chunk_{i}.wav")
            sf.write(chunk_path, chunk, sr)
            chunk_paths.append(chunk_path)

        # Process each chunk sequentially to avoid OOM errors.
        enhanced_chunk_paths = []
        for chunk_path in chunk_paths:
            output_chunk_path = os.path.join(
                output_chunk_dir, os.path.basename(chunk_path)
            )

            # Reload at the model's expected sample rate, then enhance.
            chunk_audio, _ = load_audio(chunk_path, sr=df_state.sr())
            enhanced_audio = enhance(model, df_state, chunk_audio)

            # Save the enhanced chunk at the model's sample rate.
            save_audio(output_chunk_path, enhanced_audio, df_state.sr())
            enhanced_chunk_paths.append(output_chunk_path)

        # Combine enhanced chunks back into a single audio file, resampling
        # each chunk back to the input's original sample rate.
        final_audio = []
        for chunk_path in enhanced_chunk_paths:
            chunk_audio, _ = librosa.load(chunk_path, sr=sr)
            final_audio.append(chunk_audio)
        final_audio = np.concatenate(final_audio, axis=0)

        # Save final enhanced audio
        sf.write(output_audio_path, final_audio, sr)
    finally:
        # Clean up temporary chunk files and directories even on failure,
        # so aborted runs do not leave a stale "chunks" tree behind.
        shutil.rmtree(chunk_dir, ignore_errors=True)

    return output_audio_path
|
|||||||
Reference in New Issue
Block a user