Merge pull request #29 from joelmathewthomas/feature/api

feature/api: Add endpoints
This commit is contained in:
Joel Mathew Thomas
2025-02-26 20:23:06 +05:30
committed by GitHub
21 changed files with 525 additions and 202 deletions
+130 -7
View File
@@ -1,11 +1,134 @@
from __future__ import absolute_import, unicode_literals
import os
import shutil
from pathlib import Path
from celery import shared_task
from src.input.file_reader import read_audio #export PYTHONPATH="/home/karthikeyan/code/MainProject/freq-split-enhance:$PYTHONPATH" for exporting the module
from freqsplit.input.file_reader import read_audio
from freqsplit.preprocessing.classify import classify_audio
from freqsplit.preprocessing.normalize import normalize_audio
from freqsplit.preprocessing.trim import trim_audio
from freqsplit.preprocessing.resample import resample
from freqsplit.postprocessing.audio_writer import export_audio
from freqsplit.separation.demucs_wrapper import separate_audio_with_demucs
from freqsplit.refinement.deepfilternet_wrapper import noisereduce
import time
@shared_task
def process_uploaded_file(file_path):
    """Celery task: load the uploaded audio file to confirm it is readable.

    Args:
        file_path (str): Path to the uploaded audio file.

    Returns:
        str: A fixed completion message.
    """
    # Reading the file stands in for a long-running processing step.
    read_audio(file_path=file_path)
    return 'File processed'
def save_and_classify(file_path, file_content):
    """Persist the uploaded bytes to *file_path*, then classify the audio.

    Args:
        file_path (str): Destination path for the uploaded file.
        file_content (bytes): Raw bytes of the uploaded file.

    Returns:
        tuple: (predicted audio class, original sample rate of the file)
    """
    with open(file_path, 'wb') as out_file:
        out_file.write(file_content)

    # First read only to capture the file's native sampling rate.
    _, org_sr = read_audio(file_path)
    # Second read resamples to 32 kHz mono, the rate the classifier expects.
    waveform, sr = read_audio(file_path, 32000, mono=True)

    return classify_audio(waveform, sr), org_sr
@shared_task
def normalize_audio_task(file_path):
    """Celery task: normalize the audio at *file_path* in place.

    Returns:
        bool: True on success, False if any step failed.
    """
    try:
        samples, rate = read_audio(file_path)
        export_audio(normalize_audio(samples), file_path, rate)
        return True
    except Exception:
        # Best-effort: the caller only needs a success flag.
        return False
@shared_task
def trim_audio_task(file_path):
    """Celery task: strip leading/trailing silence from *file_path* in place.

    Returns:
        bool: True on success, False if any step failed.
    """
    try:
        samples, rate = read_audio(file_path)
        export_audio(trim_audio(samples, rate), file_path, rate)
        return True
    except Exception:
        # Best-effort: the caller only needs a success flag.
        return False
@shared_task
def resample_audio_task(file_path, sr):
    """Celery task to resample the audio file in place.

    Args:
        file_path (str): Path to the audio file.
        sr (str | int): Target sample rate; arrives as a string from the API.

    Returns:
        bool: True on success, False on failure.
    """
    try:
        audio, org_sr = read_audio(file_path)
        # int() instead of eval(): `sr` comes straight from an HTTP request,
        # and eval() on untrusted input allows arbitrary code execution.
        # int() raises ValueError on garbage, which the except below turns
        # into the same False result callers already expect.
        resampled_audio, sr = resample(audio, org_sr, int(sr))
        export_audio(resampled_audio, file_path, sr)
        return True
    except Exception:
        return False
@shared_task
def music_separation_task(file_path):
    """Celery task: split a music file into stems with Demucs.

    The original file is replaced by the separated vocals track, and the
    remaining stems (bass, drums, other) are moved into a ``sources/``
    directory beside it.

    Returns:
        bool: True on success, False if any file operation failed.
    """
    file_path = Path(file_path)
    print("File path is ", file_path)

    # Demucs writes its results next to the input file.
    output_path = file_path.parent
    separate_audio_with_demucs(str(file_path), str(output_path))

    # Demucs output layout: <output>/htdemucs/<input-stem>/*.wav
    demucs_dir = output_path / 'htdemucs'
    file_folder = demucs_dir / file_path.stem
    if not file_folder.exists():
        raise RuntimeError(f"Demucs output folder not found: {file_folder}")

    stems = ["bass.wav", "drums.wav", "other.wav", "vocals.wav"]

    # Non-vocal stems are collected under a dedicated "sources" directory.
    sources_dir = output_path / "sources"
    sources_dir.mkdir(exist_ok=True)

    try:
        vocals_path = file_folder / "vocals.wav"
        if not vocals_path.exists():
            raise RuntimeError("Vocals file not found in Demucs output")

        # The vocals track takes over the original file's name and location.
        shutil.move(str(vocals_path), str(file_path))

        for stem in stems:
            stem_path = file_folder / stem
            if stem_path.exists() and stem != "vocals.wav":
                shutil.move(str(stem_path), str(sources_dir / stem))

        # Cleanup: remove the now-empty htdemucs working directory.
        shutil.rmtree(str(demucs_dir))
        return True
    except Exception:
        return False
@shared_task
def noisereduce_task(file_path):
    """Celery task: run noise reduction on *file_path*, overwriting it.

    Returns:
        bool: True on success, False if noise reduction raised.
    """
    file_path = Path(file_path)
    try:
        # Input and output are the same path: the file is denoised in place.
        noisereduce(file_path, file_path)
        return True
    except Exception:
        return False
@shared_task
def cleanup_task(file_path):
    """Celery task: delete the upload directory containing *file_path*.

    Returns:
        bool: True on success, False if removal failed.
    """
    try:
        # Remove the whole per-upload folder, not just the single file.
        shutil.rmtree(os.path.dirname(Path(file_path)))
        return True
    except Exception:
        return False
+16
View File
@@ -0,0 +1,16 @@
# api/utils.py
import os
from rest_framework import status
def get_audio_file_path(request, base_dir):
    """Return the path of the audio file stored under the request's UUID folder.

    Args:
        request: DRF request; expects ``file_uuid`` in ``request.data``.
        base_dir (str): Root directory containing one sub-folder per upload.

    Returns:
        tuple: (ok, payload, http_status) — payload is the file path on
        success, or an error message on failure.
    """
    file_uuid = request.data.get("file_uuid")
    if not file_uuid:
        return False, "Missing file_uuid", status.HTTP_400_BAD_REQUEST

    dir_path = os.path.join(base_dir, file_uuid)
    # NOTE(review): 500 for a missing upload looks like it should be 404, but
    # callers forward this status as-is, so it is kept unchanged here.
    if not os.path.exists(dir_path) or not os.listdir(dir_path):
        return False, "No file found", status.HTTP_500_INTERNAL_SERVER_ERROR

    # Pick the first regular file (sorted for determinism). The folder may
    # also contain a "sources/" sub-directory after music separation, so
    # taking os.listdir(dir_path)[0] blindly could return a directory.
    for entry in sorted(os.listdir(dir_path)):
        candidate = os.path.join(dir_path, entry)
        if os.path.isfile(candidate):
            return True, candidate, status.HTTP_200_OK
    return False, "No file found", status.HTTP_500_INTERNAL_SERVER_ERROR
+193 -15
View File
@@ -1,14 +1,28 @@
import os
import uuid
import zipfile
from django.http import FileResponse, HttpResponse
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from freqsplit.preprocessing.classify import classify_audio
from .utils import get_audio_file_path
from .tasks import save_and_classify
from .tasks import normalize_audio_task
from .tasks import trim_audio_task
from .tasks import resample_audio_task
from .tasks import music_separation_task
from .tasks import noisereduce_task
from .tasks import cleanup_task
from freqsplit.input.format_checker import is_supported_format
UPLOAD_DIR = "/tmp/freq-split-enhance"
UPLOAD_DIR = "/tmp/freqsplit"
# Ensure the temp directory exists
os.makedirs(UPLOAD_DIR, exist_ok=True)
#
# Endpoint to upload audio and classify it to audio_class
@api_view(['POST'])
def upload_audio(request):
"""Handles audio file upload and saves it to /tmp/freq-split-enhance"""
@@ -16,19 +30,183 @@ def upload_audio(request):
return Response({"Error: No file provided"}, status=status.HTTP_400_BAD_REQUEST)
audio_file = request.FILES['file']
file_path = os.path.join(UPLOAD_DIR, audio_file.name)
# Check file format before proceeding
if not is_supported_format(audio_file.name):
return Response({"error": "Unsupported file format"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
# Generate a unique ID for this upload
file_uuid = str(uuid.uuid4())[:8]
#Create a subdirectory for this upload
upload_dir = os.path.join(UPLOAD_DIR, file_uuid)
os.makedirs(upload_dir, exist_ok=True)
file_path = os.path.join(upload_dir, audio_file.name)
# Save the uploaded file
with open(file_path, 'wb') as destination:
for chunk in audio_file.chunks():
destination.write(chunk)
task = save_and_classify.apply(args=(file_path, audio_file.read()))
if task.successful():
audio_class = task.result[0]
return Response(
{
"Status": "File uploaded successfully",
"file_uuid": file_uuid,
"audio_class": audio_class,
"sr": task.result[1]
},
status=status.HTTP_201_CREATED,
)
audio_class = classify_audio(file_path)
return Response(
{
"Status": "File uploaded successfully",
"file_path": file_path,
"audio_class": audio_class
}, status=status.HTTP_201_CREATED,
)
# Endpoint to normalize audio
@api_view(['POST'])
def normalize_audio(request):
    """Handles audio normalization request"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    # Run the Celery task in-process and wait for its boolean result.
    task = normalize_audio_task.apply(args=(payload,))
    if not task.get():
        return Response({"error": "Failed to normalize audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": "Audio normalized successfully"}, status=status.HTTP_200_OK)
# Endpoint to trim audio
@api_view(['POST'])
def trim_audio(request):
    """Handles trimming of leading and trailing silence from an audio clip"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    # Run the Celery task in-process and wait for its boolean result.
    task = trim_audio_task.apply(args=(payload,))
    if not task.get():
        return Response({"error": "Failed to trim audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": "Audio trimmed successfully"}, status=status.HTTP_200_OK)
# Endpoint to resample audio
@api_view(['POST'])
def resample_audio(request):
    """Handles the resampling of audio"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    sr = request.data.get("sr")
    if not sr:
        return Response({"error": "Missing sr"}, status=status.HTTP_400_BAD_REQUEST)

    # Run the Celery task in-process; `sr` is forwarded as received.
    task = resample_audio_task.apply(args=(payload, sr))
    if not task.get():
        return Response({"error": "Failed to resample audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": f"Audio resampled to {sr} successfully"}, status=status.HTTP_200_OK)
# Endpoint to separate music into sources
@api_view(['POST'])
def separate_music(request):
    """Handles the separation of music audio into source components"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    # Run the Celery task in-process and wait for its boolean result.
    task = music_separation_task.apply(args=(payload,))
    if not task.get():
        return Response({"error": "Failed to source separate audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": f"Audio separated into sources successfully"}, status=status.HTTP_200_OK)
# Endpoint to reduce noise from audio
@api_view(['POST'])
def noisereduce(request):
    """Handles the reduction of noise from audio"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    # Run the Celery task in-process and wait for its boolean result.
    task = noisereduce_task.apply(args=(payload,))
    if not task.get():
        return Response({"error": "Failed to remove noise from audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": f"Removed noise from audio successfully"}, status=status.HTTP_200_OK)
# Endpoint to download audio file or zipped directory
@api_view(['GET'])
def download_audio(request):
    """Handles downloading an audio file or a zipped directory."""
    file_uuid = request.query_params.get("file_uuid")
    if not file_uuid:
        return Response({"error": "file_uuid is required"}, status=status.HTTP_400_BAD_REQUEST)

    dir_path = os.path.join(UPLOAD_DIR, file_uuid)
    if not os.path.exists(dir_path) or not os.listdir(dir_path):
        return Response({"error": "No file found"}, status=status.HTTP_404_NOT_FOUND)

    entries = os.listdir(dir_path)
    audio_files = [name for name in entries if name.endswith(('.wav', '.mp3', '.flac'))]
    has_sources = "sources" in entries

    # Exactly one processed file and no separated stems: stream it directly.
    if len(audio_files) == 1 and not has_sources:
        file_path = os.path.join(dir_path, audio_files[0])
        return FileResponse(open(file_path, "rb"), as_attachment=True, filename=os.path.basename(file_path))

    # Otherwise bundle the whole upload folder (stems included) into a ZIP.
    zip_file_path = os.path.join(UPLOAD_DIR, f"{file_uuid}.zip")
    if os.path.exists(zip_file_path):
        os.remove(zip_file_path)  # never serve a stale archive

    with zipfile.ZipFile(zip_file_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for root, _, files in os.walk(dir_path):
            for name in files:
                full_path = os.path.join(root, name)
                # Relative arcname preserves the folder layout inside the ZIP.
                archive.write(full_path, os.path.relpath(full_path, dir_path))

    # Stream the freshly built ZIP file.
    return FileResponse(open(zip_file_path, "rb"), as_attachment=True, filename=os.path.basename(zip_file_path))
@api_view(['POST'])
def cleanup(request):
    """Handles file cleanup after pipeline processing"""
    ok, payload, code = get_audio_file_path(request, UPLOAD_DIR)
    if not ok:
        return Response({"error": payload}, status=code)

    # Run the Celery task in-process and wait for its boolean result.
    task = cleanup_task.apply(args=(payload,))
    if not task.get():
        return Response({"error": "Failed to cleanup files on the server"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": f"Successfully cleaned up files on the server"}, status=status.HTTP_200_OK)
@api_view(['POST'])
def cleanup_zip(request):
    """Handles cleanup of all zip files leftover by api/download.

    Bug fix: the original returned from inside the loop, so at most one ZIP
    was deleted per call, and a directory containing no ZIPs fell off the
    end of the function and returned an implicit None (no HTTP Response).
    Now every ZIP in UPLOAD_DIR is removed, and errors are accumulated.
    """
    errors = []
    for file in os.listdir(UPLOAD_DIR):
        if file.endswith(".zip"):
            file_path = os.path.join(UPLOAD_DIR, file)
            try:
                os.remove(file_path)
            except Exception as e:
                # Keep going: one failed deletion should not abort the rest.
                errors.append(f"Error deleting {file_path}: {e}")

    if errors:
        return Response({"message": "; ".join(errors)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
    return Response({"message": "Cleaned up zipfiles on the server"}, status=status.HTTP_200_OK)
+3 -1
View File
@@ -1 +1,3 @@
from celery_app import app as celery
from .celery import app as celery_app
__all__ = ('celery_app',)
+9
View File
@@ -0,0 +1,9 @@
import os
from celery import Celery
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'backend.settings')
app = Celery('backend')
# Load config from Django settings, using a `CELERY_` prefix
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks()
+4
View File
@@ -123,6 +123,10 @@ STATIC_URL = 'static/'
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
# Configure Redis as message broker
CELERY_BROKER_URL = 'redis://localhost:6379/0'
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
+17 -1
View File
@@ -17,8 +17,24 @@ Including another URLconf
from django.contrib import admin
from django.urls import path
from api.views import upload_audio
from api.views import normalize_audio
from api.views import trim_audio
from api.views import resample_audio
from api.views import separate_music
from api.views import noisereduce
from api.views import download_audio
from api.views import cleanup
from api.views import cleanup_zip
# URL routes for the audio-processing API. Diff residue fixed: the old
# `path('api/upload', ...)` line (missing its trailing comma) duplicated the
# new one, which is both a syntax error and a duplicate route.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('api/upload', upload_audio, name='upload_audio'),
    path('api/normalize', normalize_audio, name="normalize_audio"),
    path('api/trim', trim_audio, name='trim_audio'),
    path('api/resample', resample_audio, name='resample_audio'),
    path('api/separate', separate_music, name="separate_music"),
    # NOTE(review): route name "noisreduce" looks like a typo for
    # "noisereduce"; kept unchanged in case it is reverse()d elsewhere.
    path('api/noisereduce', noisereduce, name="noisreduce"),
    path('api/download', download_audio, name="download_audio"),
    path('api/cleanup', cleanup, name="cleanup"),
    path('api/cleanup_zip', cleanup_zip, name="cleanup_zip"),
]
-20
View File
@@ -1,20 +0,0 @@
import os
import sys
from celery import Celery
# Automatically set environment variables in celery_app.py
# Set Django settings module
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'backend.settings')
# Add the project directory to sys.path (similar to the manual PYTHONPATH)
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(project_root)
app = Celery('backend')
# Load configuration from Django settings, using the CELERY namespace.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Autodiscover tasks from installed apps.
app.autodiscover_tasks()
+25 -46
View File
@@ -6,9 +6,8 @@ build-backend = "setuptools.build_meta"
name = "freqsplit"
version = "0.1.0"
dependencies = [
"absl-py==2.1.0",
"aiohappyeyeballs==2.4.4",
"aiohttp==3.11.11",
"aiohappyeyeballs==2.4.6",
"aiohttp==3.11.13",
"aiosignal==1.3.2",
"amqp==5.3.1",
"antlr4-python3-runtime==4.9.3",
@@ -16,13 +15,12 @@ dependencies = [
"asgiref==3.8.1",
"asteroid==0.7.0",
"asteroid-filterbanks==0.4.0",
"astunparse==1.6.3",
"attrs==25.1.0",
"audioread==3.0.1",
"billiard==4.2.1",
"cached-property==2.0.1",
"celery==5.4.0",
"certifi==2024.12.14",
"certifi==2025.1.31",
"cffi==1.17.1",
"charset-normalizer==3.4.1",
"click==8.1.8",
@@ -32,50 +30,38 @@ dependencies = [
"cloudpickle==3.1.1",
"contourpy==1.3.1",
"cycler==0.12.1",
"decorator==5.1.1",
"decorator==5.2.1",
"DeepFilterLib==0.5.6",
"DeepFilterNet==0.5.6",
"demucs==4.0.1",
"Django==5.1.6",
"djangorestframework==3.15.2",
"dora_search==0.1.12",
"einops==0.8.0",
"einops==0.8.1",
"filelock==3.17.0",
"flatbuffers==25.2.10",
"fonttools==4.55.6",
"fonttools==4.56.0",
"frozenlist==1.5.0",
"fsspec==2024.12.0",
"future==1.0.0",
"gast==0.6.0",
"google-pasta==0.2.0",
"grpcio==1.70.0",
"h5py==3.13.0",
"huggingface-hub==0.28.0",
"fsspec==2025.2.0",
"huggingface-hub==0.29.1",
"idna==3.10",
"iniconfig==2.0.0",
"Jinja2==3.1.5",
"joblib==1.4.2",
"julius==0.2.7",
"keras==3.8.0",
"kiwisolver==1.4.8",
"kombu==5.4.2",
"lameenc==1.8.1",
"lazy_loader==0.4",
"libclang==18.1.1",
"librosa==0.10.2.post1",
"lightning-utilities==0.11.9",
"lightning-utilities==0.12.0",
"llvmlite==0.44.0",
"loguru==0.7.3",
"Markdown==3.7",
"markdown-it-py==3.0.0",
"MarkupSafe==3.0.2",
"matplotlib==3.10.0",
"mdurl==0.1.2",
"mir_eval==0.7",
"ml-dtypes==0.4.1",
"mir_eval==0.8.2",
"mpmath==1.3.0",
"msgpack==1.1.0",
"multidict==6.1.0",
"namex==0.0.8",
"networkx==3.4.2",
"numba==0.61.0",
"numpy==1.26.4",
@@ -88,15 +74,15 @@ dependencies = [
"nvidia-curand-cu12==10.3.5.147",
"nvidia-cusolver-cu12==11.6.1.9",
"nvidia-cusparse-cu12==12.3.1.170",
"nvidia-cusparselt-cu12==0.6.2",
"nvidia-nccl-cu12==2.21.5",
"nvidia-nvjitlink-cu12==12.4.127",
"nvidia-nvtx-cu12==12.4.127",
"omegaconf==2.3.0",
"openunmix==1.3.0",
"opt_einsum==3.4.0",
"optree==0.14.0",
"packaging==23.2",
"pandas==2.2.3",
"panns-inference==0.1.1",
"pb-bss-eval==0.0.2",
"pesq==0.0.4",
"pillow==11.1.0",
@@ -104,54 +90,47 @@ dependencies = [
"pluggy==1.5.0",
"pooch==1.8.2",
"prompt_toolkit==3.0.50",
"propcache==0.2.1",
"protobuf==5.29.3",
"propcache==0.3.0",
"pycparser==2.22",
"Pygments==2.19.1",
"pyparsing==3.2.1",
"pystoi==0.4.1",
"pytest==8.3.4",
"python-dateutil==2.9.0.post0",
"pytorch-lightning==2.5.0.post0",
"pytorch-ranger==0.1.1",
"pytz==2024.2",
"pytz==2025.1",
"PyYAML==6.0.2",
"redis==5.2.1",
"regex==2024.11.6",
"requests==2.32.3",
"retrying==1.3.4",
"rich==13.9.4",
"safetensors==0.5.3",
"scikit-learn==1.6.1",
"scipy==1.15.1",
"setuptools==75.8.0",
"scipy==1.15.2",
"setuptools==75.8.1",
"six==1.17.0",
"soundfile==0.13.1",
"soxr==0.5.0.post1",
"sqlparse==0.5.3",
"submitit==1.5.2",
"sympy==1.13.1",
"tensorboard==2.18.0",
"tensorboard-data-server==0.7.2",
"tensorflow==2.18.0",
"tensorflow-hub==0.16.1",
"termcolor==2.5.0",
"tf_keras==2.18.0",
"threadpoolctl==3.5.0",
"torch==2.5.1",
"tokenizers==0.21.0",
"torch==2.6.0",
"torch-optimizer==0.1.0",
"torch-stoi==0.2.3",
"torchaudio==2.5.1",
"torchaudio==2.6.0",
"torchlibrosa==0.1.0",
"torchmetrics==0.11.4",
"tqdm==4.67.1",
"transformers==4.49.0",
"treetable==0.2.5",
"triton==3.1.0",
"triton==3.2.0",
"typing_extensions==4.12.2",
"tzdata==2025.1",
"urllib3==2.3.0",
"vine==5.1.0",
"wcwidth==0.2.13",
"Werkzeug==3.1.3",
"wheel==0.45.1",
"wrapt==1.17.2",
"yarl==1.18.3",
]
+24 -46
View File
@@ -1,6 +1,5 @@
absl-py==2.1.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiohappyeyeballs==2.4.6
aiohttp==3.11.13
aiosignal==1.3.2
amqp==5.3.1
antlr4-python3-runtime==4.9.3
@@ -8,13 +7,12 @@ appdirs==1.4.4
asgiref==3.8.1
asteroid==0.7.0
asteroid-filterbanks==0.4.0
astunparse==1.6.3
attrs==25.1.0
audioread==3.0.1
billiard==4.2.1
cached-property==2.0.1
celery==5.4.0
certifi==2024.12.14
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
click==8.1.8
@@ -24,51 +22,38 @@ click-repl==0.3.0
cloudpickle==3.1.1
contourpy==1.3.1
cycler==0.12.1
decorator==5.1.1
decorator==5.2.1
DeepFilterLib==0.5.6
DeepFilterNet==0.5.6
demucs==4.0.1
Django==5.1.6
djangorestframework==3.15.2
dora_search==0.1.12
einops==0.8.0
einops==0.8.1
filelock==3.17.0
flatbuffers==25.2.10
fonttools==4.55.6
fonttools==4.56.0
frozenlist==1.5.0
fsspec==2024.12.0
future==1.0.0
gast==0.6.0
google-pasta==0.2.0
grpcio==1.70.0
h5py==3.13.0
huggingface-hub==0.28.0
fsspec==2025.2.0
huggingface-hub==0.29.1
idna==3.10
iniconfig==2.0.0
Jinja2==3.1.5
joblib==1.4.2
julius==0.2.7
keras==3.8.0
kiwisolver==1.4.8
kombu==5.4.2
lameenc==1.8.1
lazy_loader==0.4
libclang==18.1.1
librosa==0.10.2.post1
lightning-utilities==0.11.9
lightning-utilities==0.12.0
llvmlite==0.44.0
loguru==0.7.3
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.10.0
mdurl==0.1.2
mir_eval==0.7
ml-dtypes==0.4.1
mir_eval==0.8.2
mpmath==1.3.0
msgpack==1.1.0
multidict==6.1.0
namex==0.0.8
networkx==3.4.2
numba==0.61.0
numpy==1.26.4
@@ -81,15 +66,15 @@ nvidia-cufft-cu12==11.2.1.3
nvidia-curand-cu12==10.3.5.147
nvidia-cusolver-cu12==11.6.1.9
nvidia-cusparse-cu12==12.3.1.170
nvidia-cusparselt-cu12==0.6.2
nvidia-nccl-cu12==2.21.5
nvidia-nvjitlink-cu12==12.4.127
nvidia-nvtx-cu12==12.4.127
omegaconf==2.3.0
openunmix==1.3.0
opt_einsum==3.4.0
optree==0.14.0
packaging==23.2
pandas==2.2.3
panns-inference==0.1.1
pb-bss-eval==0.0.2
pesq==0.0.4
pillow==11.1.0
@@ -97,52 +82,45 @@ platformdirs==4.3.6
pluggy==1.5.0
pooch==1.8.2
prompt_toolkit==3.0.50
propcache==0.2.1
protobuf==5.29.3
propcache==0.3.0
pycparser==2.22
Pygments==2.19.1
pyparsing==3.2.1
pystoi==0.4.1
pytest==8.3.4
python-dateutil==2.9.0.post0
pytorch-lightning==2.5.0.post0
pytorch-ranger==0.1.1
pytz==2024.2
pytz==2025.1
PyYAML==6.0.2
redis==5.2.1
regex==2024.11.6
requests==2.32.3
retrying==1.3.4
rich==13.9.4
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.1
setuptools==75.8.0
scipy==1.15.2
setuptools==75.8.1
six==1.17.0
soundfile==0.13.1
soxr==0.5.0.post1
sqlparse==0.5.3
submitit==1.5.2
sympy==1.13.1
tensorboard==2.18.0
tensorboard-data-server==0.7.2
tensorflow==2.18.0
tensorflow-hub==0.16.1
termcolor==2.5.0
tf_keras==2.18.0
threadpoolctl==3.5.0
torch==2.5.1
tokenizers==0.21.0
torch==2.6.0
torch-optimizer==0.1.0
torch-stoi==0.2.3
torchaudio==2.5.1
torchaudio==2.6.0
torchlibrosa==0.1.0
torchmetrics==0.11.4
tqdm==4.67.1
transformers==4.49.0
treetable==0.2.5
triton==3.1.0
triton==3.2.0
typing_extensions==4.12.2
tzdata==2025.1
urllib3==2.3.0
vine==5.1.0
wcwidth==0.2.13
Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.18.3
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/input package has been imported.")
logging.info("freqsplit/input package has been imported.")
+8 -2
View File
@@ -1,12 +1,14 @@
import os
import librosa
def read_audio(file_path):
def read_audio(file_path, sr=None, mono=None):
"""
Reads an audio file and returns the audio time series and sampling rate.
Args:
file_path (str): Path to the audio file.
sr (int): Sample rate at which the audio is to be loaded
mono (bool): True to loaded audio with single channels, else False.
Returns:
tuple: audio_time_series (numpy.ndarray), sampling_rate (int)
@@ -15,7 +17,11 @@ def read_audio(file_path):
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
try:
audio, sr = librosa.load(file_path, sr=None) # Load with original sampling rate.
librosa_kwargs = {"sr": sr}
if mono is not None: # Only add 'mono' if explicitly provided
librosa_kwargs["mono"] = mono
audio, sr = librosa.load(file_path, **librosa_kwargs)
return audio, sr
except Exception as e:
raise RuntimeError(f"Error reading the audio file: {e}")
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/postprocessing package has been imported.")
logging.info("freqsplit/postprocessing package has been imported.")
@@ -12,9 +12,6 @@ def export_audio(audio, output_path, sr):
"""
try:
print(f"Initial audio shape: {audio.shape}, dtype: {audio.dtype}")
if audio.ndim == 2 and audio.shape[0] == 2:
# Transpose stereo audio to match the expected shape
audio = audio.T # From (2, num_samples) to (num_samples, 2)
@@ -26,10 +23,6 @@ def export_audio(audio, output_path, sr):
if np.max(np.abs(audio)) > 0: # Avoid divide by zero
audio = audio / np.max(np.abs(audio))
# Verify final format
print(f"Final audio shape: {audio.shape}, dtype: {audio.dtype}, max: {np.max(audio)}, min: {np.min(audio)}")
sf.write(output_path, audio, sr, format='wav')
print(f"Audio saved to {output_path}")
except Exception as e:
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/preprocessing package has been imported.")
logging.info("freqsplit/preprocessing package has been imported.")
+31 -41
View File
@@ -1,53 +1,43 @@
import tensorflow as tf
import tensorflow_hub as hub
import librosa
import numpy as np
import csv
import os
from panns_inference import AudioTagging, labels
# Initialize PANNs model
at = AudioTagging(checkpoint_path=None, device='cuda')
# Force TensorFlow to use only CPU
tf.config.set_visible_devices([], 'GPU')
model = hub.load('https://tfhub.dev/google/yamnet/1')
#Find the name of the class with the top score when mean-aggregated across frames.
def class_names_from_csv(class_map_scv_text):
"""Returns list of class names corresponding to score vector."""
class_names = []
with tf.io.gfile.GFile(class_map_scv_text) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
class_names.append(row['display_name'])
return class_names
# Main function to process audio and classify
def classify_audio(file_path):
def classify_audio(waveform, sr):
"""
Given an audio file, this function loads the audio, resamples it,
normalizes it, and runs it through the YAMNet model to classify the sound.
normalizes it, and runs it through the PANNs model to classify the sound.
Args:
- file_path (str): Path to the audio file (WAV, MP3, etc.).
- waveform (numpy.ndarray): waveform of the audio file (WAV, MP3, etc.).
- sr (int): Sampling rate of the audio.
Returns:
- str: Predicted class label of the audio.
"""
# Load audio using librosa (this handles both loading, resampling, and conversion to mono)
waveform, sample_rate = librosa.load(file_path, sr=16000, mono=True) # Ensuring 16k sample rate and mono
# Normalize the waveform to [-1.0, 1.0] (librosa already returns normalized values)
waveform = waveform / np.max(np.abs(waveform))
# Execute the YAMNet model
scores, embeddings, spectrogram = model(waveform)
# Extract the class names from the model
class_map_path = model.class_map_path().numpy()
class_names = class_names_from_csv(class_map_path)
# Find the class with the highest score
scores_np = scores.numpy()
inferred_class = class_names[scores_np.mean(axis=0).argmax()]
return inferred_class
# Check if the sampling rate is 32000Hz
try:
if sr != 32000:
raise RuntimeError
except Exception:
raise RuntimeError(f"The audio is not sampled at 32000Hz, failed to classify audio.")
# Normalize the waveform to [-1.0, 1.0]
waveform = waveform / np.max(np.abs(waveform))
# Ensure waveform shape is correct for model input
waveform = waveform[None, :]
# Execute the PANNs model
try:
clipwise_output, _ = at.inference(waveform)
except Exception as e:
raise RuntimeError(f"Error: Failed to classify audio: {e}")
# Get the top predicted class
predicted_index = np.argmax(clipwise_output)
inferred_class = labels[predicted_index]
return inferred_class
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/refinement package has been imported.")
logging.info("freqsplit/refinement package has been imported.")
@@ -1,35 +1,83 @@
import os
import librosa
import torch
import shutil
import soundfile as sf
import numpy as np
from df.enhance import enhance, init_df, load_audio, save_audio
def split_audio(audio, sr, chunk_size=5):
"""Split audio into chunks of `chunk_size` seconds."""
samples_per_chunk = sr * chunk_size
return [audio[i:i + samples_per_chunk] for i in range(0, len(audio), samples_per_chunk)]
def noisereduce(input_audio_path, output_audio_path, model_path=None):
"""
Apply noise reduction using DeepFilterNet.
Apply noise reduction using DeepFilterNet with chunking.
Args:
input_audio_path (str): Path to the input noisy audio file.
output_audio_path (str): Path to save the enhanced audio file.
model_path (str, optional): Path to a custom DeepFilterNet model. Defaults to None (uses the pre-trained model).
model_path (str, optional): Path to a custom DeepFilterNet model. Defaults to None (uses pre-trained model).
Returns:
str: Path to the enhanced audio file.
"""
if not os.path.exists(input_audio_path):
raise FileNotFoundError(f"Input file {input_audio_path} not found")
output_dir = os.path.dirname(output_audio_path)
os.makedirs(output_dir, exist_ok=True) # Ensure the directory exists
# Initialize DeepFilterNet model
model, df_state, _ = init_df(model_path)
# Load audio
audio, _ = load_audio(input_audio_path, sr=df_state.sr())
audio, sr = librosa.load(input_audio_path, sr=None)
# Ensure output path exists
os.makedirs(os.path.dirname(output_audio_path), exist_ok=True)
# Ensure output and chunk directories exist
parent_dir = os.path.dirname(input_audio_path)
chunk_dir = os.path.join(parent_dir, "chunks")
output_chunk_dir = os.path.join(chunk_dir, "output")
os.makedirs(chunk_dir, exist_ok=True)
os.makedirs(output_chunk_dir, exist_ok=True)
# Apply noise reduction
enhanced_audio = enhance(model, df_state, audio)
# Split audio into 5-second chunks
chunks = split_audio(audio, sr, chunk_size=5)
chunk_paths = []
for i, chunk in enumerate(chunks):
chunk_path = os.path.join(chunk_dir, f"chunk_{i}.wav")
sf.write(chunk_path, chunk, sr)
chunk_paths.append(chunk_path)
enhanced_chunk_paths = []
# Process each chunk sequentially to avoid OOM errors
for chunk_path in chunk_paths:
output_chunk_path = os.path.join(output_chunk_dir, os.path.basename(chunk_path))
# Load and enhance
chunk_audio, _ = load_audio(chunk_path, sr=df_state.sr())
enhanced_audio = enhance(model, df_state, chunk_audio)
# Save enhanced chunk
save_audio(output_chunk_path, enhanced_audio, df_state.sr())
enhanced_chunk_paths.append(output_chunk_path)
# Combine enhanced chunks back into a single audio file
final_audio = []
for chunk_path in enhanced_chunk_paths:
chunk_audio, _ = librosa.load(chunk_path, sr=sr) # Keep original sample rate
final_audio.append(chunk_audio)
final_audio = np.concatenate(final_audio, axis=0)
# Save final enhanced audio
sf.write(output_audio_path, final_audio, sr)
# Clean up temporary chunk files and directories
shutil.rmtree(chunk_dir, ignore_errors=True)
# Save the enhanced audio
save_audio(output_audio_path, enhanced_audio, df_state.sr())
return output_audio_path
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/separation package has been imported.")
logging.info("freqsplit/separation package has been imported.")
+1 -1
View File
@@ -9,4 +9,4 @@ logging.basicConfig(
level = logging.INFO
)
logging.info("freq-split-enhance/spectogram package has been imported.")
logging.info("freqsplit/spectogram package has been imported.")
+2 -1
View File
@@ -24,8 +24,9 @@ def test_trim_audio():
def test_classify():
file_path = "tests/test_audio/cafe_crowd_talk.wav"
waveform, sr = read_audio(file_path, 32000, mono=True)
expected_class = "Speech"
predicted_class = classify_audio(file_path)
predicted_class = classify_audio(waveform, sr)
assert predicted_class == expected_class , f"Expected {expected_class}, but got {predicted_class}"