From dcd1cc4c8d3ed9fdc8b6533b39ba48aa6a2f04e6 Mon Sep 17 00:00:00 2001
From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com>
Date: Tue, 18 Mar 2025 18:30:39 +0530
Subject: [PATCH 1/9] rename spectogram module to spectrogram
---
src/freqsplit/{spectogram => spectrogram}/__init__.py | 0
src/freqsplit/{spectogram => spectrogram}/display.py | 0
src/freqsplit/{spectogram => spectrogram}/generator.py | 10 +++++-----
3 files changed, 5 insertions(+), 5 deletions(-)
rename src/freqsplit/{spectogram => spectrogram}/__init__.py (100%)
rename src/freqsplit/{spectogram => spectrogram}/display.py (100%)
rename src/freqsplit/{spectogram => spectrogram}/generator.py (78%)
diff --git a/src/freqsplit/spectogram/__init__.py b/src/freqsplit/spectrogram/__init__.py
similarity index 100%
rename from src/freqsplit/spectogram/__init__.py
rename to src/freqsplit/spectrogram/__init__.py
diff --git a/src/freqsplit/spectogram/display.py b/src/freqsplit/spectrogram/display.py
similarity index 100%
rename from src/freqsplit/spectogram/display.py
rename to src/freqsplit/spectrogram/display.py
diff --git a/src/freqsplit/spectogram/generator.py b/src/freqsplit/spectrogram/generator.py
similarity index 78%
rename from src/freqsplit/spectogram/generator.py
rename to src/freqsplit/spectrogram/generator.py
index 8fa230d..46e6b26 100644
--- a/src/freqsplit/spectogram/generator.py
+++ b/src/freqsplit/spectrogram/generator.py
@@ -1,7 +1,7 @@
import librosa
import numpy as np
-def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int = 22050):
+def generate_spectrogram(audio_file: str, spectrogram_type: str = 'mel', sr: int = 22050):
"""
Generates a spectrogram array from an audio file.
@@ -19,16 +19,16 @@ def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int =
# Load the audio file
waveform, sr = librosa.load(audio_file, sr=sr)
- # Create the spectogram
- if spectogram_type == 'mel':
+ # Create the spectrogram
+ if spectrogram_type == 'mel':
spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
spec_db = librosa.power_to_db(spec, ref=np.max) # Convert to decibels
plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'mel'}
- elif spectogram_type == 'stft':
+ elif spectrogram_type == 'stft':
spec = np.abs(librosa.stft(waveform))
spec_db = librosa.amplitude_to_db(spec, ref=np.max)
plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'log'}
else:
- raise ValueError(f"Unsupported spectogram type: {spectogram_type}")
+ raise ValueError(f"Unsupported spectrogram type: {spectrogram_type}")
return spec_db, plot_data
From 21843f591efe291224640671480d2b282979f79c Mon Sep 17 00:00:00 2001
From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com>
Date: Tue, 18 Mar 2025 19:21:26 +0530
Subject: [PATCH 2/9] generate spectrogram at backend and send as response to
client
---
api/api/tasks.py | 13 +++++++++++--
api/api/views.py | 16 ++++++++++------
client/package.json | 1 +
client/src/Pages/PreviewPage.tsx | 9 +++++----
client/src/Pages/UploadPage.tsx | 4 ++++
client/src/contexts/MediaContext.tsx | 6 ++++--
6 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/api/api/tasks.py b/api/api/tasks.py
index 716bbec..0c023c0 100644
--- a/api/api/tasks.py
+++ b/api/api/tasks.py
@@ -1,5 +1,7 @@
import os
import shutil
+import json
+import numpy as np
from pathlib import Path
from celery import shared_task
from freqsplit.input.file_reader import read_audio
@@ -10,6 +12,7 @@ from freqsplit.preprocessing.resample import resample
from freqsplit.postprocessing.audio_writer import export_audio
from freqsplit.separation.demucs_wrapper import separate_audio_with_demucs
from freqsplit.refinement.deepfilternet_wrapper import noisereduce
+from freqsplit.spectrogram.generator import generate_spectrogram
@shared_task
@@ -24,8 +27,14 @@ def save_and_classify(file_path, file_content):
# Classify the audio
audio_class = classify_audio(waveform, sr)
-
- return audio_class, org_sr
+
+ # Generate spectrogram
+ spec_db, plot_data = generate_spectrogram(file_path)
+ # Convert numpy array to JSON-safe list
+ spec_db = np.nan_to_num(spec_db, nan=-80.0, posinf=-80.0, neginf=-80.0)
+ spec_data_json = json.dumps(spec_db.tolist())
+
+ return audio_class, org_sr, spec_data_json, plot_data['sr']
@shared_task
def normalize_audio_task(file_path):
diff --git a/api/api/views.py b/api/api/views.py
index 4f3cc4f..cecfd43 100644
--- a/api/api/views.py
+++ b/api/api/views.py
@@ -47,17 +47,21 @@ def upload_audio(request):
# Save the uploaded file
task = save_and_classify.apply(args=(file_path, audio_file.read()))
- if task.successful():
- audio_class = task.result[0]
+ if task.ready() and task.successful():
+ result = task.result
return Response(
{
"Status": "File uploaded successfully",
"file_uuid": file_uuid,
- "audio_class": audio_class,
- "sr": task.result[1]
- },
+ "audio_class": result[0],
+ "sr": result[1],
+ "spectrogram": result[2],
+ "spec_sr": result[3]
+ },
status=status.HTTP_201_CREATED,
- )
+ )
+ else:
+ return Response({"error": "Processing failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
# Endpoint to normalize audio
@api_view(['POST'])
diff --git a/client/package.json b/client/package.json
index c521b62..2e8c182 100644
--- a/client/package.json
+++ b/client/package.json
@@ -18,6 +18,7 @@
"axios": "^1.8.3",
"jszip": "^3.10.1",
"react": "^18.2.0",
+ "react-audio-spectrogram-player": "^2.0.1",
"react-dom": "^18.2.0",
"react-router-dom": "^6.22.1"
},
diff --git a/client/src/Pages/PreviewPage.tsx b/client/src/Pages/PreviewPage.tsx
index b8d976b..1fd218e 100644
--- a/client/src/Pages/PreviewPage.tsx
+++ b/client/src/Pages/PreviewPage.tsx
@@ -11,6 +11,8 @@ import {
import { VolumeUp as VolumeUpIcon, ErrorOutline as ErrorIcon } from '@mui/icons-material';
import StepperComponent from '../components/StepperComponent';
import { useMediaContext } from '../contexts/MediaContext';
+// @ts-ignore
+import SpectrogramPlayer from "react-audio-spectrogram-player"
function PreviewPage() {
const navigate = useNavigate();
@@ -84,11 +86,10 @@ function PreviewPage() {
{mediaFile.name}
Audio Classification: {audioClass || "No data received"}
diff --git a/client/src/Pages/UploadPage.tsx b/client/src/Pages/UploadPage.tsx index 7359519..bbbe1f4 100644 --- a/client/src/Pages/UploadPage.tsx +++ b/client/src/Pages/UploadPage.tsx @@ -91,6 +91,8 @@ function UploadPage() { file_uuid: string; sr: number; audio_class: string; + spectrogram: string; + spec_sr: number; }>("/api/upload", formData, { headers: { "Content-Type": "multipart/form-data", @@ -104,6 +106,8 @@ function UploadPage() { audio_class: res.data.audio_class, file_uuid: res.data.file_uuid, sr: res.data.sr, + spectrogram: res.data.spectrogram, + spec_sr: res.data.spec_sr })); setUpload(true); } diff --git a/client/src/contexts/MediaContext.tsx b/client/src/contexts/MediaContext.tsx index 985d5e4..c42ac1a 100644 --- a/client/src/contexts/MediaContext.tsx +++ b/client/src/contexts/MediaContext.tsx @@ -3,8 +3,8 @@ import React, { createContext, useState, useContext } from 'react'; interface MediaContextType { mediaFile: { name: string; url: string; type: string } | null; setMediaFile: (file: { name: string; url: string; type: string }) => void; - response: { file_uuid: string; sr: number; audio_class: string }; - setResponse: (response: { file_uuid: string; sr: number; audio_class: string }) => void; + response: { file_uuid: string; sr: number; audio_class: string, spectrogram: string, spec_sr: number }; + setResponse: (response: { file_uuid: string; sr: number; audio_class: string, spectrogram: string, spec_sr: number }) => void; extractedFiles: { name: string; url: string }[]; setExtractedFiles: (files: {name: string; url: string }[]) => void; downloadedFileURL: string; @@ -20,6 +20,8 @@ export const MediaProvider: React.FC<{ children: React.ReactNode }> = ({ childre audio_class: "", file_uuid: "", sr: 0, + spectrogram: "", + spec_sr: 0 }); const [extractedFiles, setExtractedFiles] = useStateAudio Classification: {audioClass || "No data received"}
From b622585fb9617fc3255fdd2535338a0599c53206 Mon Sep 17 00:00:00 2001 From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com> Date: Tue, 18 Mar 2025 20:37:20 +0530 Subject: [PATCH 5/9] add endpoint /api/spectrogram to calculate spectrograms --- api/api/tasks.py | 14 ++++++++++++++ api/api/views.py | 30 ++++++++++++++++++++++++++++++ api/backend/urls.py | 2 ++ 3 files changed, 46 insertions(+) diff --git a/api/api/tasks.py b/api/api/tasks.py index 0c023c0..0c5cc54 100644 --- a/api/api/tasks.py +++ b/api/api/tasks.py @@ -131,6 +131,20 @@ def noisereduce_task(file_path): return False @shared_task +def generate_spectrogram_task(file_path): + """Celery task to generate spectrogram""" + try: + file_path = Path(file_path) + + # Generate spectrogram + spec_db, plot_data = generate_spectrogram(file_path) + spec_db = np.nan_to_num(spec_db, nan=-80.0, posinf=-80.0, neginf=-80.0) + spec_data_json = json.dumps(spec_db.tolist()) + + return True, spec_data_json, plot_data['sr'] + except Exception as e: + return False +@shared_task def cleanup_task(file_path): """Celery task to cleanup files""" file_path = Path(file_path) diff --git a/api/api/views.py b/api/api/views.py index cecfd43..6c0cf6f 100644 --- a/api/api/views.py +++ b/api/api/views.py @@ -12,6 +12,7 @@ from .tasks import trim_audio_task from .tasks import resample_audio_task from .tasks import music_separation_task from .tasks import noisereduce_task +from .tasks import generate_spectrogram_task from .tasks import cleanup_task from freqsplit.input.format_checker import is_supported_format @@ -147,6 +148,35 @@ def noisereduce(request): else: return Response({"error": "Failed to remove noise from audio"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) +# Endpoint to generate spectrograms +@api_view(['POST']) +def generate_spectrogram(request): + """Handle generation of spectrogram""" + file_uuid = request.data.get("file_uuid") + file_name = request.data.get("file_name") + file_path = 
os.path.join(UPLOAD_DIR, file_uuid, file_name) + + # Check if file exists + if os.path.exists(file_path): + # Call Celery task synchronously + task = generate_spectrogram_task.apply(args=(file_path,)) + + if task.ready() and task.successful(): + result = task.result + if result[0]: + return Response( + { + "Status": "Spectrogram generated successfully", + "spectrogram": result[1], + "spec_sr": result[2] + }, + status=status.HTTP_200_OK + ) + else: + return Response({"error": "Failed to generate spectrogram"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + else: + return Response({"error": "File Not Found"}, status=status.HTTP_404_NOT_FOUND) + # Endpoint to download audio file or zipped directory @api_view(['GET']) def download_audio(request): diff --git a/api/backend/urls.py b/api/backend/urls.py index 727daa7..a675ee7 100644 --- a/api/backend/urls.py +++ b/api/backend/urls.py @@ -23,6 +23,7 @@ from api.views import resample_audio from api.views import separate_music from api.views import noisereduce from api.views import download_audio +from api.views import generate_spectrogram from api.views import cleanup from api.views import cleanup_zip @@ -35,6 +36,7 @@ urlpatterns = [ path('api/separate', separate_music, name="separate_music"), path('api/noisereduce', noisereduce, name="noisreduce"), path('api/download', download_audio, name="download_audio"), + path('api/spectrogram', generate_spectrogram, name="generate_spectrogram"), path('api/cleanup', cleanup, name="cleanup"), path('api/cleanup_zip', cleanup_zip, name="cleanup_zip") ] From 018db7ea304e2c1333a5829d182a6475fb083ea8 Mon Sep 17 00:00:00 2001 From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com> Date: Tue, 18 Mar 2025 20:58:50 +0530 Subject: [PATCH 6/9] generate spectrograms for music sources and store it in context --- client/src/Pages/ProcessingPage.tsx | 25 ++++++++++++++++++++++++- client/src/contexts/MediaContext.tsx | 4 ++-- 2 files changed, 26 insertions(+), 3 deletions(-) 
diff --git a/client/src/Pages/ProcessingPage.tsx b/client/src/Pages/ProcessingPage.tsx index df2e7c8..f04206b 100644 --- a/client/src/Pages/ProcessingPage.tsx +++ b/client/src/Pages/ProcessingPage.tsx @@ -94,9 +94,32 @@ function ProcessingPage() { if (!fileData.dir) { const fileBlob = await fileData.async("blob"); const fileURL = URL.createObjectURL(fileBlob); - fileURLs.push({ name: filename, url: fileURL }); + + // Get spectrograms + setProgress(95); + setStatusText("Calculating Spectrograms"); + + const formData = new FormData(); + formData.append("file_uuid", response.file_uuid); + formData.append("file_name", filename); + + const res = await axios.post<{ + Status: string; + spectrogram: string; + spec_sr: number; + }>("/api/spectrogram", formData, { + headers: { + "Content-Type": "multipart/form-data", + }, + }) + + if (res.status === 200 && res.data){ + + } + fileURLs.push({ name: filename, url: fileURL, spectrogram: res.data.spectrogram, spec_sr: res.data.spec_sr }); } } + console.log(fileURLs) setExtractedFiles(fileURLs); setProgress(100); }; diff --git a/client/src/contexts/MediaContext.tsx b/client/src/contexts/MediaContext.tsx index c42ac1a..94f63dd 100644 --- a/client/src/contexts/MediaContext.tsx +++ b/client/src/contexts/MediaContext.tsx @@ -5,8 +5,8 @@ interface MediaContextType { setMediaFile: (file: { name: string; url: string; type: string }) => void; response: { file_uuid: string; sr: number; audio_class: string, spectrogram: string, spec_sr: number }; setResponse: (response: { file_uuid: string; sr: number; audio_class: string, spectrogram: string, spec_sr: number }) => void; - extractedFiles: { name: string; url: string }[]; - setExtractedFiles: (files: {name: string; url: string }[]) => void; + extractedFiles: { name: string; url: string, spectrogram: string, spec_sr: number }[]; + setExtractedFiles: (files: {name: string; url: string, spectrogram: string, spec_sr: number}[]) => void; downloadedFileURL: string; setDownloadedFileURL: ( file: 
string) => void; } From 9139ddce53dccce6df14400befcbcce990b17a31 Mon Sep 17 00:00:00 2001 From: Joel Mathew Thomas <90510078+joelmathewthomas@users.noreply.github.com> Date: Tue, 18 Mar 2025 21:06:40 +0530 Subject: [PATCH 7/9] preview spectrograms for music --- client/src/Pages/ResultsPage.tsx | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/client/src/Pages/ResultsPage.tsx b/client/src/Pages/ResultsPage.tsx index 36988ba..4044b74 100644 --- a/client/src/Pages/ResultsPage.tsx +++ b/client/src/Pages/ResultsPage.tsx @@ -18,6 +18,8 @@ import { } from '@mui/icons-material'; import StepperComponent from '../components/StepperComponent'; import { useMediaContext } from '../contexts/MediaContext'; +// @ts-ignore +import SpectrogramPlayer from "react-audio-spectrogram-player" function ResultsPage() { const navigate = useNavigate(); @@ -25,7 +27,6 @@ function ResultsPage() { console.log("Extracted files are", extractedFiles); // const [isPlaying, setIsPlaying] = useState(false); const audioRefs = [useRef(null), useRef(null), useRef(null),useRef(null)]; - const mediaFileRef = useRef(null); const audioClass = response.audio_class const isVideo = mediaFile?.type.includes('video'); @@ -101,11 +102,14 @@ function ResultsPage() {