def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int = 22050):
    """
    Generate a decibel-scaled spectrogram array from an audio file.

    Args:
        audio_file (str): Path to the input audio file.
        spectogram_type (str): Type of spectrogram to compute, either
            'mel' or 'stft'. Default is 'mel'. (The keyword is spelled
            ``spectogram_type``; it is kept for caller compatibility.)
        sr (int): Sampling rate requested when loading the audio.
            Default is 22050.

    Returns:
        tuple: A tuple containing:
            - np.ndarray: Spectrogram in decibels, referenced to the
              array's own maximum (``ref=np.max``).
            - dict: Metadata for plotting with keys 'sr', 'x_axis' and
              'y_axis'. 'sr' is the rate reported back by
              ``librosa.load``.

    Raises:
        ValueError: If ``spectogram_type`` is neither 'mel' nor 'stft'.
            Raised before the audio file is touched.
    """
    # Validate first: an unsupported type should fail immediately instead of
    # after the (potentially slow) decode/resample of the audio file.
    if spectogram_type not in ('mel', 'stft'):
        raise ValueError(f"Unsupported spectogram type: {spectogram_type}")

    # Load the audio file; librosa returns the effective sampling rate,
    # which is what gets reported in the plotting metadata.
    waveform, sr = librosa.load(audio_file, sr=sr)

    if spectogram_type == 'mel':
        # Mel spectrogram is a power spectrogram -> power_to_db.
        spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        y_axis = 'mel'
    else:  # 'stft' (guaranteed by the validation above)
        # STFT magnitude is an amplitude spectrogram -> amplitude_to_db.
        spec = np.abs(librosa.stft(waveform))
        spec_db = librosa.amplitude_to_db(spec, ref=np.max)
        y_axis = 'log'

    # Axis names are presumably consumed by librosa.display.specshow —
    # confirm against the plotting caller.
    plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': y_axis}

    return spec_db, plot_data