# freqsplit/src/spectogram/generator.py

import librosa
import numpy as np

def generate_spectogram(
    audio_file: str,
    spectogram_type: str = 'mel',
    sr: int = 22050,
) -> "tuple[np.ndarray, dict]":
    """
    Generate a decibel-scaled spectrogram array from an audio file.

    The requested type is validated before the file is opened, so an
    unsupported ``spectogram_type`` fails immediately instead of after the
    audio has been loaded.

    Args:
        audio_file (str): Path to the input audio file.
        spectogram_type (str): Type of spectrogram, either 'mel' or 'stft'.
            Default is 'mel'. (The parameter name keeps its existing
            spelling for backward compatibility.)
        sr (int): Sampling rate passed to ``librosa.load``. Default is 22050.

    Returns:
        tuple: A tuple containing:
            - np.ndarray: Spectrogram in decibels, referenced to the array's
              own maximum (``ref=np.max``).
            - dict: Metadata for plotting with keys 'sr', 'x_axis' and
              'y_axis'. 'sr' is the rate returned by ``librosa.load``.

    Raises:
        ValueError: If ``spectogram_type`` is neither 'mel' nor 'stft'.
    """
    # Fail fast: reject an unknown type before loading the audio, which is
    # by far the most expensive step of this function.
    if spectogram_type not in ('mel', 'stft'):
        raise ValueError(f"Unsupported spectogram type: {spectogram_type}")

    # Load the audio file; librosa returns the effective sampling rate.
    waveform, sr = librosa.load(audio_file, sr=sr)

    if spectogram_type == 'mel':
        # The mel spectrogram is converted with power_to_db.
        spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'mel'}
    else:
        # 'stft': take the magnitude of the complex STFT, hence
        # amplitude_to_db rather than power_to_db.
        spec = np.abs(librosa.stft(waveform))
        spec_db = librosa.amplitude_to_db(spec, ref=np.max)
        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'log'}

    return spec_db, plot_data