Merge pull request #8 from joelmathewthomas/feature/spectrogram

Add spectogram package
2025-01-09 11:09:07 +05:30
parent 6795878bab 6a19dbed20
commit 740827e7a9
4 changed files with 68 additions and 0 deletions
@@ -38,6 +38,7 @@ llvmlite==0.43.0
 Markdown==3.7
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
 matplotlib==3.10.0
 matplotlib-inline==0.1.7
 mdurl==0.1.2
 ml-dtypes==0.4.1
@@ -77,6 +78,9 @@ pure_eval==0.2.3
 pycparser==2.22
 Pygments==2.18.0
 pyparsing==3.2.0
 PyQt6==6.8.0
 PyQt6-Qt6==6.8.1
 PyQt6_sip==13.9.1
 pytest==8.3.4
 python-dateutil==2.9.0.post0
 PyYAML==6.0.2
@@ -0,0 +1,12 @@
 # __init__.py
 import logging
 from datetime import datetime
 # Set up the root logger when the package is imported: INFO and above,
 # each record rendered as "<timestamp> : <message>".
 logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')
 # Leave a trace in the log that the package import happened.
 logging.info("freq-split-enhance/spectogram package has been imported.")
@@ -0,0 +1,18 @@
 import matplotlib.pyplot as plt
 import librosa.display
 import numpy as np
 def display_spectrogram(spec_array: np.ndarray, plot_data: dict):
    """
    Render a spectrogram with Matplotlib and show the resulting figure.

    Args:
        spec_array (np.ndarray): Spectrogram values to draw; the colorbar
            labels them as dB.
        plot_data (dict): Plot settings; the keys 'sr', 'x_axis' and
            'y_axis' are read and forwarded to librosa's specshow.
    """
    plt.figure(figsize=(10, 4))

    # Collect the three specshow options in the order they are looked up.
    specshow_kwargs = {name: plot_data[name] for name in ('sr', 'x_axis', 'y_axis')}
    librosa.display.specshow(spec_array, **specshow_kwargs)
    plt.colorbar(format='%+2.0f dB')

    # The title echoes the y_axis setting in upper case.
    axis_label = specshow_kwargs['y_axis'].upper()
    plt.title(f"Spectrogram ({axis_label} axis)")

    plt.tight_layout()
    plt.show()
@@ -0,0 +1,34 @@
 import librosa
 import numpy as np
 def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int = 22050) -> tuple:
    """
    Generates a spectrogram array (in decibels) from an audio file.

    Args:
        audio_file (str): Path to the input audio file.
        spectogram_type (str): Type of spectrogram, either 'mel' or 'stft'.
            Default is 'mel'.
        sr (int): Sampling rate passed to librosa.load. Default is 22050.

    Returns:
        tuple: A tuple containing:
            - np.ndarray: Spectrogram array (in decibels, relative to the
              array's maximum).
            - dict: Metadata for plotting with the keys 'sr', 'x_axis'
              and 'y_axis'.

    Raises:
        ValueError: If spectogram_type is neither 'mel' nor 'stft'. This is
            checked before the audio file is opened.
    """
    # Reject unknown types first so a bad argument fails immediately instead
    # of after the audio file has been read and decoded.
    if spectogram_type not in ('mel', 'stft'):
        raise ValueError(f"Unsupported spectogram type: {spectogram_type}")

    # Load the audio file; keep the sampling rate that load() reports, since
    # that is the rate the waveform actually has.
    waveform, sr = librosa.load(audio_file, sr=sr)

    if spectogram_type == 'mel':
        # Mel spectrogram, converted to dB with the power scale.
        spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'mel'}
    else:
        # 'stft': magnitude of the STFT, converted to dB with the amplitude scale.
        spec = np.abs(librosa.stft(waveform))
        spec_db = librosa.amplitude_to_db(spec, ref=np.max)
        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'log'}

    return spec_db, plot_data