Merge pull request #8 from joelmathewthomas/feature/spectrogram

Add spectogram package
2025-01-09 11:09:07 +05:30
parent 6795878bab 6a19dbed20
commit 740827e7a9
4 changed files with 68 additions and 0 deletions
@@ -38,6 +38,7 @@ llvmlite==0.43.0
 Markdown==3.7
 markdown-it-py==3.0.0
 MarkupSafe==3.0.2
+matplotlib==3.10.0
 matplotlib-inline==0.1.7
 mdurl==0.1.2
 ml-dtypes==0.4.1
@@ -77,6 +78,9 @@ pure_eval==0.2.3
 pycparser==2.22
 Pygments==2.18.0
 pyparsing==3.2.0
+PyQt6==6.8.0
+PyQt6-Qt6==6.8.1
+PyQt6_sip==13.9.1
 pytest==8.3.4
 python-dateutil==2.9.0.post0
 PyYAML==6.0.2
@@ -0,0 +1,12 @@
+# __init__.py
+
+import logging
+from datetime import datetime
+
+# Configure logging when the package is imported: timestamped messages, INFO level
+logging.basicConfig(
+    format='%(asctime)s : %(message)s',
+    level = logging.INFO
+)
+
+logging.info("freq-split-enhance/spectogram package has been imported.")
@@ -0,0 +1,18 @@
+import matplotlib.pyplot as plt
+import librosa.display
+import numpy as np
+
+def display_spectrogram(spec_array: np.ndarray, plot_data: dict):
+    """
+    Displays a spectrogram array in a new 10x4 Matplotlib figure; returns nothing.
+
+    Args:
+        spec_array (np.ndarray): Spectrogram array (in decibels).
+        plot_data (dict): Plot metadata; the keys 'sr', 'x_axis' and 'y_axis' are required.
+    """
+    plt.figure(figsize=(10, 4))
+    librosa.display.specshow(spec_array, sr=plot_data['sr'], x_axis=plot_data['x_axis'], y_axis=plot_data['y_axis'])
+    plt.colorbar(format='%+2.0f dB')  # colorbar ticks are labelled as signed dB values
+    plt.title(f"Spectrogram ({plot_data['y_axis'].upper()} axis)")
+    plt.tight_layout()
+    plt.show()
@@ -0,0 +1,34 @@
+import librosa
+import numpy as np
+
+def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int = 22050):
+    """
+    Generates a spectrogram array from an audio file.
+
+    Args:
+        audio_file (str): Path to the input audio file.
+        spectogram_type (str): Type of spectrogram ('stft', 'mel'). Default is 'mel'.
+        sr (int): Sampling rate passed to librosa.load. Default is 22050.
+
+    Returns:
+        tuple: (spectrogram array in decibels, plot metadata dict with keys sr, x_axis, y_axis).
+
+    Raises:
+        ValueError: If spectogram_type is neither 'mel' nor 'stft'.
+    """
+    # Load the audio file; sr is rebound to the rate returned by librosa.load
+    waveform, sr = librosa.load(audio_file, sr=sr)
+
+    # Create the spectrogram
+    if spectogram_type == 'mel':
+        spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
+        spec_db = librosa.power_to_db(spec, ref=np.max) # Convert to decibels relative to the array maximum
+        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'mel'}
+    elif spectogram_type == 'stft':
+        spec = np.abs(librosa.stft(waveform))  # magnitude of the complex STFT
+        spec_db = librosa.amplitude_to_db(spec, ref=np.max)
+        plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': 'log'}
+    else:
+        raise ValueError(f"Unsupported spectogram type: {spectogram_type}")
+
+    return spec_db, plot_data