Merge pull request #8 from joelmathewthomas/feature/spectrogram

Add spectogram package
This commit is contained in:
Joel Mathew Thomas
2025-01-09 11:09:07 +05:30
committed by GitHub
4 changed files with 68 additions and 0 deletions
+4
View File
@@ -38,6 +38,7 @@ llvmlite==0.43.0
Markdown==3.7
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.10.0
matplotlib-inline==0.1.7
mdurl==0.1.2
ml-dtypes==0.4.1
@@ -77,6 +78,9 @@ pure_eval==0.2.3
pycparser==2.22
Pygments==2.18.0
pyparsing==3.2.0
PyQt6==6.8.0
PyQt6-Qt6==6.8.1
PyQt6_sip==13.9.1
pytest==8.3.4
python-dateutil==2.9.0.post0
PyYAML==6.0.2
+12
View File
@@ -0,0 +1,12 @@
# __init__.py
import logging
from datetime import datetime
# Root-logger setup for the package: timestamp-prefixed messages, INFO and above.
# NOTE: basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(message)s')

# Leave a trace in the log that the package was loaded.
logging.info("freq-split-enhance/spectogram package has been imported.")
+18
View File
@@ -0,0 +1,18 @@
import matplotlib.pyplot as plt
import librosa.display
import numpy as np
def display_spectrogram(spec_array: np.ndarray, plot_data: dict):
    """
    Render a spectrogram array in a Matplotlib figure and show it.

    Args:
        spec_array (np.ndarray): Spectrogram values to draw; the colorbar
            labels them as decibels.
        plot_data (dict): Plot settings. The keys 'sr', 'x_axis' and
            'y_axis' are read and forwarded to librosa.display.specshow.

    Raises:
        KeyError: If plot_data lacks 'sr', 'x_axis' or 'y_axis'.
    """
    # A wide, short canvas (10 x 4 inches) for the time/frequency image.
    plt.figure(figsize=(10, 4))

    sample_rate = plot_data['sr']
    x_axis_kind = plot_data['x_axis']
    y_axis_kind = plot_data['y_axis']
    librosa.display.specshow(
        spec_array,
        sr=sample_rate,
        x_axis=x_axis_kind,
        y_axis=y_axis_kind,
    )

    # Colorbar ticks carry an explicit sign and a "dB" suffix.
    plt.colorbar(format='%+2.0f dB')

    # The title names the frequency scale in upper case, e.g. "MEL".
    heading = f"Spectrogram ({y_axis_kind.upper()} axis)"
    plt.title(heading)

    plt.tight_layout()
    plt.show()
+34
View File
@@ -0,0 +1,34 @@
import librosa
import numpy as np
def generate_spectogram(audio_file: str, spectogram_type: str = 'mel', sr: int = 22050) -> tuple[np.ndarray, dict]:
    """
    Generate a decibel-scaled spectrogram array from an audio file.

    Args:
        audio_file (str): Path to the input audio file.
        spectogram_type (str): Type of spectrogram, either 'mel' or 'stft'.
            Default is 'mel'.
        sr (int): Sampling rate passed to librosa.load. Default is 22050.

    Returns:
        tuple: A tuple containing:
            - np.ndarray: Spectrogram array in decibels, referenced to the
              array's own maximum (ref=np.max).
            - dict: Metadata for plotting with the keys 'sr', 'x_axis'
              and 'y_axis'.

    Raises:
        ValueError: If spectogram_type is neither 'mel' nor 'stft'. This is
            raised before the audio file is opened.
    """
    # Validate first: an unsupported type should fail immediately rather
    # than after the whole file has been decoded and resampled.
    if spectogram_type not in ('mel', 'stft'):
        raise ValueError(f"Unsupported spectogram type: {spectogram_type}")

    # Load the audio file; use the sampling rate librosa reports back.
    waveform, sr = librosa.load(audio_file, sr=sr)

    if spectogram_type == 'mel':
        # Mel spectrogram converted with power_to_db.
        spec = librosa.feature.melspectrogram(y=waveform, sr=sr)
        spec_db = librosa.power_to_db(spec, ref=np.max)
        y_axis = 'mel'
    else:
        # STFT magnitudes (np.abs) converted with amplitude_to_db.
        spec = np.abs(librosa.stft(waveform))
        spec_db = librosa.amplitude_to_db(spec, ref=np.max)
        y_axis = 'log'

    plot_data = {'sr': sr, 'x_axis': 'time', 'y_axis': y_axis}
    return spec_db, plot_data