Remove unnecessary libraries and replace with librosa

- Removed unused imports for `matplotlib` and `IPython`
- Replaced `scipy` resampling with `librosa` for waveform handling
This commit is contained in:
Joel Mathew Thomas
2024-12-26 00:49:10 +05:30
parent 1ddbbadfc8
commit cbebf7bd93
3 changed files with 25 additions and 24 deletions
+19 -1
View File
@@ -1,19 +1,26 @@
absl-py==2.1.0 absl-py==2.1.0
asttokens==3.0.0
astunparse==1.6.3 astunparse==1.6.3
audioread==3.0.1 audioread==3.0.1
certifi==2024.12.14 certifi==2024.12.14
cffi==1.17.1 cffi==1.17.1
charset-normalizer==3.4.0 charset-normalizer==3.4.0
contourpy==1.3.1
cycler==0.12.1
decorator==5.1.1 decorator==5.1.1
executing==2.1.0
flatbuffers==24.12.23 flatbuffers==24.12.23
fonttools==4.55.3
gast==0.6.0 gast==0.6.0
google-pasta==0.2.0 google-pasta==0.2.0
grpcio==1.68.1 grpcio==1.68.1
h5py==3.12.1 h5py==3.12.1
idna==3.10 idna==3.10
iniconfig==2.0.0 iniconfig==2.0.0
jedi==0.19.2
joblib==1.4.2 joblib==1.4.2
keras==3.7.0 keras==3.7.0
kiwisolver==1.4.8
lazy_loader==0.4 lazy_loader==0.4
libclang==18.1.1 libclang==18.1.1
librosa==0.10.2.post1 librosa==0.10.2.post1
@@ -21,6 +28,7 @@ llvmlite==0.43.0
Markdown==3.7 Markdown==3.7
markdown-it-py==3.0.0 markdown-it-py==3.0.0
MarkupSafe==3.0.2 MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mdurl==0.1.2 mdurl==0.1.2
ml-dtypes==0.4.1 ml-dtypes==0.4.1
msgpack==1.1.0 msgpack==1.1.0
@@ -30,21 +38,29 @@ numpy==2.0.2
opt_einsum==3.4.0 opt_einsum==3.4.0
optree==0.13.1 optree==0.13.1
packaging==24.2 packaging==24.2
parso==0.8.4
pexpect==4.9.0
pillow==11.0.0
platformdirs==4.3.6 platformdirs==4.3.6
pluggy==1.5.0 pluggy==1.5.0
pooch==1.8.2 pooch==1.8.2
prompt_toolkit==3.0.48
protobuf==5.29.2 protobuf==5.29.2
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22 pycparser==2.22
Pygments==2.18.0 Pygments==2.18.0
pyparsing==3.2.0
pytest==8.3.4 pytest==8.3.4
python-dateutil==2.9.0.post0
requests==2.32.3 requests==2.32.3
rich==13.9.4 rich==13.9.4
scikit-learn==1.6.0 scikit-learn==1.6.0
scipy==1.14.1
setuptools==75.6.0 setuptools==75.6.0
six==1.17.0 six==1.17.0
soundfile==0.12.1 soundfile==0.12.1
soxr==0.5.0.post1 soxr==0.5.0.post1
stack-data==0.6.3
tensorboard==2.18.0 tensorboard==2.18.0
tensorboard-data-server==0.7.2 tensorboard-data-server==0.7.2
tensorflow==2.18.0 tensorflow==2.18.0
@@ -52,8 +68,10 @@ tensorflow-hub==0.16.1
termcolor==2.5.0 termcolor==2.5.0
tf_keras==2.18.0 tf_keras==2.18.0
threadpoolctl==3.5.0 threadpoolctl==3.5.0
traitlets==5.14.3
typing_extensions==4.12.2 typing_extensions==4.12.2
urllib3==2.3.0 urllib3==2.3.0
wcwidth==0.2.13
Werkzeug==3.1.3 Werkzeug==3.1.3
wheel==0.45.1 wheel==0.45.1
wrapt==1.17.0 wrapt==1.17.0
-3
View File
@@ -17,7 +17,6 @@ grpcio==1.68.1
h5py==3.12.1 h5py==3.12.1
idna==3.10 idna==3.10
iniconfig==2.0.0 iniconfig==2.0.0
ipython==8.31.0
jedi==0.19.2 jedi==0.19.2
joblib==1.4.2 joblib==1.4.2
keras==3.7.0 keras==3.7.0
@@ -29,7 +28,6 @@ llvmlite==0.43.0
Markdown==3.7 Markdown==3.7
markdown-it-py==3.0.0 markdown-it-py==3.0.0
MarkupSafe==3.0.2 MarkupSafe==3.0.2
matplotlib==3.10.0
matplotlib-inline==0.1.7 matplotlib-inline==0.1.7
mdurl==0.1.2 mdurl==0.1.2
ml-dtypes==0.4.1 ml-dtypes==0.4.1
@@ -58,7 +56,6 @@ python-dateutil==2.9.0.post0
requests==2.32.3 requests==2.32.3
rich==13.9.4 rich==13.9.4
scikit-learn==1.6.0 scikit-learn==1.6.0
scipy==1.14.1
setuptools==75.6.0 setuptools==75.6.0
six==1.17.0 six==1.17.0
soundfile==0.12.1 soundfile==0.12.1
+6 -20
View File
@@ -1,13 +1,9 @@
import tensorflow as tf import tensorflow as tf
import tensorflow_hub as hub import tensorflow_hub as hub
import librosa
import numpy as np import numpy as np
import csv import csv
import matplotlib.pyplot as plt
from IPython.display import Audio
from scipy.io import wavfile
from scipy import signal
model = hub.load('https://tfhub.dev/google/yamnet/1') model = hub.load('https://tfhub.dev/google/yamnet/1')
#Find the name of the class with the top score when mean-aggregated across frames. #Find the name of the class with the top score when mean-aggregated across frames.
@@ -24,27 +20,17 @@ def class_names_from_csv(class_map_scv_text):
class_map_path = model.class_map_path().numpy() class_map_path = model.class_map_path().numpy()
class_names = class_names_from_csv(class_map_path) class_names = class_names_from_csv(class_map_path)
# Resample audio to 16K
def ensure_sample_rate(original_sample_rate, waveform, desired_sample_rate=16000):
"""Resample waveform if required."""
if original_sample_rate != desired_sample_rate:
desired_length = int(round(float(len(waveform)) / original_sample_rate * desired_sample_rate))
waveform = signal.resample(waveform, desired_length)
return desired_sample_rate, waveform
# wav_file_name = 'speech_whistling2.wav'
wav_file_name = 'cafe_crowd_talk.wav' wav_file_name = 'cafe_crowd_talk.wav'
sample_rate, wav_data = wavfile.read(wav_file_name, 'rb') waveform, sample_rate = librosa.load(wav_file_name, sr=16000)
sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)
# Show some basic information about the audio. # Show some basic information about the audio.
duration = len(wav_data)/sample_rate duration = len(waveform)/sample_rate
print(f'Sample rate: {sample_rate} Hz') print(f'Sample rate: {sample_rate} Hz')
print(f'Total duration: {duration:.2f}s') print(f'Total duration: {duration:.2f}s')
print(f'Size of the input: {len(wav_data)}') print(f'Size of the input: {len(waveform)}')
# The wav_data needs to be normalized to values in [-1.0, 1.0] # The waveform needs to be normalized to values in [-1.0, 1.0] (librosa load already does this)
waveform = wav_data / tf.int16.max # No need to do this as librosa already normalizes# The wav_data needs to be normalized to values in [-1.0, 1.0]
# Execute the Model # Execute the Model
# Check the output. # Check the output.