diff --git a/requirements.txt b/requirements.txt index 589adbe..af1627b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,26 @@ absl-py==2.1.0 +asttokens==3.0.0 astunparse==1.6.3 audioread==3.0.1 certifi==2024.12.14 cffi==1.17.1 charset-normalizer==3.4.0 +contourpy==1.3.1 +cycler==0.12.1 decorator==5.1.1 +executing==2.1.0 flatbuffers==24.12.23 +fonttools==4.55.3 gast==0.6.0 google-pasta==0.2.0 grpcio==1.68.1 h5py==3.12.1 idna==3.10 iniconfig==2.0.0 +jedi==0.19.2 joblib==1.4.2 keras==3.7.0 +kiwisolver==1.4.8 lazy_loader==0.4 libclang==18.1.1 librosa==0.10.2.post1 @@ -21,6 +28,7 @@ llvmlite==0.43.0 Markdown==3.7 markdown-it-py==3.0.0 MarkupSafe==3.0.2 +matplotlib-inline==0.1.7 mdurl==0.1.2 ml-dtypes==0.4.1 msgpack==1.1.0 @@ -30,21 +38,29 @@ numpy==2.0.2 opt_einsum==3.4.0 optree==0.13.1 packaging==24.2 +parso==0.8.4 +pexpect==4.9.0 +pillow==11.0.0 platformdirs==4.3.6 pluggy==1.5.0 pooch==1.8.2 +prompt_toolkit==3.0.48 protobuf==5.29.2 +ptyprocess==0.7.0 +pure_eval==0.2.3 pycparser==2.22 Pygments==2.18.0 +pyparsing==3.2.0 pytest==8.3.4 +python-dateutil==2.9.0.post0 requests==2.32.3 rich==13.9.4 scikit-learn==1.6.0 -scipy==1.14.1 setuptools==75.6.0 six==1.17.0 soundfile==0.12.1 soxr==0.5.0.post1 +stack-data==0.6.3 tensorboard==2.18.0 tensorboard-data-server==0.7.2 tensorflow==2.18.0 @@ -52,8 +68,10 @@ tensorflow-hub==0.16.1 termcolor==2.5.0 tf_keras==2.18.0 threadpoolctl==3.5.0 +traitlets==5.14.3 typing_extensions==4.12.2 urllib3==2.3.0 +wcwidth==0.2.13 Werkzeug==3.1.3 wheel==0.45.1 wrapt==1.17.0 diff --git a/samples/requirements.txt b/samples/requirements.txt index dc35a38..af1627b 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -17,7 +17,6 @@ grpcio==1.68.1 h5py==3.12.1 idna==3.10 iniconfig==2.0.0 -ipython==8.31.0 jedi==0.19.2 joblib==1.4.2 keras==3.7.0 @@ -29,7 +28,6 @@ llvmlite==0.43.0 Markdown==3.7 markdown-it-py==3.0.0 MarkupSafe==3.0.2 -matplotlib==3.10.0 matplotlib-inline==0.1.7 mdurl==0.1.2 ml-dtypes==0.4.1 @@ -58,7 +56,6 
@@ python-dateutil==2.9.0.post0 requests==2.32.3 rich==13.9.4 scikit-learn==1.6.0 -scipy==1.14.1 setuptools==75.6.0 six==1.17.0 soundfile==0.12.1 diff --git a/src/preprocessing/classify.py b/src/preprocessing/classify.py index a96ecf4..d1fc76c 100644 --- a/src/preprocessing/classify.py +++ b/src/preprocessing/classify.py @@ -1,13 +1,9 @@ import tensorflow as tf import tensorflow_hub as hub +import librosa import numpy as np import csv -import matplotlib.pyplot as plt -from IPython.display import Audio -from scipy.io import wavfile -from scipy import signal - model = hub.load('https://tfhub.dev/google/yamnet/1') #Find the name of the class with the top score when mean-aggregated across frames. @@ -24,27 +20,17 @@ def class_names_from_csv(class_map_scv_text): class_map_path = model.class_map_path().numpy() class_names = class_names_from_csv(class_map_path) -# Resample audio to 16K -def ensure_sample_rate(original_sample_rate, waveform, desired_sample_rate=16000): - """Resample waveform if required.""" - if original_sample_rate != desired_sample_rate: - desired_length = int(round(float(len(waveform)) / original_sample_rate * desired_sample_rate)) - waveform = signal.resample(waveform, desired_length) - return desired_sample_rate, waveform - -# wav_file_name = 'speech_whistling2.wav' wav_file_name = 'cafe_crowd_talk.wav' -sample_rate, wav_data = wavfile.read(wav_file_name, 'rb') -sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data) +waveform, sample_rate = librosa.load(wav_file_name, sr=16000) # Show some basic information about the audio. 
-duration = len(wav_data)/sample_rate +duration = len(waveform)/sample_rate print(f'Sample rate: {sample_rate} Hz') print(f'Total duration: {duration:.2f}s') -print(f'Size of the input: {len(wav_data)}') +print(f'Size of the input: {len(waveform)}') -# The wav_data needs to be normalized to values in [-1.0, 1.0] -waveform = wav_data / tf.int16.max +# The waveform needs to be normalized to values in [-1.0, 1.0]. +# No extra scaling is needed here: librosa.load already returns normalized samples. # Execute the Model # Check the output.