|
@ -28,8 +28,8 @@ def waveform_to_examples(data, sample_rate, return_tensor=True): |
|
|
"""Converts audio waveform into an array of examples for VGGish. |
|
|
"""Converts audio waveform into an array of examples for VGGish. |
|
|
|
|
|
|
|
|
Args: |
|
|
Args: |
|
|
data: |
|
|
|
|
|
np.array of 2 dimensions, the second of which is the number of channels. |
|
|
|
|
|
|
|
|
data: np.array of either one dimension (mono) or two dimensions |
|
|
|
|
|
(multi-channel, with the second dimension (axis 1) representing channels). |
|
|
Each sample is generally expected to lie in the range [-1.0, +1.0], |
|
|
Each sample is generally expected to lie in the range [-1.0, +1.0], |
|
|
although this is not required. |
|
|
although this is not required. |
|
|
sample_rate: Sample rate of data. |
|
|
sample_rate: Sample rate of data. |
|
@ -43,10 +43,8 @@ def waveform_to_examples(data, sample_rate, return_tensor=True): |
|
|
|
|
|
|
|
|
""" |
|
|
""" |
|
|
# Convert to mono. |
|
|
# Convert to mono. |
|
|
if data.shape[1] > 1: |
|
|
|
|
|
|
|
|
if len(data.shape) > 1: |
|
|
data = np.mean(data, axis=1) |
|
|
data = np.mean(data, axis=1) |
|
|
else: |
|
|
|
|
|
data = data.squeeze(1) |
|
|
|
|
|
# Resample to the rate assumed by VGGish. |
|
|
# Resample to the rate assumed by VGGish. |
|
|
if sample_rate != vggish_params.SAMPLE_RATE: |
|
|
if sample_rate != vggish_params.SAMPLE_RATE: |
|
|
data = resampy.resample(data, sample_rate, vggish_params.SAMPLE_RATE) |
|
|
data = resampy.resample(data, sample_rate, vggish_params.SAMPLE_RATE) |
|
|