Browse Source
Allow all int types of audio data
Signed-off-by: Jael Gu <mengjia.gu@zilliz.com>
main
1 changed file with
3 additions and
6 deletions
-
torch_vggish.py
|
@@ -62,15 +62,12 @@ class Vggish(NNOperator): |
|
|
outs = features.to("cpu") |
|
|
outs = features.to("cpu") |
|
|
return [AudioOutput(outs.detach().numpy())] |
|
|
return [AudioOutput(outs.detach().numpy())] |
|
|
|
|
|
|
|
|
def preprocess(self, audio: Union[str, numpy.ndarray], sr: int = None): |
|
|
|
|
|
if audio.dtype == numpy.int32: |
|
|
|
|
|
samples = audio / 2147483648.0 |
|
|
|
|
|
elif audio.dtype == numpy.int16: |
|
|
|
|
|
samples = audio / 32768.0 |
|
|
|
|
|
|
|
|
def preprocess(self, audio: numpy.ndarray, sr: int = None): |
|
|
|
|
|
ii = numpy.iinfo(audio.dtype) |
|
|
|
|
|
samples = 2 * audio / (ii.max - ii.min + 1) |
|
|
return vggish_input.waveform_to_examples(samples, sr, return_tensor=True) |
|
|
return vggish_input.waveform_to_examples(samples, sr, return_tensor=True) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# if __name__ == '__main__': |
|
|
# if __name__ == '__main__': |
|
|
# encoder = Vggish() |
|
|
# encoder = Vggish() |
|
|
# |
|
|
# |
|
|