# Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn

# Make the sibling ``vggish_input`` module importable regardless of the
# caller's working directory.
sys.path.append(str(Path(__file__).parent))

import vggish_input


class Model(nn.Module):
    """
    PyTorch model class.

    A VGG-style convolutional feature extractor followed by a three-layer
    fully connected head that produces a 128-dimensional embedding per
    input example.
    """

    def __init__(self):
        super().__init__()
        # Every conv is 3x3 / stride 1 / padding 1 (spatial size preserved);
        # each of the four 2x2 max-pools halves height and width.
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(256, 512, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, 1, 1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # The head expects 512 channels x 24 spatial positions after the
        # feature extractor.
        self.embeddings = nn.Sequential(
            nn.Linear(512 * 24, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            # NOTE: no activation follows the final projection; the 128-d
            # embedding is the raw linear output.
            nn.Linear(4096, 128),
        )

    def forward(self, x):
        """
        Compute embeddings for a batch of inputs.

        Args:
            x: 4-D tensor with a single channel (the first convolution takes
               one input channel). Its height and width must reduce to 24
               spatial positions after the four 2x2 poolings, e.g. 96 x 64.
               NOTE(review): 96 x 64 is inferred from ``512 * 24`` -- confirm
               against ``vggish_input``.

        Returns:
            Tensor of shape ``(batch, 128)``.
        """
        # Move channels last before flattening so the flattened features are
        # ordered (height, width, channel).
        # NOTE(review): presumably this matches the layout of the pretrained
        # weights -- confirm.
        x = self.features(x).permute(0, 2, 3, 1).contiguous()
        x = x.view(x.size(0), -1)
        x = self.embeddings(x)
        return x

    def preprocess(self, audio_path: str):
        """
        Convert an audio file into model input examples.

        Args:
            audio_path: path of the audio file, forwarded unchanged to
                ``vggish_input.wavfile_to_examples``.

        Returns:
            Whatever ``vggish_input.wavfile_to_examples`` returns.
            NOTE(review): presumably a batch of log-mel example tensors
            accepted by ``forward`` -- confirm.
        """
        audio_tensors = vggish_input.wavfile_to_examples(audio_path)
        return audio_tensors

    def train(self, mode: bool = True):
        """
        Switch the module between training and evaluation mode.

        This keeps the ``nn.Module.train`` contract. The previous zero-argument
        placeholder shadowed it, which made ``model.eval()`` (implemented as
        ``self.train(False)``) raise ``TypeError``.

        Args:
            mode: ``True`` for training mode, ``False`` for evaluation mode.

        Returns:
            The module itself, as ``nn.Module.train`` does.
        """
        return super().train(mode)