|
@@ -12,12 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from torch import nn
 import torch
+import torch.nn as nn
-import numpy as np
 import sys
 from pathlib import Path

+from towhee.models.vggish.torch_vggish import VGG
+
 sys.path.append(str(Path(__file__).parent))

 import vggish_input
|
@@ -26,39 +27,18 @@ class Model(nn.Module):
     """
     PyTorch model class
     """
-    def __init__(self):
+    def __init__(self, weights_path: str=None):
         super().__init__()
-        self.features = nn.Sequential(
-            nn.Conv2d(1, 64, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(2, 2),
-            nn.Conv2d(64, 128, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(2, 2),
-            nn.Conv2d(128, 256, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.Conv2d(256, 256, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(2, 2),
-            nn.Conv2d(256, 512, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.Conv2d(512, 512, 3, 1, 1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(2, 2))
-        self.embeddings = nn.Sequential(
-            nn.Linear(512 * 24, 4096),
-            nn.ReLU(inplace=True),
-            nn.Linear(4096, 4096),
-            nn.ReLU(inplace=True),
-            nn.Linear(4096, 128),
-            #nn.ReLU(inplace=True)
-        )
+        self._model = VGG()
+        if not weights_path:
+            path = str(Path(__file__).parent)
+            weights_path = path + '/vggish.pth'
+        state_dict = torch.load(weights_path, map_location=torch.device('cpu'))
+        self._model.load_state_dict(state_dict)
+        self._model.eval()

     def forward(self, x):
-        x = self.features(x).permute(0, 2, 3, 1).contiguous()
-        x = x.view(x.size(0), -1)
-        x = self.embeddings(x)
-        return x
+        return self._model(x)

     def preprocess(self, audio_path: str):
         audio_tensors = vggish_input.wavfile_to_examples(audio_path)
|
|
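Not part of the diff itself: a minimal usage sketch of the refactored operator. It assumes the pretrained `vggish.pth` file sits next to the module, that the truncated `preprocess` hunk goes on to return the tensors produced by `wavfile_to_examples` (shape `[num_examples, 1, 96, 64]` in the common VGGish ports), and that the file is importable as `model` — the module name and audio path below are placeholders.

```python
import torch

from model import Model  # placeholder import; the actual module path may differ

model = Model()  # by default loads vggish.pth from the module's own directory

# Turn a wav file into a batch of ~0.96 s log-mel examples.
examples = model.preprocess('audio.wav')  # placeholder path

# One 128-d VGGish embedding per example; the wrapped VGG is already in eval mode.
with torch.no_grad():
    embeddings = model(examples)

print(embeddings.shape)  # expected: (num_examples, 128)
```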