vit-image-embedding/vit_image_embedding.py


								# Copyright 2021 Zilliz. All rights reserved.

								#

								# Licensed under the Apache License, Version 2.0 (the "License");

								# you may not use this file except in compliance with the License.

								# You may obtain a copy of the License at

								#

								#     http://www.apache.org/licenses/LICENSE-2.0

								#

								# Unless required by applicable law or agreed to in writing, software

								# distributed under the License is distributed on an "AS IS" BASIS,

								# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

								# See the License for the specific language governing permissions and

								# limitations under the License.


								import sys

								from typing import NamedTuple

								from pathlib import Path

								from PIL import Image

								import torch

								import numpy


								from towhee.operator import Operator


								class VitImageEmbedding(Operator):
								    """
								    Embedding extractor using ViT.

								    Args:
								        model_name (`string`):
								            Model name.
								        framework (`string`):
								            Backend framework. Only 'pytorch' is supported.
								        weights_path (`string`):
								            Path to local weights.

								    Raises:
								        ValueError: If `framework` is not 'pytorch'.
								    """

								    def __init__(self, model_name: str = 'vit_large_patch16_224',
								                 framework: str = 'pytorch', weights_path: str = None) -> None:
								        super().__init__()

								        # Fail fast with a clear message; otherwise an unknown framework
								        # would only surface below as an unbound-name error on `Model`.
								        if framework != 'pytorch':
								            raise ValueError(f'Unsupported framework: {framework!r}')

								        # The backend package is imported relative to this file's directory.
								        # Guard the append so repeated instantiation does not keep growing
								        # sys.path with duplicate entries.
								        operator_dir = str(Path(__file__).parent)
								        if operator_dir not in sys.path:
								            sys.path.append(operator_dir)

								        import pytorch
								        from pytorch.model import Model

								        self.model = Model(model_name, weights_path)
								        # Build the preprocessing transform from the data config resolved
								        # for the wrapped model.
								        config = pytorch.resolve_data_config({}, model=self.model._model)
								        self.tfms = pytorch.create_transform(**config)

								    def __call__(self, img_path: str) -> NamedTuple('Outputs', [('feature_vector', numpy.ndarray)]):
								        """
								        Extract the embedding of a single image.

								        Args:
								            img_path (`string`):
								                Path to the image file to embed.

								        Returns:
								            A named tuple `Outputs` whose `feature_vector` field holds the
								            flattened model output as a numpy array.
								        """
								        Outputs = NamedTuple('Outputs', [('feature_vector', numpy.ndarray)])

								        # Close the file handle deterministically; convert() returns an
								        # independent, fully loaded copy that outlives the `with` block.
								        # NOTE(review): forcing RGB assumes the transform expects 3-channel
								        # input, so grayscale / RGBA / palette files are normalised here.
								        with Image.open(img_path) as raw_img:
								            rgb_img = raw_img.convert('RGB')

								        # Add a leading batch dimension of size 1.
								        batch = self.tfms(rgb_img).unsqueeze(0)

								        # Pure inference: skip building the autograd graph.
								        with torch.no_grad():
								            features = self.model(batch)

								        # .cpu() is a no-op for CPU tensors and makes .numpy() valid when
								        # the model output lives on another device.
								        return Outputs(features.flatten().detach().cpu().numpy())