Update

Signed-off-by: shiyu22 <shiyu.chen@zilliz.com>
4 years ago · dad377dd45
9 changed files with 433 additions and 25 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -1,28 +1,35 @@
 # .gitattributes
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 # Source files
 # ============
 *.pxd    text diff=python
 *.py     text diff=python
 *.py3    text diff=python
 *.pyw    text diff=python
 *.pyx    text diff=python
 *.pyz    text diff=python
 *.pyi    text diff=python
 # Binary files
 # ============
 *.db     binary
 *.p      binary
 *.pkl    binary
 *.pickle binary
 *.pyc    binary export-ignore
 *.pyo    binary export-ignore
 *.pyd    binary
 # Jupyter notebook
 *.ipynb  text
 # Model files
 *.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text 
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.tar.gz filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,209 @@
 ### Linux ###
 *~
 # temporary files which can be created if a process still has a handle open of a deleted file
 .fuse_hidden*
 # KDE directory preferences
 .directory
 # Linux trash folder which might appear on any partition or disk
 .Trash-*
 # .nfs files are created when an open file is removed but is still being accessed
 .nfs*
 ### OSX ###
 # General
 .DS_Store
 .AppleDouble
 .LSOverride
 # Icon must end with two \r
 Icon
 # Thumbnails
 ._*
 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
 .fseventsd
 .Spotlight-V100
 .TemporaryItems
 .Trashes
 .VolumeIcon.icns
 .com.apple.timemachine.donotpresent
 # Directories potentially created on remote AFP share
 .AppleDB
 .AppleDesktop
 Network Trash Folder
 Temporary Items
 .apdisk
 ### Python ###
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 ### Windows ###
 # Windows thumbnail cache files
 Thumbs.db
 Thumbs.db:encryptable
 ehthumbs.db
 ehthumbs_vista.db
 # Dump file
 *.stackdump
 # Folder config file
 [Dd]esktop.ini
 # Recycle Bin used on file shares
 $RECYCLE.BIN/
 # Windows Installer files
 *.cab
 *.msi
 *.msix
 *.msm
 *.msp
 # Windows shortcuts
 *.lnk
--- a/README.md
+++ b/README.md
@ -1,2 +1,56 @@
 # vit-image-embedding
 # ViT Embedding Operator
 Authors: kyle he
 ## Overview
 The ViT(Vision Transformer) is a model for image classification that employs a Transformer-like architecture over patches of the image. This includes the use of Multi-Head Attention, Scaled Dot-Product Attention and other architectural features seen in the Transformer architecture traditionally used for NLP[1], which is trained on [imagenet dataset](https://image-net.org/download.php).
 ## Interface
 ```python
 __init__(self, model_name: str = 'vit_large_patch16_224',
                 framework: str = 'pytorch', weights_path: str = None)
 ```
 **Args:**
 - model_name:
  - the model name for embedding
  - supported types: `str`, for example 'vit_large_patch16_224'
 - framework:
  - the framework of the model
  - supported types: `str`, default is 'pytorch'
 - weights_path:
  - the weights path
  - supported types: `str`, default is None, using pretrained weights
 ```python
 __call__(self, img_path: str)
 ```
 **Args:**
 - img_path:
  - the input image path
  - supported types: `str`
 **Returns:**
 The Operator returns a tuple `Tuple[('embedding', numpy.ndarray)]` containing following fields:
 - feature_vector:
  - the embedding of the image
  - data type: `numpy.ndarray`
 ## Requirements
 You can get the required python package by [requirements.txt](./requirements.txt).
 ## How it works
 The `towhee/vit-embedding` Operator implements the function of image embedding, which can add to the pipeline. For example, it's the key Operator named embedding_model within [vit-embedding](https://hub.towhee.io/towhee/vit-embedding) pipeline.
 ## Reference
 [1].https://arxiv.org/abs/2010.11929
--- a/init.py
+++ b/init.py
@ -0,0 +1,21 @@
 # Copyright 2021 Zilliz. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 # For requirements.
 try:
    import timm
 except ModuleNotFoundError:
    os.system('pip install timm')
--- a/pytorch/init.py
+++ b/pytorch/init.py
@ -0,0 +1,13 @@
 # Copyright 2021 Zilliz. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/pytorch/model.py
+++ b/pytorch/model.py
@ -0,0 +1,39 @@
 # Copyright 2021 Zilliz. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
 import timm
 class Model():
    """
    PyTorch model class
    """
    def __init__(self, model_name: str, weights_path: str):
        super().__init__()
        if weights_path:
            self._model = timm.create_model(model_name, checkpoint_path=weights_path, num_classes=0)
        else:
            self._model = timm.create_model(model_name, pretrained=True, num_classes=0)
        self._model.eval()
    def __call__(self, img_tensor: torch.Tensor):
        return self._model(img_tensor)
    def train(self):
        """
        For training model
        """
        pass
--- a/requirements.txt
+++ b/requirements.txt
--- a/vit_image_embedding.py
+++ b/vit_image_embedding.py
@ -0,0 +1,52 @@
 # Copyright 2021 Zilliz. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import sys
 from typing import NamedTuple
 from pathlib import Path
 from PIL import Image
 import torch
 from timm.data import resolve_data_config
 from timm.data.transforms_factory import create_transform
 from towhee.operator import Operator
 class VisionTransformerEmbeddingOperator(Operator):
    """
    Embedding extractor using ViT.
    Args:
        model_name (`string`):
            Model name.
        weights_path (`string`):
            Path to local weights.
    """
    def __init__(self, model_name: str = 'vit_large_patch16_224',
                 framework: str = 'pytorch', weights_path: str = None) -> None:
        super().__init__()
        sys.path.append(str(Path(__file__).parent))
        if framework == 'pytorch':
            from vit_embedding.pytorch.model import Model
        self.model = Model(model_name, weights_path)
        config = resolve_data_config({}, model=self.model._model)
        self.tfms = create_transform(**config)
    def __call__(self, img_path: str) -> NamedTuple('Outputs', [('embedding', torch.Tensor)]):
        Outputs = NamedTuple('Outputs', [('embedding', torch.Tensor)])
        img = self.tfms(Image.open(img_path)).unsqueeze(0)
        features = self.model(img)
        return Outputs(features.flatten().detach().numpy())
--- a/vit_image_embedding.yaml
+++ b/vit_image_embedding.yaml
@ -0,0 +1,13 @@
 name: 'vit-embedding'
 labels: 
  recommended_framework: pytorch1.2.0
  class: vit-embedding
  others: vit
 operator: 'towhee/vit-embedding'
 init:
  model_name: str
 call:
  input:
    img_path: str
  output:
    feature_vector: numpy.ndarray