diff --git a/LICENSE.pdf b/LICENSE.pdf new file mode 100644 index 0000000..eb54420 Binary files /dev/null and b/LICENSE.pdf differ diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 0000000..5f93568 --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,232 @@ +=============================================================================== + +This file contains certain notices of software components included with the +software that Qualcomm Innovation Center, Inc. ("QuIC") and/or its subsidiaries +are required to provide you. Except where prohibited by the open source license, +the content of this file is provided solely to satisfy QuIC's and/or its +subsidiaries’ attribution and notice requirements, and your use of these +software components together with the software of QuIC and/or its subsidiaries +("Software") is subject to the terms of your license from QuIC and/or its +subsidiaries, as the case may be. Compliance with all copyright laws and software +license agreements included in the notice section of this file are the +responsibility of the user. Except as may be granted by separate express written +agreement, this file provides no license to any patents, trademarks, copyrights, +or other intellectual property of Qualcomm Incorporated or any of its +subsidiaries. + +Software provided with this notice is NOT A CONTRIBUTION to any open source +project. If alternative licensing is available for any of the components with +licenses or attributions provided below, a license choice is made for receiving +such code by QuIC and/or its subsidiaries, as the case may be. + +Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. + +Qualcomm is a trademark of Qualcomm Incorporated, registered in the United States +and other countries. All Qualcomm Incorporated trademarks are used with +permission. Other products and brand names may be trademarks or registered +trademarks of their respective owners. + +This software may be subject to U.S. and international export, re-export or +transfer (export) laws. Diversion contrary to U.S. and international law is +strictly prohibited. + +=================================================================== + +# ============================================================================== +# Copyright 2016-2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +""" +Copyright 2017-2018 Fizyr (https://fizyr.com) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" + +# ============================================================================== +# Copyright 2020 Martin Krasser (https://github.com/krasserm) All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# ============================================================================== +# Copyright 2020 Ji Lin (https://github.com/tonylins) All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# ============================================================================== +# Copyright 2020 Ross Wightman (https://github.com/rwightman) All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +MIT License + +Copyright (c) 2019 Hao Gao + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +=============================================================================== +MIT License + +Copyright (c) 2018 Pyjcsx + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +# ============================================================================== +# Copyright (c) andreas (https://github.com/andreas128). All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# ============================================================================== +# Copyright (c) MMEditing Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +MIT License + +Copyright (c) 2019 Bin Zhang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +=============================================================================== +MIT License + +Copyright (c) 2017 Sean Naren + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index cb5c3c4..dbe5b3e 100644 --- a/README.md +++ b/README.md @@ -1 +1,329 @@ +![Qualcomm Innovation Center, Inc.](images/logo-quic-on@h68.png) + # Model Zoo for AI Model Efficiency Toolkit + +We provide a collection of popular neural network models and compare their floating point (FP32) and quantized (INT8 weights/activations or INT8 weights/INT16 activations) performance. Results demonstrate that quantized models can provide good accuracy, comparable to floating point (FP32) models. Together with results, we also provide recipes for users to quantize floating-point models using the [AI Model Efficiency ToolKit (AIMET)](https://github.com/quic/aimet). 
+
+
+## Table of Contents
+- [Introduction](#introduction)
+- [Tensorflow Models](#tensorflow-models)
+  - [Model Zoo](#model-zoo)
+  - [Detailed Results](#detailed-results)
+- [PyTorch Models](#pytorch-models)
+  - [Model Zoo](#pytorch-model-zoo)
+  - [Detailed Results](#pytorch-detailed-results)
+- [Examples](#examples)
+- [Team](#team)
+- [License](#license)
+
+## Introduction
+Quantized inference is significantly faster than floating-point inference, and it enables models to run in a power-efficient manner on mobile and edge devices. We use AIMET, a library that includes state-of-the-art techniques for quantization, to quantize various models available in the [TensorFlow](https://tensorflow.org) and [PyTorch](https://pytorch.org) frameworks. The list of models is provided in the sections below.
+
+An original FP32 source model is quantized using either the post-training quantization (PTQ) or quantization-aware training (QAT) techniques available in AIMET. An example evaluation script is provided for each model. When PTQ is needed, the evaluation script performs PTQ before evaluation. Wherever QAT is used, the fine-tuned model checkpoint is also provided.
+
+## Tensorflow Models
+
+### Model Zoo
+
+| Network | Model Source [1] | Floating Pt (FP32) Model [2] | Quantized Model [3] | Results [4] | Documentation |
+| --- | --- | --- | --- | --- | --- |
+| ResNet-50 (v1) | GitHub Repo | Pretrained Model | See Documentation | (ImageNet) Top-1 Accuracy<br>FP32: 75.21%<br>INT8: 74.96% | ResNet50.md |
+| MobileNet-v2-1.4 | GitHub Repo | Pretrained Model | Quantized Model | (ImageNet) Top-1 Accuracy<br>FP32: 75%<br>INT8: 74.21% | MobileNetV2.md |
+| EfficientNet Lite | GitHub Repo | Pretrained Model | Quantized Model | (ImageNet) Top-1 Accuracy<br>FP32: 74.93%<br>INT8: 74.99% | EfficientNetLite.md |
+| SSD MobileNet-v2 | GitHub Repo | Pretrained Model | See Example | (COCO) Mean Avg. Precision (mAP)<br>FP32: 0.2469<br>INT8: 0.2456 | SSDMobileNetV2.md |
+| RetinaNet | GitHub Repo | Pretrained Model | See Example | (COCO) mAP<br>FP32: 0.35<br>INT8: 0.349<br>Detailed Results | RetinaNet.md |
+| Pose Estimation | Based on Ref. | Based on Ref. | Quantized Model | (COCO) mAP<br>FP32: 0.383<br>INT8: 0.379<br>Mean Avg. Recall (mAR)<br>FP32: 0.452<br>INT8: 0.446 | PoseEstimation.md |
+| SRGAN | GitHub Repo | Pretrained Model | See Example | (BSD100) PSNR/SSIM<br>FP32: 25.45/0.668<br>INT8: 24.78/0.628<br>INT8W/INT16Act.: 25.41/0.666<br>Detailed Results | SRGAN.md |
+
+*[1]* Original FP32 model source
+*[2]* FP32 model checkpoint
+*[3]* Quantized Model: For models quantized with post-training technique, refers to FP32 model which can then be quantized using AIMET. For models optimized with QAT, refers to model checkpoint with fine-tuned weights. 8-bit weights and activations are typically used. For some models, 8-bit weights and 16-bit activations (INT8W/INT16Act.) are used to further improve performance of post-training quantization.
+*[4]* Results comparing float and quantized performance
+*[5]* Script for quantized evaluation using the model referenced in “Quantized Model” column
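+
+The quantized evaluation scripts referenced in the Documentation column share a common AIMET QuantSim flow. The following is a condensed, illustrative sketch of that flow based on the TensorFlow example scripts under `zoo_tensorflow/examples` in this repository; the op names, the `sess` session object, and the `eval_func` callback are placeholders rather than working code for any particular model.
+
+```python
+# Condensed sketch of the QuantSim evaluation flow used by the TensorFlow
+# example scripts in this repository. Op names and eval_func are placeholders.
+import aimet_common.defs
+from aimet_tensorflow.quantsim import QuantizationSimModel
+
+
+def evaluate_quantized(sess, eval_func, config_file=None):
+    """Wrap an FP32 tf.Session in a QuantSim model, calibrate encodings,
+    then evaluate the quantization-simulated graph.
+
+    eval_func(session, iterations) runs forward passes and returns a score."""
+    sim = QuantizationSimModel(
+        session=sess,                           # tf.Session holding the FP32 graph
+        starting_op_names=['IteratorGetNext'],  # placeholder graph input op(s)
+        output_op_names=['logits'],             # placeholder graph output op(s)
+        quant_scheme=aimet_common.defs.QuantScheme.post_training_tf,
+        rounding_mode='nearest',
+        default_output_bw=8,                    # activation bitwidth
+        default_param_bw=8,                     # weight bitwidth
+        config_file=config_file)                # optional quantsim config JSON
+
+    # Calibrate quantizer encodings with a limited number of forward passes,
+    # then run the full evaluation against the quantization-simulated graph.
+    sim.compute_encodings(eval_func, forward_pass_callback_args=500)
+    return eval_func(sim.session, -1)
+```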
+
+### Detailed Results
+#### RetinaNet
+(COCO dataset)
+
+| Average Precision/Recall @[ IoU \| area \| maxDets ] | FP32 | INT8 |
+| --- | --- | --- |
+| Average Precision @[ 0.50:0.95 \| all \| 100 ] | 0.350 | 0.349 |
+| Average Precision @[ 0.50 \| all \| 100 ] | 0.537 | 0.536 |
+| Average Precision @[ 0.75 \| all \| 100 ] | 0.374 | 0.372 |
+| Average Precision @[ 0.50:0.95 \| small \| 100 ] | 0.191 | 0.187 |
+| Average Precision @[ 0.50:0.95 \| medium \| 100 ] | 0.383 | 0.381 |
+| Average Precision @[ 0.50:0.95 \| large \| 100 ] | 0.472 | 0.472 |
+| Average Recall @[ 0.50:0.95 \| all \| 1 ] | 0.306 | 0.305 |
+| Average Recall @[ 0.50:0.95 \| all \| 10 ] | 0.491 | 0.490 |
+| Average Recall @[ 0.50:0.95 \| all \| 100 ] | 0.533 | 0.532 |
+| Average Recall @[ 0.50:0.95 \| small \| 100 ] | 0.345 | 0.341 |
+| Average Recall @[ 0.50:0.95 \| medium \| 100 ] | 0.577 | 0.577 |
+| Average Recall @[ 0.50:0.95 \| large \| 100 ] | 0.681 | 0.679 |
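+
+The rows above are the standard COCO summary metrics printed by pycocotools, which the example scripts in this repository use for COCO evaluation. A minimal, illustrative sketch of producing such a summary is shown below; the annotation and detection file names are placeholders.
+
+```python
+# Minimal sketch of generating the COCO AP/AR summary with pycocotools.
+# 'instances_val.json' and 'detections.json' are placeholder file names.
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+coco_gt = COCO('instances_val.json')            # ground-truth annotations
+coco_dt = coco_gt.loadRes('detections.json')    # detections in COCO results format
+
+coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')  # 'bbox' for detection mAP/mAR
+coco_eval.evaluate()
+coco_eval.accumulate()
+coco_eval.summarize()  # prints the AP/AR rows (IoU x area x maxDets) shown above
+```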
+
+#### SRGAN
+
+| Model | Dataset | PSNR | SSIM |
+| --- | --- | --- | --- |
+| FP32 | Set5/Set14/BSD100 | 29.17/26.17/25.45 | 0.853/0.719/0.668 |
+| INT8/ACT8 | Set5/Set14/BSD100 | 28.31/25.55/24.78 | 0.821/0.684/0.628 |
+| INT8/ACT16 | Set5/Set14/BSD100 | 29.12/26.15/25.41 | 0.851/0.719/0.666 |
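+
+PSNR and SSIM in the SRGAN tables are standard image-reconstruction metrics. Below is a minimal, illustrative sketch of computing them with scikit-image (pinned to 0.16.2 in the SRGAN documentation); the high-resolution file name follows the naming convention described in the SRGAN docs, while `people_SR.png` is a hypothetical super-resolved output.
+
+```python
+# Minimal sketch of PSNR/SSIM computation with scikit-image 0.16.x.
+# File names are placeholders; the super-resolved image must match the HR size.
+from skimage.io import imread
+from skimage.metrics import peak_signal_noise_ratio, structural_similarity
+
+hr = imread('people_HR.png')   # ground-truth high-resolution image
+sr = imread('people_SR.png')   # hypothetical model output upscaled from the LR image
+
+psnr = peak_signal_noise_ratio(hr, sr, data_range=255)
+ssim = structural_similarity(hr, sr, data_range=255, multichannel=True)
+print('PSNR: {:.2f}  SSIM: {:.3f}'.format(psnr, ssim))
+```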
+
+## PyTorch Models
+### Model Zoo
+
+| Network | Model Source [1] | Floating Pt (FP32) Model [2] | Quantized Model [3] | Results [4] | Documentation |
+| --- | --- | --- | --- | --- | --- |
+| MobileNetV2 | GitHub Repo | Pretrained Model | See Example | (ImageNet) Top-1 Accuracy<br>FP32: 71.67%<br>INT8: 71.14% | MobileNetV2.md |
+| EfficientNet-lite0 | GitHub Repo | Pretrained Model | See Example | (ImageNet) Top-1 Accuracy<br>FP32: 75.42%<br>INT8: 74.49% | EfficientNet-lite0.md |
+| DeepLabV3+ | GitHub Repo | Pretrained Model | See Example | (PascalVOC) mIOU<br>FP32: 72.32%<br>INT8: 72.08% | DeepLabV3.md |
+| MobileNetV2-SSD-Lite | GitHub Repo | Pretrained Model | See Example | (PascalVOC) mAP<br>FP32: 68.7%<br>INT8: 68.6% | MobileNetV2-SSD-lite.md |
+| Pose Estimation | Based on Ref. | Based on Ref. | Quantized Model | (COCO) mAP<br>FP32: 0.364<br>INT8: 0.359<br>mAR<br>FP32: 0.436<br>INT8: 0.432 | PoseEstimation.md |
+| SRGAN | GitHub Repo | Pretrained Model (older version from here) | N/A | (BSD100) PSNR/SSIM<br>FP32: 25.51/0.653<br>INT8: 25.5/0.648<br>Detailed Results | SRGAN.md |
+| DeepSpeech2 | GitHub Repo | Pretrained Model | See Example | (Librispeech Test Clean) WER<br>FP32: 9.92%<br>INT8: 10.22% | DeepSpeech2.md |
+
+*[1]* Original FP32 model source
+*[2]* FP32 model checkpoint
+*[3]* Quantized Model: For models quantized with post-training technique, refers to FP32 model which can then be quantized using AIMET. For models optimized with QAT, refers to model checkpoint with fine-tuned weights. 8-bit weights and activations are typically used. For some models, 8-bit weights and 16-bit activations are used to further improve performance of post-training quantization.
+*[4]* Results comparing float and quantized performance
+*[5]* Script for quantized evaluation using the model referenced in “Quantized Model” column
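+
+The DeepSpeech2 entry above is reported as word error rate (WER) rather than accuracy. For reference only, the sketch below shows a generic WER computation (word-level edit distance divided by the number of reference words); it is an illustration, not the repository's DeepSpeech2 evaluation code.
+
+```python
+# Illustrative word error rate (WER): word-level edit distance over reference length.
+def wer(reference: str, hypothesis: str) -> float:
+    ref, hyp = reference.split(), hypothesis.split()
+    # dp[i][j] = edit distance between ref[:i] and hyp[:j]
+    dp = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
+    for i in range(len(ref) + 1):
+        dp[i][0] = i
+    for j in range(len(hyp) + 1):
+        dp[0][j] = j
+    for i in range(1, len(ref) + 1):
+        for j in range(1, len(hyp) + 1):
+            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
+            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
+                           dp[i][j - 1] + 1,         # insertion
+                           dp[i - 1][j - 1] + cost)  # substitution
+    return dp[len(ref)][len(hyp)] / max(len(ref), 1)
+
+# Two errors over six reference words, i.e. roughly 0.33.
+print(wer("the cat sat on the mat", "the cat sit on mat"))
+```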
+
+### Detailed Results
+
+#### SRGAN Pytorch
+
+| Model | Dataset | PSNR | SSIM |
+| --- | --- | --- | --- |
+| FP32 | Set5/Set14/BSD100 | 29.93/N/A/25.51 | 0.851/N/A/0.653 |
+| INT8 | Set5/Set14/BSD100 | 29.86/N/A/25.55 | 0.845/N/A/0.648 |
+ + +## Examples + +### Install AIMET +Before you can run the example script for a specific model, you need to install the AI Model Efficiency ToolKit (AIMET) software. Please see this [Getting Started](https://github.com/quic/aimet#getting-started) page for an overview. Then install AIMET and its dependencies using these [Installation instructions](https://github.com/quic/aimet/blob/1.13.0/packaging/INSTALL.txt). + +> **NOTE:** To obtain the exact version of AIMET software that was used to test this model zoo, please install release [1.13.0](https://github.com/quic/aimet/releases/tag/1.13.0) when following the above instructions. + +### Running the scripts +Download the necessary datasets and code required to run the example for the model of interest. The examples run quantized evaluation and if necessary apply AIMET techniques to improve quantized model performance. They generate the final accuracy results noted in the table above. Refer to the Docs for [TensorFlow](zoo_tensorflow/Docs) or [PyTorch](zoo_torch/Docs) folder to access the documentation and procedures for a specific model. + +## Team +AIMET Model Zoo is a project maintained by Qualcomm Innovation Center, Inc. + +## License +Please see the [LICENSE file](LICENSE.pdf) for details. diff --git a/images/logo-quic-on@h68.png b/images/logo-quic-on@h68.png new file mode 100644 index 0000000..a83b3d2 Binary files /dev/null and b/images/logo-quic-on@h68.png differ diff --git a/zoo_tensorflow/Docs/EfficientNetLite.md b/zoo_tensorflow/Docs/EfficientNetLite.md new file mode 100755 index 0000000..6fb4b51 --- /dev/null +++ b/zoo_tensorflow/Docs/EfficientNetLite.md @@ -0,0 +1,49 @@ +# EfficientNet Lite-0 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies +### Setup TensorFlow TPU repo +- Clone the [TensorFlow TPU repo](https://github.com/tensorflow/tpu) + `git clone https://github.com/tensorflow/tpu.git` +- Append the repo location to your `PYTHONPATH` with the following: + `export PYTHONPATH=$PYTHONPATH:/tpu/models/official/efficientnet` + +## Obtaining model checkpoint and dataset +- The original EfficientNet Lite-0 checkpoint can be downloaded here: + - https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet/lite +- Optimized EfficientNet Lite-0 checkpoint can be downloaded from [Releases](/../../releases). 
+- ImageNet can be downloaded here: + - http://www.image-net.org/ + + +## Usage +- To run evaluation with QuantSim in AIMET, use the following +```bash +python efficientnet_quanteval.py + --model-name=efficientnet-lite0 + --checkpoint-path= + --imagenet-eval-glob= + --imagenet-eval-label= + --quantsim-config-file= +``` + +- If you are using a model checkpoint which has Batch Norms already folded (such as the optimized model checkpoint), please specify the `--ckpt-bn-folded` flag: +```bash +python efficientnet_quanteval.py + --model-name=efficientnet-lite0 + --checkpoint-path= + --imagenet-eval-glob= + --imagenet-eval-label= + --quantsim-config-file= + --ckpt-bn-folded +``` + +## Quantizer Op Assumptions +In the evaluation script included, we have used the default config file, which configures the quantizer ops with the following assumptions: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- Operations which shuffle data such as reshape or transpose do not require additional quantizers \ No newline at end of file diff --git a/zoo_tensorflow/Docs/MobileNetV2.md b/zoo_tensorflow/Docs/MobileNetV2.md new file mode 100755 index 0000000..4db23cd --- /dev/null +++ b/zoo_tensorflow/Docs/MobileNetV2.md @@ -0,0 +1,50 @@ +# Mobilenetv2 1.4 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +### Setup TensorFlow Models repo +- Clone the [TensorFlow Models repo](https://github.com/tensorflow/models) + `git clone https://github.com/tensorflow/models.git` + +- checkout this commit id: + `git checkout 104488e40bc2e60114ec0212e4e763b08015ef97` + +- Append the repo location to your `PYTHONPATH` with the following: + `export PYTHONPATH=$PYTHONPATH:/research/slim` + +## Obtaining model checkpoint and dataset +- The optimized Mobilenet v2 1.4 checkpoint can be downloaded from [Releases](/../../releases). 
+- ImageNet can be downloaded here: + - http://www.image-net.org/ + +## Usage +- To run evaluation with QuantSim in AIMET, use the following: +```bash +python mobilenet_v2_140_quanteval.py \ + --model-name=mobilenet_v2_140 \ + --checkpoint-path= \ + --dataset-dir= \ + --quantsim-config-file= +``` + +- If you are using a model checkpoint which has Batch Norms already folded (such as the optimized model checkpoint), please specify the `--ckpt-bn-folded` flag: + +```bash +python mobilenet_v2_140_quanteval.py \ + --model-name=mobilenet_v2_140 \ + --checkpoint-path= \ + --dataset-dir= \ + --quantsim-config-file= + --ckpt-bn-folded +``` + +## Quantizer Op Assumptions +In the evaluation script included, we have used the default config file, which configures the quantizer ops with the following assumptions: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- Operations which shuffle data such as reshape or transpose do not require additional quantizers \ No newline at end of file diff --git a/zoo_tensorflow/Docs/PoseEstimation.md b/zoo_tensorflow/Docs/PoseEstimation.md new file mode 100644 index 0000000..da4a076 --- /dev/null +++ b/zoo_tensorflow/Docs/PoseEstimation.md @@ -0,0 +1,53 @@ +# Pose Estimation + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +| Package | Version | +| :---------: | :-----: | +| pycocotools | 2.0.2 | +| scipy | 1.1.0 | + +### Adding dependencies within Docker Image + +- If you are using a docker image, e.g. AIMET development docker, please add the following lines to the Dockerfile and rebuild the Docker image + +```dockerfile +RUN pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI +RUN pip install scipy==1.1.0 +``` + +## Obtaining model weights and dataset + +- The pose estimation model can be downloaded here: + - + pose_estimation.tar.gz + +- This model has been compressed and its weights are optimized by applying DFQ + (Data Free Quantization). + +- coco dataset can be downloaded here: + - COCO 2014 Val images + - + COCO 2014 Train/Val annotations + + + +## Usage + +- The program requires two arguments to run: model_meta_file_dir, coco_path. These are positional + arguments so you must specify the arguments in order. + + ```bash + python ./examples/pose_estimation_quanteval.py + ``` + +- We only support evaluation on COCO 2014 val images with person keypoints. + +- The results reported was evaluation on the whole dataset, which contains over 40k + images and takes 15+ hours on a single RTX 2080Ti GPU. So in case you want to run + a faster evaluation, specifiy num_imgs argument to the second call with a + small number to evaluate_session so that you run evaluation only on a + partial dataset. \ No newline at end of file diff --git a/zoo_tensorflow/Docs/ResNet50.md b/zoo_tensorflow/Docs/ResNet50.md new file mode 100755 index 0000000..c7721bc --- /dev/null +++ b/zoo_tensorflow/Docs/ResNet50.md @@ -0,0 +1,62 @@ +# ResNet 50 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. 
+ +## Additional Dependencies + +### Setup TensorFlow Models repo + +- Clone the [TensorFlow Models repo](https://github.com/tensorflow/models) + + `git clone https://github.com/tensorflow/models.git` + +- checkout this commit id: + + `git checkout 104488e40bc2e60114ec0212e4e763b08015ef97` + +- Append the repo location to your `PYTHONPATH` with the following: + + `export PYTHONPATH=$PYTHONPATH:/research/slim` + + + +## Obtaining model checkpoint and dataset + +- The optimized ResNet 50 checkpoint can be downloaded from [Releases](/../../releases). + +- ImageNet can be downloaded here: + - http://www.image-net.org/ + + + +## Usage + +- To run evaluation with QuantSim in AIMET, use the following + +```bash +python resnet_v1_50.py \ + --model-name=resnet_v1_50 \ + --checkpoint-path= \ + --dataset-dir= \ + --quantsim-config-file= +``` + +- If you are using a model checkpoint which has Batch Norms already folded (such as the optimized model checkpoint), please specify the `--ckpt-bn-folded` flag: + +```bash +python resnet_v1_50.py \ + --model-name=resnet_v1_50 \ + --checkpoint-path= \ + --dataset-dir= \ + --quantsim-config-file= + --ckpt-bn-folded +``` + +## Quantizer Op Assumptions +In the evaluation script included, we have used the default config file, which configures the quantizer ops with the following assumptions: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- Operations which shuffle data such as reshape or transpose do not require additional quantizers diff --git a/zoo_tensorflow/Docs/RetinaNet.md b/zoo_tensorflow/Docs/RetinaNet.md new file mode 100644 index 0000000..ce8db8b --- /dev/null +++ b/zoo_tensorflow/Docs/RetinaNet.md @@ -0,0 +1,62 @@ +# RetinaNet + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +| Package | +| :-------------: | +| keras-retinanet | +| pycocotools | + +### Setup RetinaNet Repo + +- Clone the RetinaNet repository from github: https://github.com/fizyr/keras-retinanet + + ```git clone https://github.com/fizyr/keras-retinanet.git ``` + + Within the cloned repository, checkout the commit corresponding to pre-tf2.0. The included example scripts only works for TF 1.x. + + ```git checkout 08af308d01a8f22dc286d62bc26c8496e1ff6539``` + + Install keras-retinanet and dependencies using by running, + + ```pip install . --user``` + +### Pip install pycocotools + +- Install pycocotools by running the following: + ```bash + pip install --user git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI + ``` +### Adding dependencies within Docker Image + +- If you are using a docker image, e.g. AIMET development docker, please add the following lines to the Dockerfile and rebuild the Docker image + +```dockerfile +RUN git clone https://github.com/fizyr/keras-retinanet.git /tmp/keras-retinanet/ +RUN cd /tmp/keras-retinanet/ && git checkout 08af308d01a8f22dc286d62bc26c8496e1ff6539 +RUN cd /tmp/keras-retinanet/ && pip install . 
+RUN pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI +``` + + + +## Obtaining model weights and dataset + +- The original pre-trained keras retinanet model can be downloaded here: + - RetinaNet pre-trained model +- coco dataset can be downloaded here: + - http://cocodataset.org + + + +## Usage +- The example script requires paths to coco dataset and keras retinanet model (look at the above *Obtaining model weights and dataset* instructions to download). +- There are two actions ```retinanet_quanteval.py``` can perform, ```eval_original``` will evaluate the accuracy of the original model, while ```eval_quantized``` will quantize the original model and evaluate the accuracy on the quantized model +``` +python ./examples/retinanet_quanteval.py coco --action eval_original + +python ./examples/retinanet_quanteval.py coco --action eval_quantized +``` diff --git a/zoo_tensorflow/Docs/SRGAN.md b/zoo_tensorflow/Docs/SRGAN.md new file mode 100644 index 0000000..ab12fb1 --- /dev/null +++ b/zoo_tensorflow/Docs/SRGAN.md @@ -0,0 +1,76 @@ +# SRGAN (Super Resolution) + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +| Package | Version | +| :----------: | :-----: | +| scikit-image | 0.16.2 | +| mmcv | 1.2.0 | + +### Setup Super-resolution repo + +- Clone the krasserm/super-resolution repo + + `git clone https://github.com/krasserm/super-resolution.git` + +- Append the repo location to your `PYTHONPATH` with the following: + + `export PYTHONPATH=/super-resolution:$PYTHONPATH` + + + +### Adding dependencies within Docker Image + +- If you are using a docker image, e.g. AIMET development docker, please add the following lines to the Dockerfile and rebuild the Docker image + +```dockerfile +RUN pip install scikit-image==0.16.2 +RUN pip install mmcv==1.2.0 +``` + + + +## Obtaining model weights and dataset + +- The SRGAN model can be downloaded from: + - krasserm/super-resolution +- Three benchmark dataset can be downloaded here: + - [Set5](https://uofi.box.com/shared/static/kfahv87nfe8ax910l85dksyl2q212voc.zip) + - [Set14](https://uofi.box.com/shared/static/igsnfieh4lz68l926l8xbklwsnnk8we9.zip) + - [BSD100](https://uofi.box.com/shared/static/qgctsplb8txrksm9to9x01zfa4m61ngq.zip) +- If you want to use custom high resolution images, one way to generate corresponding low resolution images can be found at this issue + - with a Python version of MATLAB `imresize` function available here + + + +## Usage + +- The `srgan_quanteval.py` script requires two arguments to run: weights_path, images_path. + These are positional arguments so you just have to specify the arguments in order. + + ```bash + python ./zoo_tensorflow/examples/srgan_quanteval.py [--options] + ``` + +- we only support 4x super resolution on .png images. So make sure you high resolution images are 4x the dimension of you low resolution images. If you are using one of the benchmark dataset, please use images under `image_SRF_4` directory + +- We assume low and high resolution images are both present under the same directory, + + with images following naming conventions: + + - low resolution images will have file name suffix: `LR.png` + - e.g. `people_LR.png` + - high resolution images will have file name suffix: `HR.png` + - e.g. 
`people_HR.png` + + +## Quantizer Op Assumptions +In the evaluation script included, we have modified activation bitwidth, the configuration looks like below: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 16 bits, asymmetric quantization +- Model inputs are not quantized +- Bias Correction and Cross Layer Equalization have been applied diff --git a/zoo_tensorflow/Docs/SSDMobileNetV2.md b/zoo_tensorflow/Docs/SSDMobileNetV2.md new file mode 100644 index 0000000..671c3b6 --- /dev/null +++ b/zoo_tensorflow/Docs/SSDMobileNetV2.md @@ -0,0 +1,142 @@ +# SSD MobileNet v2 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Obtaining model checkpoint and dataset + +- SSD MobileNet v2 checkpoint used for AIMET quantization can be downloaded from release page +- Or you could follow the steps below to obtain the checkpoint + +### export inference graph + +The following steps are need to have a model ready for AIMET quantization + +- ```bash + git clone https://github.com/tensorflow/models.git + cd models && git checkout r1.12.0 + cd research && protoc object_detection/protos/*.proto --python_out=. + ``` + +- Download [ssd_mobilenet_v2](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz) + + - `tar xfvz ssd_mobilenet_v2_quantized_300x300_coco_2019_01_03.tar.gz` + + - **remove** following parameters from `pipeline.config` that come with the tarball + + ``` + graph_rewriter { + quantization { + delay: 48000 + weight_bits: 8 + activation_bits: 8 + } + ``` +- Add the following code snippet to [Line 147, models/research/object_detection/export_inference_graph.py](https://github.com/tensorflow/models/blob/r1.12.0/research/object_detection/export_inference_graph.py#L147) + + ```python + import os + saver = tf.train.Saver() + with tf.Session() as sess: + saver.restore(sess, os.path.join(FLAGS.output_directory, "model.ckpt")) + aimet_model_output_dir = os.path.join(FLAGS.output_directory, "AIMET") + os.mkdir(aimet_model_output_dir) + saver.save(sess, os.path.join(aimet_model_output_dir, "model.ckpt") + ``` + +- tensorflow v1.10 is need to run the script, we could use the offical tensorflow 1.10.1 docker image + + - ```bash + docker pull tensorflow/tensorflow:1.10.1-devel-py3 + export WORKSPACE= + docker run -it --rm -v $WORKSPACE:$WORKSPACE tensorflow/tensorflow:1.10.1-devel-py3 + ``` + +- run `export_inference_graph.py` to obtain model checkpoint ready for AIMET quantization + + ```bash + cd models/research + export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/slim + python ./object_detection/export_inference_graph.py \ + --input_type image_tensor \ + --pipeline_config_path \ + --trained_checkpoint_prefix \ + --output_directory \ + ``` + + - model checkpoint will be available at `/AIMET/model.ckpt` + +### COCO dataset TFRecord + +TFRecord format of COCO dataset is need + +- [download_and_preprocess_mscoco.sh](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/download_and_preprocess_mscoco.sh) can be used to download and convert coco dataset into TFRecord + + ```bash + git clone https://github.com/tensorflow/models.git + git checkout master + cd models/research/object_detection/dataset_tools + ./download_and_preprocess_mscoco.sh + ``` + +- If COCO dataset is already available or you want to download COCO dataset separately + - COCO dataset can be download 
here: [COCO](https://cocodataset.org/#download) + - Please download the 2017 Version + - [create_coco_tf_record.py](https://github.com/tensorflow/models/blob/master/research/object_detection/dataset_tools/create_coco_tf_record.py) can be used to convert dataset into TFRecord + + + +## Additional Dependencies + +| Package | +| :---------------: | +| tensorflow/models | +| pycocotools | + +### Setup models Repo + +- Clone the tensorflow models repository from github: + + ```bash + git clone https://github.com/tensorflow/models.git + cd models && git checkout r1.12.0 + ``` + +- Append the repo location to your `PYTHONPATH` by doing the following: + + `export PYTHONPATH=/models/research:$PYTHONPATH` + +### Pip install pycocotools + +- Install pycocotools by running the following: + + ```bash + pip install --user git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI + ``` + +### Adding dependencies within Docker Image + +- If you are using a docker image, e.g. AIMET development docker, please add the following lines to the Dockerfile and rebuild the Docker image + +```dockerfile +RUN pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI +``` + + + +## Usage +- `ssd_mobilenet_v2_quanteval.py` has four required arguments, an example usage is shown below +```bash +./ssd_mobilenet_v2_quanteval.py --model-checkpoint /model.ckpt --dataset-dir --TFRecord-file-pattern 'coco_val.record-*-of-00010' --annotation-json-file /instances_val2017.json +``` + +- `--quantsim-output-dir` option can be used if want to save the quantized graph + + + +## Quantizer Op Assumptions +In the evaluation script included, we have manually configured the quantizer ops with the following assumptions: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized \ No newline at end of file diff --git a/zoo_tensorflow/examples/efficientnet_quanteval.py b/zoo_tensorflow/examples/efficientnet_quanteval.py new file mode 100755 index 0000000..2ea64c8 --- /dev/null +++ b/zoo_tensorflow/examples/efficientnet_quanteval.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import os +import sys +import json +import argparse + +import numpy as np +import tensorflow as tf + +import aimet_common.defs +from aimet_tensorflow import quantsim +from aimet_tensorflow.quantsim import save_checkpoint, QuantizationSimModel +from aimet_tensorflow.batch_norm_fold import fold_all_batch_norms + +import model_builder_factory +import preprocessing +import utils +import eval_ckpt_main + +class EvalCkptDriver(eval_ckpt_main.EvalCkptDriver): + + def build_dataset(self, filenames, labels, is_training): + """Wrap build_dataset function to create an initializable iterator rather than a one shot iterator.""" + make_one_shot_iterator = tf.data.Dataset.make_one_shot_iterator + tf.data.Dataset.make_one_shot_iterator = tf.data.Dataset.make_initializable_iterator + r = super().build_dataset(filenames, labels, is_training) + tf.data.Dataset.make_one_shot_iterator = make_one_shot_iterator + + return r + + def run_inference(self, + ckpt_path, + image_files, + labels, + enable_ema=True, + export_ckpt=None): + """Build and run inference on the target images and labels.""" + label_offset = 1 if self.include_background_label else 0 + with tf.Graph().as_default(): + sess = tf.Session() + images, labels = self.build_dataset(image_files, labels, False) + probs = self.build_model(images, is_training=False) + if isinstance(probs, tuple): + probs = probs[0] + + if not self.ckpt_bn_folded: + saver = tf.train.Saver() + saver.restore(sess, ckpt_path) + else: + sess.run(tf.global_variables_initializer()) + + # Fold all BatchNorms before QuantSim + sess, folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'], ['logits']) + + if self.ckpt_bn_folded: + with sess.graph.as_default(): + checkpoint = ckpt_path + saver = tf.train.Saver() + saver.restore(sess, checkpoint) + + sess.run('MakeIterator') + + # Define an eval function to use during compute encodings + def eval_func(sess, iterations): + sess.run('MakeIterator') + for _ in range(iterations): + out_probs = sess.run('Squeeze:0') + + # Select the right quant_scheme + if self.quant_scheme == 'range_learning_tf': + quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_init + elif self.quant_scheme == 'range_learning_tf_enhanced': + quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init + elif self.quant_scheme == 'tf': + quant_scheme = aimet_common.defs.QuantScheme.post_training_tf + elif self.quant_scheme == 'tf_enhanced': + quant_scheme = aimet_common.defs.QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + self.quant_scheme) + + # Create QuantizationSimModel + sim = QuantizationSimModel( + session=sess, + starting_op_names=['IteratorGetNext'], + output_op_names=['logits'], + quant_scheme=quant_scheme, + rounding_mode=self.round_mode, + default_output_bw=self.default_output_bw, + default_param_bw=self.default_param_bw, + config_file=self.quantsim_config_file, + ) + + # Run compute_encodings + sim.compute_encodings(eval_func, + forward_pass_callback_args=500 + ) + + # Run final evaluation + sess = sim.session + sess.run('MakeIterator') + prediction_idx = [] + prediction_prob = [] + for _ in range(len(image_files) // self.batch_size): + out_probs = sess.run('Squeeze:0') + idx = np.argsort(out_probs)[::-1] + prediction_idx.append(idx[:5] - label_offset) + prediction_prob.append([out_probs[pid] for pid in idx[:5]]) + + # Return the top 5 
predictions (idx and prob) for each image. + return prediction_idx, prediction_prob + + +def run_evaluation(args): + print("Running evaluation") + driver = EvalCkptDriver( + model_name=args.model_name, + batch_size=1, + image_size=model_builder_factory.get_model_input_size(args.model_name), + include_background_label=args.include_background_label, + advprop_preprocessing=args.advprop_preprocessing) + + driver.quant_scheme = args.quant_scheme + driver.round_mode = args.round_mode + driver.default_output_bw = args.default_output_bw + driver.default_param_bw = args.default_param_bw + driver.quantsim_config_file = args.quantsim_config_file + driver.ckpt_bn_folded = args.ckpt_bn_folded + + driver.eval_imagenet(args.checkpoint_path, args.imagenet_eval_glob, + args.imagenet_eval_label, 50000, + args.enable_ema, args.export_ckpt) + +def parse_args(args): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Evaluation script for an Efficientnet network.') + + parser.add_argument('--model-name', help='Name of model to eval.', default='efficientnet-lite0') + parser.add_argument('--checkpoint-path', help='Path to checkpoint to load from.') + parser.add_argument('--imagenet-eval-glob', help='Imagenet eval image glob, such as /imagenet/ILSVRC2012*.JPEG') + parser.add_argument('--imagenet-eval-label', help='Imagenet eval label file path, such as /imagenet/ILSVRC2012_validation_ground_truth.txt') + parser.add_argument('--include-background-label', help='Whether to include background as label #0', action='store_true') + parser.add_argument('--advprop-preprocessing', help='Whether to use AdvProp preprocessing', action='store_true') + parser.add_argument('--enable-ema', help='Enable exponential moving average.', default=True) + parser.add_argument('--export-ckpt', help='Exported ckpt for eval graph.', default=None) + + parser.add_argument('--ckpt-bn-folded', help='Use this flag to specify whether checkpoint has batchnorms folded already or not.', action='store_true') + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf') + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', type=int, default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', type=int, default=8) + parser.add_argument('--quantsim-config-file', help='Quantsim configuration file.', default=None) + + return parser.parse_args(args) + +def main(args=None): + args = parse_args(args) + run_evaluation(args) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/zoo_tensorflow/examples/mobilenet_v2_140_quanteval.py b/zoo_tensorflow/examples/mobilenet_v2_140_quanteval.py new file mode 100755 index 0000000..6f50d8d --- /dev/null +++ b/zoo_tensorflow/examples/mobilenet_v2_140_quanteval.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import os +import sys +import json +import argparse +from tqdm import tqdm +from glob import glob + +import numpy as np +import tensorflow as tf + +import aimet_common.defs +from aimet_tensorflow import quantsim +from aimet_tensorflow.quantsim import save_checkpoint, QuantizationSimModel +from aimet_tensorflow.batch_norm_fold import fold_all_batch_norms + +from nets import nets_factory +from preprocessing import preprocessing_factory +from deployment import model_deploy +from datasets import dataset_factory + +def wrap_preprocessing(preprocessing, height, width, num_classes, labels_offset): + '''Wrap preprocessing function to do parsing of TFrecords. + ''' + def parse(serialized_example): + features = tf.parse_single_example(serialized_example, features={ + 'image/class/label': tf.FixedLenFeature([], tf.int64), + 'image/encoded': tf.FixedLenFeature([], tf.string) + }) + + image_data = features['image/encoded'] + image = tf.image.decode_jpeg(image_data, channels=3) + label = tf.cast(features['image/class/label'], tf.int32) + label = label - labels_offset + + labels = tf.one_hot(indices=label, depth=num_classes) + image = preprocessing(image, height, width) + return image, labels + return parse + +def run_evaluation(args): + # Build graph definition + with tf.Graph().as_default(): + # Create iterator + tf_records = glob(args.dataset_dir + '/validation*') + preprocessing_fn = preprocessing_factory.get_preprocessing(args.model_name, is_training=False) + parse_function = wrap_preprocessing(preprocessing_fn, height=args.image_size, width=args.image_size, num_classes=(1001 - args.labels_offset), labels_offset=args.labels_offset) + + dataset = tf.data.TFRecordDataset(tf_records).repeat(1) + dataset = dataset.map(parse_function, num_parallel_calls=1).apply(tf.contrib.data.batch_and_drop_remainder(args.batch_size)) + iterator = dataset.make_initializable_iterator() + images, labels = iterator.get_next() + + network_fn = nets_factory.get_network_fn(args.model_name, num_classes=(1001 - args.labels_offset), is_training=False) + with tf.device('/cpu:0'): + images = tf.placeholder_with_default(images, + shape=(None, args.image_size, args.image_size, 3), + name='input') + labels = tf.placeholder_with_default(labels, + shape=(None, 1001 - args.labels_offset), + name='labels') + logits, end_points = network_fn(images) + confidences = tf.nn.softmax(logits, axis=1, name='confidences') + categorical_preds = tf.argmax(confidences, axis=1, name='categorical_preds') + categorical_labels = tf.argmax(labels, axis=1, name='categorical_labels') + correct_predictions = tf.equal(categorical_labels, categorical_preds) + top1_acc = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='top1-acc') + top5_acc = tf.reduce_mean(tf.cast(tf.nn.in_top_k(predictions=confidences, + targets=tf.cast(categorical_labels, tf.int32), + k=5), tf.float32), name='top5-acc') + + saver = tf.train.Saver() + sess = tf.Session() + + # Load model from checkpoint + if not args.ckpt_bn_folded: + saver.restore(sess, args.checkpoint_path) + else: + sess.run(tf.global_variables_initializer()) + + # Fold all BatchNorms before QuantSim + sess, folded_pairs = fold_all_batch_norms(sess, ['IteratorGetNext'], [logits.name[:-2]]) + + if args.ckpt_bn_folded: + with sess.graph.as_default(): + saver = tf.train.Saver() + saver.restore(sess, args.checkpoint_path) + + + # Define eval_func to use for compute encodings in QuantSim + def 
eval_func(session, iterations): + cnt = 0 + avg_acc_top1 = 0 + session.run('MakeIterator') + while cnt < iterations or iterations == -1: + try: + avg_acc_top1 += session.run('top1-acc:0') + cnt += 1 + except: + return avg_acc_top1 / cnt + + return avg_acc_top1 / cnt + + # Select the right quant_scheme + if args.quant_scheme == 'range_learning_tf': + quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_init + elif args.quant_scheme == 'range_learning_tf_enhanced': + quant_scheme = aimet_common.defs.QuantScheme.training_range_learning_with_tf_enhanced_init + elif args.quant_scheme == 'tf': + quant_scheme = aimet_common.defs.QuantScheme.post_training_tf + elif args.quant_scheme == 'tf_enhanced': + quant_scheme = aimet_common.defs.QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme) + + # Create QuantizationSimModel + sim = QuantizationSimModel( + session=sess, + starting_op_names=['IteratorGetNext'], + output_op_names=[logits.name[:-2]], + quant_scheme=quant_scheme, + rounding_mode=args.round_mode, + default_output_bw=args.default_output_bw, + default_param_bw=args.default_param_bw, + config_file=args.quantsim_config_file, + ) + + # Run compute_encodings + sim.compute_encodings(eval_func, forward_pass_callback_args=args.encodings_iterations) + + # Run final evaluation + sess = sim.session + + top1_acc = eval_func(sess, -1) + print('Avg accuracy Top 1: {}'.format(top1_acc)) + + +def parse_args(args): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Evaluation script for an MobileNetv2 network.') + + parser.add_argument('--model-name', help='Name of model to eval.', default='mobilenet_v2_140') + parser.add_argument('--checkpoint-path', help='Path to checkpoint to load from.') + parser.add_argument('--dataset-dir', help='Imagenet eval dataset directory.') + parser.add_argument('--labels-offset', help='Offset for whether to ignore background label', type=int, default=0) + parser.add_argument('--image-size', help='Image size.', type=int, default=224) + parser.add_argument('--batch-size', help='Batch size.', type=int, default=32) + + parser.add_argument('--ckpt-bn-folded', help='Use this flag to specify whether checkpoint has batchnorms folded already or not.', action='store_true') + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf') + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', type=int, default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', type=int, default=8) + parser.add_argument('--quantsim-config-file', help='Quantsim configuration file.', default=None) + parser.add_argument('--encodings-iterations', help='Number of iterations to use for compute encodings during quantization.', default=500) + + return parser.parse_args(args) + +def main(args=None): + args = parse_args(args) + run_evaluation(args) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/zoo_tensorflow/examples/pose_estimation_quanteval.py b/zoo_tensorflow/examples/pose_estimation_quanteval.py new file mode 100755 index 0000000..5436ba3 --- /dev/null +++ b/zoo_tensorflow/examples/pose_estimation_quanteval.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# 
============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. +# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import os +import math +import argparse +from functools import partial + +import cv2 +from scipy.ndimage.filters import gaussian_filter +import numpy as np +import tensorflow as tf +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from aimet_tensorflow import quantsim +from aimet_tensorflow.utils import graph_saver + + +def non_maxium_suppression(map, thresh): + map_s = gaussian_filter(map, sigma=3) + + map_left = np.zeros(map_s.shape) + map_left[1:, :] = map_s[:-1, :] + map_right = np.zeros(map_s.shape) + map_right[:-1, :] = map_s[1:, :] + map_up = np.zeros(map_s.shape) + map_up[:, 1:] = map_s[:, :-1] + map_down = np.zeros(map_s.shape) + map_down[:, :-1] = map_s[:, 1:] + + peaks_binary = np.logical_and.reduce((map_s >= map_left, map_s >= map_right, map_s >= map_up, map_s >= map_down, + map_s > thresh)) + + peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse + peaks_with_score = [x + (map[x[1], x[0]],) for x in peaks] + + return peaks_with_score + + +def pad_image(img, stride, padding): + h = img.shape[0] + w = img.shape[1] + + pad = 4 * [None] + pad[0] = 0 # up + pad[1] = 0 # left + pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down + pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :] * 0 + padding, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padding, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padding, (pad[2], 1, 1)) + img_padded = np.concatenate((img_padded, pad_down), axis=0) + pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padding, (1, pad[3], 1)) + img_padded = np.concatenate((img_padded, pad_right), axis=1) + + return img_padded, pad + + +def encode_input(image, scale, stride, padding): + image_scaled = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) + image_scaled_padded, pad = pad_image(image_scaled, stride, padding) + + return image_scaled_padded, pad + + +def decode_output(data, stride, padding, input_shape, image_shape): + output = np.transpose(np.squeeze(data), (1, 2, 0)) + output = cv2.resize(output, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + output = output[:input_shape[0] - padding[2], :input_shape[1] - padding[3], :] + output = cv2.resize(output, (image_shape[1], image_shape[0]), interpolation=cv2.INTER_CUBIC) + + return output + + +def preprocess(image, transforms): + mean_bgr = [34.282957728666474, 32.441979567868017, 24.339757511312481] + + image = image.astype(np.float32) + + if 'bgr' in transforms: + if image.shape[0] == 3: + image = image[::-1, :, :] + elif image.shape[2] == 3: + image = image[:, :, ::-1] + + if 'tr' in transforms: + image = image.transpose((2, 0, 1)) + + if 'mean' in transforms: + image[0, :, :] -= mean_bgr[0] + image[1, :, :] -= mean_bgr[1] + image[2, :, :] -= mean_bgr[2] + + if 'addchannel' in transforms: + image = image[np.newaxis, :, :, :] + + if 'normalize' in transforms: + image = image / 256 - 0.5 + + return image + + +def run_session(session, output_names, input_name, image, 
fast=False): + scale_search = [1.] + crop = 368 + stride = 8 + padValue = 128 + + if fast: + scales = scale_search + else: + scales = [x * crop / image.shape[0] for x in scale_search] + + heatmaps, pafs = [], [] + for scale in scales: + if fast: + horiz = image.shape[0] < image.shape[1] + sz = (496, 384) if horiz else (384, 496) + image_encoded = cv2.resize(image, dsize=(int(sz[0] * scale), int(sz[1] * scale))) + else: + image_encoded, pad = encode_input(image, scale, stride, padValue) + image_encoded_ = preprocess(image_encoded, ['addchannel', 'normalize', 'bgr']) + + paf, heatmap = session.run(output_names, + feed_dict={session.graph.get_tensor_by_name(input_name): image_encoded_}) + + if fast: + paf = cv2.resize(paf[0], (image.shape[1], image.shape[0])) + heatmap = cv2.resize(heatmap[0], dsize=(image.shape[1], image.shape[0])) + else: + paf = paf.transpose((0, 3, 1, 2)) + heatmap = heatmap.transpose((0, 3, 1, 2)) + paf = decode_output(paf, stride, pad, image_encoded.shape, image.shape) + heatmap = decode_output(heatmap, stride, pad, image_encoded.shape, image.shape) + + pafs.append(paf) + heatmaps.append(heatmap) + + return np.asarray(heatmaps).mean(axis=0), np.asarray(pafs).mean(axis=0) + + +def get_keypoints(heatmap): + thre1 = 0.1 + keypoints_all = [] + keypoints_cnt = 0 + + for part in range(19 - 1): + keypoints = non_maxium_suppression(heatmap[:, :, part], thre1) + + id = range(keypoints_cnt, keypoints_cnt + len(keypoints)) + keypoints = [keypoints[i] + (id[i],) for i in range(len(id))] + + keypoints_all.append(keypoints) + keypoints_cnt += len(keypoints) + + return keypoints_all + + +def get_limb_consistancy(paf, start_keypoint, end_keypoint, image_h, div_num=10): + vec_key = np.subtract(end_keypoint[:2], start_keypoint[:2]) + vec_key_norm = math.sqrt(vec_key[0] * vec_key[0] + vec_key[1] * vec_key[1]) + if vec_key_norm == 0: + vec_key_norm = 1 + vec_key = np.divide(vec_key, vec_key_norm) + + vec_paf = list(zip(np.linspace(start_keypoint[0], end_keypoint[0], num=div_num).astype(int), + np.linspace(start_keypoint[1], end_keypoint[1], num=div_num).astype(int))) + + vec_paf_x = np.array([paf[vec_paf[k][1], vec_paf[k][0], 0] for k in range(div_num)]) + vec_paf_y = np.array([paf[vec_paf[k][1], vec_paf[k][0], 1] for k in range(div_num)]) + + vec_sims = np.multiply(vec_paf_x, vec_key[0]) + np.multiply(vec_paf_y, vec_key[1]) + vec_sims_prior = vec_sims.mean() + min(0.5 * image_h / vec_key_norm - 1, 0) + + return vec_sims, vec_sims_prior + + +def connect_keypoints(image_shape, keypoints, paf, limbs, limbsInds): + thre2 = 0.05 + connections = [] + + for k in range(len(limbsInds)): + paf_limb = paf[:, :, limbsInds[k]] + limb_strs = keypoints[limbs[k][0]] + limb_ends = keypoints[limbs[k][1]] + + if len(limb_strs) != 0 and len(limb_ends) != 0: + cands = [] + for i, limb_str in enumerate(limb_strs): + for j, limb_end in enumerate(limb_ends): + sims, sims_p = get_limb_consistancy(paf_limb, limb_str, limb_end, image_shape[0]) + + if len(np.where(sims > thre2)[0]) > int(0.8 * len(sims)) and sims_p > 0: + cands.append([i, j, sims_p]) + cands = sorted(cands, key=lambda x: x[2], reverse=True) + + connection = np.zeros((0, 3)) + visited_strs, visited_ends = [], [] + for cand in cands: + i, j, s = cand + if i not in visited_strs and j not in visited_ends: + connection = np.vstack([connection, [limb_strs[i][3], limb_ends[j][3], s]]) + visited_strs.append(i) + visited_ends.append(j) + + if len(connection) >= min(len(limb_strs), len(limb_ends)): + break + + connections.append(connection) + else: + 
connections.append([]) + + return connections + + +def create_skeletons(keypoints, connections, limbs): + # last number in each row is the total parts number of that person + # the second last number in each row is the score of the overall configuration + skeletons = -1 * np.ones((0, 20)) + keypoints_flatten = np.array([item for sublist in keypoints for item in sublist]) + + for k in range(len(limbs)): + if connections[k] != []: + detected_str = connections[k][:, 0] + detected_end = connections[k][:, 1] + limb_str, limb_end = np.array(limbs[k]) + + for i in range(len(connections[k])): + found = 0 + subset_idx = [-1, -1] + for j in range(len(skeletons)): + if skeletons[j][limb_str] == detected_str[i] or skeletons[j][limb_end] == detected_end[i]: + subset_idx[found] = j + found += 1 + + if found == 1: + j = subset_idx[0] + if skeletons[j][limb_end] != detected_end[i]: + skeletons[j][limb_end] = detected_end[i] + skeletons[j][-1] += 1 + skeletons[j][-2] += keypoints_flatten[detected_end[i].astype(int), 2] + connections[k][i][2] + elif found == 2: # if found 2 and disjoint, merge them + j1, j2 = subset_idx + + membership = ((skeletons[j1] >= 0).astype(int) + (skeletons[j2] >= 0).astype(int))[:-2] + if len(np.nonzero(membership == 2)[0]) == 0: # merge + skeletons[j1][:-2] += (skeletons[j2][:-2] + 1) + skeletons[j1][-2:] += skeletons[j2][-2:] + skeletons[j1][-2] += connections[k][i][2] + skeletons = np.delete(skeletons, j2, 0) + else: # as like found == 1 + skeletons[j1][limb_end] = detected_end[i] + skeletons[j1][-1] += 1 + skeletons[j1][-2] += keypoints_flatten[detected_end[i].astype(int), 2] + connections[k][i][2] + + # if find no partA in the subset, create a new subset + elif not found and k < 17: + row = -1 * np.ones(20) + row[limb_str] = detected_str[i] + row[limb_end] = detected_end[i] + row[-1] = 2 + row[-2] = sum(keypoints_flatten[connections[k][i, :2].astype(int), 2]) + connections[k][i][2] + skeletons = np.vstack([skeletons, row]) + + # delete some rows of subset which has few parts occur + deleteIdx = [] + for i in range(len(skeletons)): + if skeletons[i][-1] < 4 or skeletons[i][-2] / skeletons[i][-1] < 0.4: + deleteIdx.append(i) + skeletons = np.delete(skeletons, deleteIdx, axis=0) + + return {'keypoints': skeletons[:, :18], 'scores': skeletons[:, 18]} + + +def estimate_pose(image_shape, heatmap, paf): + # limbs as pair of keypoints: [start_keypoint, end_keypoint] keypoints index to heatmap matrix + limbs = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], + [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]] + # index where each limb stands in paf matrix. 
Two consecuitive indices for x and y component of paf + limbsInd = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5], [6, 7], [8, 9], + [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27]] + + keypoints = get_keypoints(heatmap) + + connections = connect_keypoints(image_shape, keypoints, paf, limbs, limbsInd) + + skeletons = create_skeletons(keypoints, connections, limbs) + + return skeletons, np.array([item for sublist in keypoints for item in sublist]) + + +def parse_results(skeletons, points): + coco_indices = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] + + skeletons_out, scores = [], [] + for score, keypoints in zip(skeletons['scores'], skeletons['keypoints']): + skeleton = [] + for p in range(len(keypoints)): + if p == 1: + continue + ind = int(keypoints[p]) + if ind >= 0: + point = {'x': points[ind, 0], 'y': points[ind, 1], 'score': points[ind, 2], 'id': coco_indices[p]} + skeleton.append(point) + + skeletons_out.append(skeleton) + scores.append(score) + + return {'skeletons': skeletons_out, 'scores': scores} + + +class COCOWrapper: + def __init__(self, coco_path, num_imgs=None): + self.coco_path = coco_path + self.num_imgs = num_imgs + # sys.path.append(self.coco_apth + "codes/PythonAPI") + + def get_images(self): + imgs = self.cocoGT.imgs.values() + + image_ids = sorted(map(lambda x: x['id'], self.cocoGT.imgs.values())) + if self.num_imgs: + image_ids = image_ids[:self.num_imgs] + imgs = list(filter(lambda x: x['id'] in image_ids, imgs)) + + return imgs + + def evaluate_json(self, obj): + # initialize COCO detections api + cocoDT = self.cocoGT.loadRes(obj) + + imgIds = sorted(self.cocoGT.getImgIds()) + if self.num_imgs: + imgIds = imgIds[:self.num_imgs] + + # running evaluation + cocoEval = COCOeval(self.cocoGT, cocoDT, 'keypoints') + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + return cocoEval.stats[0::5] + + def get_results_json(self, results, imgs): + results_obj = [] + for img, result in list(zip(imgs, results)): + for score, skeleton in list(zip(result['scores'], result['skeletons'])): + obj = {'image_id': img['id'], 'category_id': 1, 'keypoints': np.zeros(shape=(3, 17))} + + for keypoint in skeleton: + obj['keypoints'][0, keypoint['id']] = keypoint['x'] - 0.5 + obj['keypoints'][1, keypoint['id']] = keypoint['y'] - 0.5 + obj['keypoints'][2, keypoint['id']] = 1 + obj['keypoints'] = list(np.reshape(obj['keypoints'], newshape=(51,), order='F')) + obj['score'] = score / len(skeleton) + + results_obj.append(obj) + + return results_obj + + @property + def cocoGT(self): + annType = 'keypoints' + prefix = 'person_keypoints' + print('Initializing demo for *%s* results.' 
% (annType)) + + # initialize COCO ground truth api + dataType = 'val2014' + annFile = os.path.join(self.coco_path, 'annotations/%s_%s.json' % (prefix, dataType)) + cocoGT = COCO(annFile) + + if not cocoGT: + raise AttributeError('COCO ground truth demo failed to initialize!') + + return cocoGT + + +def evaluate_session(session, + coco_path, + input_name, + output_names, + num_imgs=None, + fast=False): + coco = COCOWrapper(coco_path, num_imgs) + + results = [] + image_path = os.path.join(coco.coco_path, 'images/val2014/') + imgs = coco.get_images() + + for i, img in enumerate(imgs): + image = cv2.imread(image_path + img['file_name']) # B,G,R order + + heatmap, paf = run_session(session, output_names, input_name, image, fast) + + skeletons, keypoints = estimate_pose(image.shape, heatmap, paf) + results.append(parse_results(skeletons, keypoints)) + + try: + ans = coco.evaluate_json(coco.get_results_json(results, imgs)) + return ans + except Exception: + return [0, 0] + + +def parse_args(): + parser = argparse.ArgumentParser(prog='pose_estimation_quanteval', + description='Evaluate the quantized pose estimation model') + + parser.add_argument('model_dir', + help='The location where the meta checkpoint is saved,' + 'should have .meta as file suffix', + type=str) + parser.add_argument('coco_path', + help='The location where coco images and annotations are saved. ' + 'It assumes a folder structure containing two subdirectories ' + '`images/val2014` and `annotations`. Right now only the val2014 ' + 'dataset with person_keypoints is supported', + type=str) + parser.add_argument('--representative-datapath', + '-reprdata', + help='The location where representative data are stored. ' + 'The data will be used for computation of encodings', + type=str) + parser.add_argument('--quant-scheme', + '-qs', + help='Support two schemes for quantization: [`tf` or `tf_enhanced`],' + '`tf_enhanced` is used by default', + default='tf_enhanced', + choices=['tf', 'tf_enhanced'], + type=str) + + return parser.parse_args() + + +def pose_estimation_quanteval(args): + # load the model checkpoint from meta + sess = graph_saver.load_model_from_meta(args.model_dir) + + # create quantsim object which inserts quant ops between layers + sim = quantsim.QuantizationSimModel(sess, + starting_op_names=['input'], + output_op_names=['node184', 'node196'], + quant_scheme=args.quant_scheme) + + partial_eval = partial(evaluate_session, + input_name='input:0', + output_names=['node184_quantized:0', 'node196_quantized:0'], + num_imgs=500 + ) + sim.compute_encodings(partial_eval, args.coco_path) + + eval_num = evaluate_session(sim.session, + args.coco_path, + input_name='input:0', + output_names=['node184_quantized:0', 'node196_quantized:0'] + ) + print(f'The [mAP, mAR] results are: {eval_num}') + + +if __name__ == '__main__': + args = parse_args() + pose_estimation_quanteval(args) diff --git a/zoo_tensorflow/examples/retinanet_quanteval.py b/zoo_tensorflow/examples/retinanet_quanteval.py new file mode 100755 index 0000000..d9a1808 --- /dev/null +++ b/zoo_tensorflow/examples/retinanet_quanteval.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved.
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import os +import sys +import argparse +import progressbar +from glob import glob +from tqdm import tqdm + + +import tensorflow as tf +from keras import backend as K + +# Keras RetinaNet +from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image +from keras_retinanet.utils.coco_eval import evaluate_coco +from keras_retinanet import models + +# AIMET +from aimet_tensorflow import quantsim +from aimet_tensorflow.batch_norm_fold import fold_all_batch_norms +from aimet_tensorflow.quantsim import save_checkpoint, load_checkpoint + + + + +def quantize_retinanet(model_path, cocopath, action): + """ + Quantize the original RetinaNet model. + Loads the keras model. + Retrieve the back-end TF session and saves a checkpoint for quantized evaluatoin by AIMET + Invoke AIMET APIs to quantize the and save a quantized checkpoint - which includes quantize ops + :param model_path: Path to the downloaded keras retinanet model - read the docs for download path + :param cocopath: Path to the top level COCO dataset + :param action: eval_original or eval_quantized + :return: + """ + + model_path = os.path.join(model_path, 'resnet50_coco_best_v2.1.0.h5') + model = models.load_model(model_path, backbone_name='resnet50') + + # Note that AIMET APIs need TF session. So retrieve the TF session from the backend + session = K.get_session() + if action=="eval_original": + saver = tf.train.Saver() + saver.save(session, "./original_model.ckpt") + else: + in_tensor="input_1:0" + out_tensor = ['filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0', + 'filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0', + 'filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0'] + selected_ops = ["P" + str(i) + "/BiasAdd" for i in range(3, 8)] + session, folded_pairs = fold_all_batch_norms(session, [in_tensor.split(":")[0]], selected_ops) + sim = quantsim.QuantizationSimModel(session, [in_tensor.split(":")[0]], selected_ops) + def forward_pass(session2: tf.Session, args): + images_raw = glob(cocopath+"/images/train2017/*.jpg") + for idx in tqdm(range(10)): + image = read_image_bgr(images_raw[idx]) + image = preprocess_image(image) + image, scale = resize_image(image) + session2.run(out_tensor, feed_dict={in_tensor: [image]}) + + sim.compute_encodings(forward_pass, None) + save_checkpoint(sim, './quantzied_sim.ckpt', 'orig_quantsim_model') + + +assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead." 
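# ----------------------------------------------------------------------------
# Editor's note: the sketch below is illustrative and not part of the original
# patch. AIMET's compute_encodings() expects a forward-pass callback whose only
# job is to push a small amount of representative data through the session so
# that activation ranges can be observed. A standalone version of the callback
# used inside quantize_retinanet() could look like this; the image paths and
# tensor names are assumptions that must match the caller's graph.
def calibration_forward_pass(session, image_paths, in_tensor, out_tensors, num_samples=10):
    """Run a few preprocessed COCO images through the graph to calibrate encodings."""
    for path in image_paths[:num_samples]:
        image = read_image_bgr(path)          # keras-retinanet helpers imported above
        image = preprocess_image(image)
        image, _ = resize_image(image)
        session.run(out_tensors, feed_dict={in_tensor: [image]})
# ----------------------------------------------------------------------------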
+ +def evaluate(generator, action, threshold=0.05): + + """ + Evaluate the model and saves results + :param generator: generator for validation dataset + :param action: eval the original or quantized model + :param threshold: Score Threshold + :return: + """ + in_tensor = "input_1:0" + out_tensor = ['filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0', + 'filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0', + 'filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0'] + + + with tf.Session() as new_sess: + if action=='eval_original': + saver = tf.train.import_meta_graph('./original_model.ckpt.meta') + saver.restore(new_sess, './original_model.ckpt') + else: + + new_quantsim = load_checkpoint('./quantzied_sim.ckpt', 'orig_quantsim_model') + new_sess = new_quantsim.session + + model = TFRunWrapper(new_sess, in_tensor, out_tensor) + + evaluate_coco(generator, model, threshold) + + +def create_generator(args, preprocess_image): + """ + Create generator to use for eval for coco validation set + :param args: args from commandline + :param preprocess_image: input preprocessing + :return: + """ + common_args = { + 'preprocess_image': preprocess_image, + } + + + from keras_retinanet.preprocessing.coco import CocoGenerator + + validation_generator = CocoGenerator( + args.coco_path, + 'val2017', + image_min_side=args.image_min_side, + image_max_side=args.image_max_side, + config=args.config, + shuffle_groups=False, + **common_args + ) + + return validation_generator + + +def parse_args(args): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.') + subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type') + subparsers.required = True + + coco_parser = subparsers.add_parser('coco') + coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).') + coco_parser.add_argument('model_path', help='Path to the RetinaNet model.') + + parser.add_argument('--action', help='action to perform - eval_quantized|eval_original', default='eval_quantized', choices={"eval_quantized", "eval_original"}) + parser.add_argument('--convert-model', help='Convert the model to an inference model (ie. 
the input is a training model).', action='store_true') + parser.add_argument('--backbone', help='The backbone of the model.', default='resnet50') + parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).', type=int) + parser.add_argument('--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float) + parser.add_argument('--iou-threshold', help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float) + parser.add_argument('--max-detections', help='Max Detections per image (defaults to 100).', default=100, type=int) + parser.add_argument('--save-path', help='Path for saving images with detections (doesn\'t work for COCO).') + parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) + parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) + parser.add_argument('--config', help='Path to a configuration parameters .ini file (only used with --convert-model).') + + return parser.parse_args(args) + +# The coco_eval in keras-retinanet repository needs a model as input for prediction +# We have a TF back-end session - so we wrap it in a Wrapper and implement predict to call session run +class TFRunWrapper(): + def __init__(self, tf_session, in_tensor, out_tensor): + self.sess = tf_session + self.in_tensor = in_tensor + self.out_tensor = out_tensor + + def predict_on_batch(self, input): + return self.sess.run(self.out_tensor, feed_dict={self.in_tensor: input}) + + +def main(args=None): + args = parse_args(args) + action = args.action + backbone = models.backbone("resnet50") + modelpath = args.model_path + cocopath= args.coco_path + generator = create_generator(args, backbone.preprocess_image) + quantize_retinanet(modelpath, cocopath, action) + evaluate(generator, action, args.score_threshold) + +if __name__ == '__main__': + main() diff --git a/zoo_tensorflow/examples/srgan_quanteval.py b/zoo_tensorflow/examples/srgan_quanteval.py new file mode 100755 index 0000000..1249c17 --- /dev/null +++ b/zoo_tensorflow/examples/srgan_quanteval.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import glob +import os +import warnings +import argparse +from functools import partial + +import tensorflow as tf +import numpy as np +from skimage.metrics import peak_signal_noise_ratio as psnr +from skimage.metrics import structural_similarity as ssim +from aimet_tensorflow import quantsim +from aimet_tensorflow.cross_layer_equalization import equalize_model +from aimet_tensorflow.bias_correction import QuantParams, BiasCorrectionParams, BiasCorrection + +from mmcv.image.colorspace import rgb2ycbcr +from model.srgan import generator + + +def make_dataset(filenames): + ds = tf.data.Dataset.from_tensor_slices(filenames) + ds = ds.map(tf.io.read_file) + ds = ds.map(lambda x: tf.image.decode_png(x, channels=3)) + + return ds + + +def evaluate_session(sess, + image_files, + input_name, + output_name, + mode='y_channel', + output_dir=None): + ''' + :param sess: a tensorflow session on which we run evaluation + :param image_files: a sequence containing sequence of image filenames as strings + :param input_name: a string indicating the input tensor's name + :param output_name: a string indicating the output tensor's name + :param mode: a string indicating on which space to evalute the PSNR & SSIM metrics. + Accepted values are ['y_channel', 'rgb'] + :param output_dir: If specified, super resolved images will be saved under the path + :return: a tuple containing the computed values of (PSNR, SSIME) sequences + ''' + # TODO: factor out the image_files but take a custom dataset instead + # TODO: support multiple inputs and outputs + if mode == 'rgb': + print('Testing on RGB channels...') + elif mode == 'y_channel': + print('Testing on Y channel...') + else: + raise ValueError('evaluation mode not supported!' 
+ 'Must be one of `RGB` or `y_channel`') + # batch size needed to align with input shape (?, ?, ?, 3) + batch_size = 1 + + with sess.graph.as_default(): + lr_image_files, hr_image_files = image_files + # make a dataset from input and reference images + lr_valid_ds = make_dataset(lr_image_files) + lr_valid_ds = lr_valid_ds.map(lambda x: tf.cast(x, dtype=tf.float32)) + + hr_valid_ds = make_dataset(hr_image_files) + + valid_ds = tf.data.Dataset.zip((lr_valid_ds, hr_valid_ds)) + # make an iterator from the dataset, batch applied here + valid_ds = valid_ds.batch(batch_size) + valid_ds_iter = valid_ds.make_one_shot_iterator() + imgs = valid_ds_iter.get_next() + + # crop border width 4 as suggested in https://arxiv.org/abs/1609.04802 + crop_border = 4 + psnr_values = [] + ssim_values = [] + + for lr_image_file in lr_image_files: + lr_img, hr_img = sess.run(imgs) + # get inference images + sr_img = sess.run(sess.graph.get_tensor_by_name(output_name), + {sess.graph.get_tensor_by_name(input_name): lr_img}) + sr_img = tf.clip_by_value(sr_img, 0, 255) + sr_img = tf.round(sr_img) + sr_img = tf.cast(sr_img, tf.uint8) + + sr_img = sess.run(sr_img) + + if output_dir: + sr_img_png = tf.image.encode_png(sr_img[0]) + # use the input image's name as output image's name by default + _, filename = os.path.split(lr_image_file) + filename = os.path.join(output_dir, filename) + + save_img = tf.io.write_file(filename, sr_img_png) + sess.run(save_img) + + if mode == 'y_channel': + sr_img = rgb2ycbcr(sr_img ,y_only=True) + hr_img = rgb2ycbcr(hr_img, y_only=True) + + sr_img = np.expand_dims(sr_img, axis=-1) + hr_img = np.expand_dims(hr_img, axis=-1) + + sr_img = sr_img[:, crop_border:-crop_border, crop_border:-crop_border, :] + hr_img = hr_img[:, crop_border:-crop_border, crop_border:-crop_border, :] + + psnr_value = psnr(hr_img[0], sr_img[0], data_range=255) + ssim_value = ssim(hr_img[0, :, :, 0], sr_img[0, :, :, 0], + multichannel=False, data_range=255.) + + psnr_values.append(psnr_value) + ssim_values.append(ssim_value) + + return psnr_values, ssim_values + + +def parse_args(): + parser = argparse.ArgumentParser(prog='srgan_quanteval', + description='Evaluate the pre and post quantized SRGAN model') + + parser.add_argument('weights_path', + help='The location where weight file for SRGAN model is saved', + type=str) + parser.add_argument('images_path', + help='The location where .png images are saved', + type=str) + parser.add_argument('--representative-datapath', + '-repr', + help='The location where representative data are stored. 
' + 'The data will be used for bias correction and ' + 'computation of encodings', + type=str) + parser.add_argument('--cross-layer-equalization', + '-cle', + action='store_true', + help='Applying cross layer equalization') + parser.add_argument('--bias-correction', + '-bc', + action='store_true', + help='Applying bias correction') + parser.add_argument('--use-cuda', + '-cuda', + help='Whether to use cuda, True by default', + default=True, + type=bool) + parser.add_argument('--quant-scheme', + '-qs', + help='Support two schemes for quantization: [`tf` or `tf_enhanced`],' + '`tf_enhanced` is used by default', + default='tf_enhanced', + choices=['tf', 'tf_enhanced'], + type=str) + parser.add_argument('--default-output-bw', + '-bout', + help='Default bitwidth (4-31) to use for quantizing layer inputs and outputs', + default=8, + choices=range(4, 32), + type=int) + parser.add_argument('--default-param-bw', + '-bparam', + help='Default bitwidth (4-31) to use for quantizing layer parameters', + default=8, + choices=range(4, 32), + type=int) + parser.add_argument('--num-quant-samples', + help='Number of quantization samples for Bias Correction, 10 by default', + default=10, + type=int) + parser.add_argument('--num-bias-correct-samples', + help='Number of samples for Bias Correction, 500 by default', + default=500, + type=int) + parser.add_argument('--output-dir', + '-outdir', + help='If specified, output images of quantized model ' + 'will be saved under this directory', + default=None, + type=str) + + return parser.parse_args() + +def main(args): + # configuration for efficient use of gpu + config = tf.ConfigProto() + config.gpu_options.allow_growth = True + + print('Loading srgan generator...') + gen_graph = tf.Graph() + with gen_graph.as_default(): + gen_sess = tf.Session(config=config, graph=gen_graph) + with gen_sess.as_default(): + srgan_generator = generator() + srgan_generator.load_weights(args.weights_path) + + # sort files by filenames, assuming names match in both paths + lr_images_files = sorted(glob.glob(os.path.join(args.images_path, '*LR.png'))) + hr_images_files = sorted(glob.glob(os.path.join(args.images_path, '*HR.png'))) + + # check if number of images align + if len(lr_images_files) != len(hr_images_files): + raise RuntimeError('length of image files doesn`t match,' + 'need same number of images for both' + 'low resolution and high resolution!') + + image_files = (lr_images_files, hr_images_files) + + # two list of metrics on all images + psnr_vals, ssim_vals = evaluate_session(gen_sess, image_files, + srgan_generator.input.name, + srgan_generator.output.name) + psnr_val = np.mean(psnr_vals) + ssim_val = np.mean(ssim_vals) + print(f'Mean PSNR and SSIM for given images on original model are: [{psnr_val}, {ssim_val}]') + + # TODO: use a better default dataset for compute encodings when not given by users + # use low resolution images if no representative lr data are provided + + # use low and high resolution images if no representative lr and hr data are provided + if args.representative_datapath: + bc_lr_data = glob.glob(os.path.join(args.representative_datapath, '*LR.png')) + comp_encodings_lr_data = glob.glob(os.path.join(args.representative_datapath, '*LR.png')) + comp_encodings_hr_data = glob.glob(os.path.join(args.representative_datapath, '*HR.png')) + else: + warnings.warn('No representative input data are given,' + 'bias correction and computation of encodings will be done' + 'on part of all of the low resolution images!') + bc_lr_data = lr_images_files + + 
warnings.warn('No representative reference data are given,' + 'computation of encodings will be done' + 'on part of all of the high resolution images!') + comp_encodings_lr_data = lr_images_files + comp_encodings_hr_data = hr_images_files + + comp_encodings_data = (comp_encodings_lr_data, comp_encodings_hr_data) + + if args.cross_layer_equalization: + print('Applying cross layer equalization (CLE) to session...') + gen_sess = equalize_model(gen_sess, + start_op_names=srgan_generator.input.op.name, + output_op_names=srgan_generator.output.op.name) + + if args.bias_correction: + print('Applying Bias Correction (BC) to session...') + # the dataset being evaluated might have varying image sizes + # so right now only use batch size 1 + batch_size = 1 + num_imgs = len(bc_lr_data) + + quant_params = QuantParams(use_cuda=args.use_cuda, quant_mode=args.quant_scheme) + bias_correction_params = BiasCorrectionParams(batch_size=batch_size, + num_quant_samples=min(num_imgs, args.num_quant_samples), + num_bias_correct_samples=min(num_imgs, args.num_bias_correct_samples), + input_op_names=[srgan_generator.input.op.name], + output_op_names=[srgan_generator.output.op.name]) + + ds = make_dataset(bc_lr_data) + ds = ds.batch(batch_size) + + gen_sess = BiasCorrection.correct_bias(gen_sess, bias_correction_params, quant_params, ds) + + # creating quantsim object which inserts quantizer ops + sim = quantsim.QuantizationSimModel(gen_sess, + starting_op_names=[srgan_generator.input.op.name], + output_op_names=[srgan_generator.output.op.name], + quant_scheme=args.quant_scheme, + default_output_bw=args.default_output_bw, + default_param_bw=args.default_param_bw) + + # compute activation encodings + # usually achieves good results when data being used for computing + # encodings are representative of its task + partial_eval = partial(evaluate_session, + input_name=srgan_generator.input.name, + output_name='lambda_3/mul_quantized:0') + sim.compute_encodings(partial_eval, comp_encodings_data) + + psnr_vals, ssim_vals = evaluate_session(sim.session, image_files, + srgan_generator.input.name, + 'lambda_3/mul_quantized:0', + output_dir=args.output_dir) + psnr_val = np.mean(psnr_vals) + ssim_val = np.mean(ssim_vals) + + print(f'Mean PSNR and SSIM for given images on quantized model are: [{psnr_val}, {ssim_val}]') + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/zoo_tensorflow/examples/ssd_mobilenet_v2_quanteval.py b/zoo_tensorflow/examples/ssd_mobilenet_v2_quanteval.py new file mode 100755 index 0000000..d411bcf --- /dev/null +++ b/zoo_tensorflow/examples/ssd_mobilenet_v2_quanteval.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. +# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +# ============================================================================== +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +import os +import json +import argparse +import logging +import tensorflow as tf +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tensorflow.contrib.slim import tfexample_decoder as slim_example_decoder +from tensorflow.contrib.quantize.python import quantize +from tensorflow.contrib.quantize.python import fold_batch_norms + +from object_detection.core import standard_fields as fields +from object_detection.data_decoders.tf_example_decoder import TfExampleDecoder +from aimet_tensorflow import quantizer as q +from aimet_tensorflow import quantsim +from aimet_tensorflow.batch_norm_fold import fold_all_batch_norms + +logger = logging.getLogger(__file__) + + +def load_graph(graph, meta_graph, checkpoint=None): + """ + Load a TF graph given the meta and checkpoint files + :param graph: Graph to load into + :param meta_graph: Meta file + :param checkpoint: Checkpoint file + :return: Newly created TF session + """ + gpu_options = tf.GPUOptions(allow_growth=True) + config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options) + sess = tf.Session(config=config, graph=graph) + # Open the graph and restore the parameters + saver = tf.train.import_meta_graph(meta_graph, clear_devices=True) + if checkpoint is None: + checkpoint = meta_graph.split('.meta')[0] + saver.restore(sess, checkpoint) + return sess, saver + + +def initialize_uninitialized_vars(sess): + """ + Some graphs have variables created after training that need to be initialized. + However, in pre-trained graphs we don't want to reinitialize variables that are already + which would overwrite the values obtained during training. Therefore search for all + uninitialized variables and initialize ONLY those variables. 
+ :param sess: TF session + :return: + """ + from itertools import compress + global_vars = tf.global_variables() + is_not_initialized = sess.run([~(tf.is_variable_initialized(var)) for var in global_vars]) + uninitialized_vars = list(compress(global_vars, is_not_initialized)) + if uninitialized_vars: + sess.run(tf.variables_initializer(uninitialized_vars)) + +class CocoParser: + def __init__(self, data_inputs=None, validation_inputs=None, batch_size=1): + """ + Constructor + :param data_inputs: List of input ops for the model + :param validation_inputs: List of validation ops for the model + :param batch_size: Batch size for the data + """ + self._validation_inputs = validation_inputs + self._data_inputs = data_inputs + self._batch_size = batch_size + + if data_inputs is None: + self._data_inputs = ['image_tensor'] + else: + self._data_inputs = data_inputs + self.keys_to_features = TfExampleDecoder().keys_to_features + + self.items_to_handlers = { + fields.InputDataFields.image: ( + slim_example_decoder.Image(image_key='image/encoded', format_key='image/format', channels=3)), + fields.InputDataFields.source_id: (slim_example_decoder.Tensor('image/source_id')), + } + + def get_data_inputs(self): + return self._data_inputs + + def get_validation_inputs(self): + return self._validation_inputs + + def get_batch_size(self): + return self._batch_size + + def parse(self, serialized_example, is_trainning): + """ + Parse one example + :param serialized_example: + :param is_trainning: + :return: tensor_dict + """ + decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features, + self.items_to_handlers) + keys = decoder.list_items() + tensors = decoder.decode(serialized_example, items=keys) + tensor_dict = dict(zip(keys, tensors)) + + tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) + tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape( + tensor_dict[fields.InputDataFields.image])[:2] + + tensor_dict[fields.InputDataFields.image] = tf.image.resize_images( + tensor_dict[fields.InputDataFields.image], tf.stack([300, 300]), + method=0) + + if fields.InputDataFields.image_additional_channels in tensor_dict: + channels = tensor_dict[fields.InputDataFields.image_additional_channels] + channels = tf.squeeze(channels, axis=3) + channels = tf.transpose(channels, perm=[1, 2, 0]) + tensor_dict[fields.InputDataFields.image_additional_channels] = channels + + if fields.InputDataFields.groundtruth_boxes in tensor_dict: + is_crowd = fields.InputDataFields.groundtruth_is_crowd + tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) + + def default_groundtruth_weights(): + shape = tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0] + return tf.ones([shape], dtype=tf.float32) + + shape = tf.shape(tensor_dict[fields.InputDataFields.groundtruth_weights])[0] + tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( + tf.greater(shape, 0), + lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], + default_groundtruth_weights) + + return tensor_dict + + def get_batch(self, iterator, next_element, sess): + """ + Get the next batch of data + :param next_element: + :param iterator: Data iterator + :return: Inputs in feed_dict form + """ + try: + keys = next_element.keys() + tensors = [] + for key in keys: + tensors.append(next_element[key]) + tensors_np = sess.run(tensors) + except tf.errors.OutOfRangeError: + tf.logging.error('tf.errors.OutOfRangeError') + raise + return dict(zip(keys, tensors_np)) + + +class
TfRecordGenerator: + """ Dataset generator for TfRecords""" + + def __init__(self, dataset_dir, parser, file_pattern=None, is_trainning=False, num_gpus=1, num_epochs=None): + """ + Constructor + :param dataset_dir: The directory where the dataset files are stored. + :param file_pattern: The file pattern to use for matching the dataset source files. + :param parser: parser function to read tfrecords. + :param num_gpus: The number of GPUs being used. Data batches must be generated for each GPU device + :param num_epochs: How many times to repeat the dataset. Default is forever. Then the + amount of data generated is determined by the number of iterations the model is run and the batch + size. If set to a specific number the dataset will only provide the amount of the total dataset + 'num_epochs' times. + :return: A new TfRecord generator used to generate data for model analysis + """ + self._parser = parser + self._num_gpus = num_gpus + + # Setup the Dataset reader + if not file_pattern: + if not is_trainning: + file_pattern = 'validation-*-of-*' + else: + file_pattern = 'train-*-of-*' + file_pattern = os.path.join(dataset_dir, file_pattern) + tfrecords = tf.data.Dataset.list_files(file_pattern, shuffle=False) + self._dataset = tf.data.TFRecordDataset(tfrecords).repeat(num_epochs) + batch_size = self._parser.get_batch_size() + parse_fn = lambda x: self._parser.parse(x, is_trainning) + self._dataset = self._dataset.map(parse_fn) + self._dataset = self._dataset.batch(batch_size) + + # Initialize the iterator. This must be allocated during init when the + # generator is to be used manually. Otherwise the generator will generate a + # new iterator each time it's used as an iterator + with self._dataset._graph.as_default(): + self._iterator = self._dataset.make_one_shot_iterator() + self._next_element = self._iterator.get_next() + self.sess = tf.Session() + + def __iter__(self): + """ + Iter method for the generator + :return: + """ + with self._dataset._graph.as_default(): + self._iterator = self._dataset.make_one_shot_iterator() + self._next_element = self._iterator.get_next() + self.sess = tf.Session() + return self + + def __next__(self): + """ + Return the next set of batched data + + **NOTE** This function will not return new batches until the previous batches have + actually been used by a call to tensorflow. Eg used in a graph with a call to + 'run' etc. If it's unused the same tensors will be returned over and over again. 
+ + :return: + """ + return self._parser.get_batch(self._iterator, self._next_element, self.sess) + + # Map next for python27 compatibility + next = __next__ + + def get_data_inputs(self): + return self._parser.get_data_inputs() + + def get_validation_inputs(self): + return self._parser.get_validation_inputs() + + def get_batch_size(self): + return self._parser.get_batch_size() + + @property + def dataset(self): + return self._dataset + + +class MobileNetV2SSDRunner: + + def __init__(self, generator, checkpoint, annotation_file, graph=None, network=None, + is_train=False, + fold_bn=False, quantize=False): + self._generator = generator + self._checkpoint = checkpoint + self._annotation_file = annotation_file + self._graph = graph + self._network = network + self._is_train = is_train + self._fold_bn = fold_bn + self._quantize = quantize + if is_train is False: + self._eval_session, self._eval_saver = self.build_eval_graph() + + @staticmethod + def post_func(tensors_dict, annotation_file): + json_list = [] + # t_bbox [ymin,xmin,ymax,xmax] + # gt [xmin,ymin,width,height] + for i in range(len(tensors_dict)): + result_dict = tensors_dict[i] + for j in range(len(result_dict[fields.DetectionResultFields.detection_scores])): + t_score = result_dict[fields.DetectionResultFields.detection_scores][j] + t_bbox = result_dict[fields.DetectionResultFields.detection_boxes][j] + t_class = result_dict[fields.DetectionResultFields.detection_classes][j] + image_id = int(result_dict[fields.InputDataFields.source_id][j]) + Height = result_dict[fields.InputDataFields.original_image_spatial_shape][j][0] + Width = result_dict[fields.InputDataFields.original_image_spatial_shape][j][1] + for index, conf in enumerate(t_score): + top_conf = float(t_score[index]) + top_ymin = t_bbox[index][0] * Height + top_xmin = t_bbox[index][1] * Width + top_h = (t_bbox[index][3] - t_bbox[index][1]) * Width + top_w = (t_bbox[index][2] - t_bbox[index][0]) * Height + top_cat = int(t_class[index]) + json_dict = {'image_id': image_id, 'category_id': top_cat, + 'bbox': [top_xmin, top_ymin, top_h, top_w], 'score': top_conf} + json_list.append(json_dict) + + cocoGt = COCO(annotation_file) + cocoDt = cocoGt.loadRes(json_list) + cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + dict_map_result = {'IoU[0.50:0.95]': cocoEval.stats[0], 'IoU[0.50]': cocoEval.stats[1], + 'IoU[0.75]': cocoEval.stats[2]} + return dict_map_result + + @property + def eval_session(self): + return self._eval_session + + def evaluate(self, session, iterations, loginfo=None, generator=None, post_func=None, eval_names=None): + generator = generator if generator is not None else self._generator + post_func = post_func if post_func is not None else self.post_func + eval_names = eval_names if eval_names is not None else self.eval_names + if loginfo is not None: + logger.info(loginfo) + return self.run_graph(session, generator, eval_names, post_func, iterations) + + def build_eval_graph(self): + g = tf.Graph() + with g.as_default(): + sess, saver = load_graph(g, self._graph, self._checkpoint) + if self._fold_bn: + fold_batch_norms.FoldBatchNorms(graph=sess.graph, freeze_batch_norm_delay=None, + is_training=False) + if self._quantize: + quantize.Quantize( + graph=sess.graph, + is_training=False, + quant_delay=0, + weight_bits=8, + activation_bits=8, + scope=None) + return sess, saver + + def run_graph(self, session, generator, eval_names, post_func, iterations): + """ + Evaluates the graph's performance by running 
data through the network + and calling an evaluation function to generate the performance metric. + :param session: The tensorflow session that contains the graph + :param generator: The data generator providing the network with batch data + :param eval_names: The names providing the nodes on which the network's performance should be judged + :param post_func: The customized post processing function to evaluate the network performance + :param iterations: The number of iterations (batches) to run through the network + :return: + """ + + initialize_uninitialized_vars(session) + image_tensor = session.graph.get_tensor_by_name('image_tensor:0') + eval_outputs = [] + for name in eval_names: + op = session.graph.get_operation_by_name(name) + eval_outputs.append(op.outputs[0]) + counters = {'skipped': 0, 'success': 0} + result_list = [] + try: + for _, input_dict in zip(range(iterations), generator): + # Setup the feed dictionary + feed_dict = {image_tensor: input_dict[fields.InputDataFields.image]} + try: + output_data = session.run(eval_outputs, feed_dict=feed_dict) + counters['success'] += 1 + export_dict = { + fields.InputDataFields.source_id: + input_dict[fields.InputDataFields.source_id], + fields.InputDataFields.original_image_spatial_shape: + input_dict[fields.InputDataFields.original_image_spatial_shape] + } + export_dict.update(dict(zip(eval_names, output_data))) + result_list.append(export_dict) + except tf.errors.InvalidArgumentError: + counters['skipped'] += 1 + except tf.errors.OutOfRangeError: + logger.info("Completed evaluation iterations: %i, success: %i, skipped: %i", + iterations, counters['success'], counters['skipped']) + finally: + if post_func is not None: + perf = post_func(result_list, self._annotation_file) + logger.info("%s", perf) + else: + perf = result_list + return perf + + def forward_func(self, sess, iterations): + return self.run_graph(sess, self._generator, self.eval_names, None, iterations) + + @property + def eval_names(self): + return [fields.DetectionResultFields.detection_scores, fields.DetectionResultFields.detection_boxes, + fields.DetectionResultFields.detection_classes] + + +def parse_args(): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Evaluation script for SSD MobileNet v2.') + + parser.add_argument('--model-checkpoint', help='Path to model checkpoint', required=True) + parser.add_argument('--dataset-dir', help='Dir path to dataset (TFRecord format)', required=True) + parser.add_argument('--TFRecord-file-pattern', help='Dataset file pattern, e.g. 
coco_val.record-*-of-00010', + required=True) + parser.add_argument('--annotation-json-file', help='Path to ground truth annotation json file', required=True) + parser.add_argument('--eval-batch-size', help='Batch size to evaluate', default=1, type=int) + parser.add_argument('--eval-num-examples', help='Number of examples to evaluate, total 5000', default=5000, + type=int) + parser.add_argument('--quantsim-output-dir', help='Use this flag if want to save the quantized graph') + + return parser.parse_args() + + +def ssd_mobilenet_v2_quanteval(args): + parser = CocoParser(batch_size=args.eval_batch_size) + generator = TfRecordGenerator(dataset_dir=args.dataset_dir, file_pattern=args.TFRecord_file_pattern, + parser=parser, is_trainning=False) + + # Allocate the runner related to model session run + runner = MobileNetV2SSDRunner(generator=generator, checkpoint=args.model_checkpoint, + annotation_file=args.annotation_json_file, graph=args.model_checkpoint + '.meta', + fold_bn=False, quantize=False, is_train=False) + float_sess = runner.eval_session + + iterations = int(args.eval_num_examples / args.eval_batch_size) + runner.evaluate(float_sess, iterations, 'original model evaluating') + + # Fold BN + after_fold_sess, _ = fold_all_batch_norms(float_sess, generator.get_data_inputs(), ['concat', 'concat_1']) + # + # Allocate the quantizer and quantize the network using the default 8 bit params/activations + sim = quantsim.QuantizationSimModel(after_fold_sess, ['FeatureExtractor/MobilenetV2/MobilenetV2/input'], + output_op_names=['concat', 'concat_1'], + quant_scheme='tf', + default_output_bw=8, default_param_bw=8, + use_cuda=False) + # Compute encodings + sim.compute_encodings(runner.forward_func, forward_pass_callback_args=50) + # Export model for target inference + if args.quantsim_output_dir: + sim.export(os.path.join(args.quantsim_output_dir, 'export'), 'model.ckpt') + # Evaluate simulated quantization performance + runner.evaluate(sim.session, iterations, 'quantized model evaluating') + + +if __name__ == '__main__': + args = parse_args() + ssd_mobilenet_v2_quanteval(args) diff --git a/zoo_torch/Docs/DeepLabV3.md b/zoo_torch/Docs/DeepLabV3.md new file mode 100644 index 0000000..6783af5 --- /dev/null +++ b/zoo_torch/Docs/DeepLabV3.md @@ -0,0 +1,62 @@ +# PyTorch-DeepLabV3+ + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies +1. Install pycocotools as follows +``` +sudo -H pip install pycocotools +``` + +## Model modifications & Experiment Setup +1. Clone the [DeepLabV3+ repo](https://github.com/jfzhang95/pytorch-deeplab-xception) +``` +git clone https://github.com/jfzhang95/pytorch-deeplab-xception.git +cd pytorch-deeplab-xception +git checkout 9135e104a7a51ea9effa9c6676a2fcffe6a6a2e6 +``` +2. Apply the following patch to the above repository +``` +git apply ../aimet-model-zoo/zoo_torch/examples/pytorch-deeplab-xception-zoo.patch +``` +3. Place modeling directory & dataloaders directory & metrics.py & mypath.py to aimet-model-zoo/zoo_torch/examples/ +``` +mv modeling ../aimet-model-zoo/zoo_torch/examples/ +mv dataloaders ../aimet-model-zoo/zoo_torch/examples/ +mv utils/metrics.py ../aimet-model-zoo/zoo_torch/examples/ +mv mypath.py ../aimet-model-zoo/zoo_torch/examples/ +``` +4. Download Optimized DeepLabV3+ checkpoint from [Releases](/../../releases). +5. 
Change data location as located in mypath.py + +## Obtaining model checkpoint and dataset + +- The original DeepLabV3+ checkpoint can be downloaded here: + - https://drive.google.com/file/d/1G9mWafUAj09P4KvGSRVzIsV_U5OqFLdt/view +- Optimized DeepLabV3+ checkpoint can be downloaded from [Releases](/../../releases). +- Pascal Dataset can be downloaded here: + - http://host.robots.ox.ac.uk/pascal/VOC/voc2012/ + +## Usage + +- To run evaluation with QuantSim in AIMET, use the following +```bash +python eval_deeplabv3.py \ + --checkpoint-path \ + --base-size \ + --crop-size \ + --num-classes \ + --dataset \ + --quant-scheme \ + --default-output-bw \ + --default-param-bw +``` + +## Quantization Configuration +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- TF_enhanced was used as quantization scheme +- Data Free Quantization and Quantization aware Training has been performed on the optimized checkpoint diff --git a/zoo_torch/Docs/DeepSpeech2.md b/zoo_torch/Docs/DeepSpeech2.md new file mode 100755 index 0000000..32087b6 --- /dev/null +++ b/zoo_torch/Docs/DeepSpeech2.md @@ -0,0 +1,51 @@ +# DeepSpeech + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +### Setup SeanNaren DeepSpeech2 Repo + +- Clone the [SeanNaren DeepSpeech2 Repo](https://github.com/SeanNaren/deepspeech.pytorch) + `git clone https://github.com/SeanNaren/deepspeech.pytorch.git` + +- checkout this commit id: +`cd deepspeech.pytorch` +`git checkout 78f7fb791f42c44c8a46f10e79adad796399892b` + +- Install the requirements from the SeanNaren repo as detailed in the repository. + +- Append the repo location to your `PYTHONPATH` with the following: + `export PYTHONPATH=$PYTHONPATH:/deepspeech.pytorch` + + +## Obtaining model checkpoint and dataset + +- The SeanNaren DeepSpeech2 checkpoint can be downloaded from [here](https://github.com/SeanNaren/deepspeech.pytorch/releases/download/v2.0/librispeech_pretrained_v2.pth). Please point the `model-path` flag in to this file in the run script. Please note that this script is only compatible with release V2. + +- LibriSpeech __test clean__ set can be downloaded here: + - http://www.openslr.org/12 + + +Please see the [Datasets Section in the SeanNaren Repo](https://github.com/SeanNaren/deepspeech.pytorch#datasets) for the format of the test manifest used in the script. The [download script](https://github.com/SeanNaren/deepspeech.pytorch/blob/v2.0/data/librispeech.py) from this repository will download and format the csv to be used in the `test-manifest` flag. 
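
As a quick sanity check, the manifest is a plain CSV in which each row is expected to pair an audio file with its transcript file (this layout is an assumption based on the v2.0 data scripts of the SeanNaren repo; the file name below is only a placeholder):

```python
# Illustrative sketch: print the first few rows of a deepspeech.pytorch test
# manifest. Each row is assumed to contain "<path to .wav>,<path to .txt>".
import csv

with open('libri_test_clean_manifest.csv') as f:  # placeholder path
    for row in list(csv.reader(f))[:3]:
        print(row)
```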
+ + +## Usage + +- To run evaluation with QuantSim in AIMET, use the following + +```bash +python deepspeech2_quanteval.py \ + --model-path= \ + --test-manifest= +``` + +## Quantizer Op Assumptions +In the evaluation script included, we have manually configured the quantizer ops with the following assumptions: +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization + - Inputs to Conv layers are quantized + - Input and recurrent activations for LSTM layers are quantized +- Operations which shuffle data such as reshape or transpose do not require additional quantizers diff --git a/zoo_torch/Docs/EfficientNet-lite0.md b/zoo_torch/Docs/EfficientNet-lite0.md new file mode 100644 index 0000000..20f261d --- /dev/null +++ b/zoo_torch/Docs/EfficientNet-lite0.md @@ -0,0 +1,38 @@ +# PyTorch-EfficientNet-lite0 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies +1. Install geffnet using pip install +``` +sudo -H pip install geffnet +``` +## Obtaining model checkpoint and dataset + +- The original EfficientNet-lite0 checkpoint can be downloaded from here: + - https://github.com/rwightman/gen-efficientnet-pytorch +- ImageNet can be downloaded from here: + - http://www.image-net.org/ + +## Usage +- To run evaluation with QuantSim in AIMET, use the following +```bash +python eval_efficientnetlite0.py \ + --images-dir \ + --quant-scheme \ + --quant-tricks \ + --default-output-bw \ + --default-param-bw \ + --num-iterations \ + --num-batches \ +``` + +## Quantization Configuration +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- TF_enhanced was used as quantization scheme +- Batch norm folding and Adaround has been applied on efficientnet-lite in the eval script +- [Conv - Relu6] layers has been fused as one operation via manual configurations diff --git a/zoo_torch/Docs/MobileNetV2-SSD-lite.md b/zoo_torch/Docs/MobileNetV2-SSD-lite.md new file mode 100644 index 0000000..2d5a172 --- /dev/null +++ b/zoo_torch/Docs/MobileNetV2-SSD-lite.md @@ -0,0 +1,81 @@ +# PyTorch-MobileNetV2-SSD-lite + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Model modifications +1. Clone the original repository +``` +git clone https://github.com/qfgaohao/pytorch-ssd.git +cd pytorch-ssd +git checkout f61ab424d09bf3d4bb3925693579ac0a92541b0d +git apply ../aimet-model-zoo/zoo_torch/examples/torch_ssd_eval.patch +``` +2. Place the model definition & eval_ssd.py to aimet-model-zoo/zoo_torch/examples/ +``` +mv vision ../aimet-model-zoo/zoo_torch/examples/ +mv eval_ssd.py ../aimet-model-zoo/zoo_torch/examples/ +``` +3. 
Change __init__ function from line #27 in vision/ssd/ssd.py as follows: +``` +self.config = None #############Change 1 + +self.image_size = 300 +self.image_mean = np.array([127, 127, 127]) # RGB layout +self.image_std = 128.0 +self.iou_threshold = 0.45 +self.center_variance = 0.1 +self.size_variance = 0.2 + +self.specs = [box_utils.SSDSpec(19, 16, box_utils.SSDBoxSizes(60, 105), [2, 3]), + box_utils.SSDSpec(10, 32, box_utils.SSDBoxSizes(105, 150), [2, 3]), + box_utils.SSDSpec(5, 64, box_utils.SSDBoxSizes(150, 195), [2, 3]), + box_utils.SSDSpec(3, 100, box_utils.SSDBoxSizes(195, 240), [2, 3]), + box_utils.SSDSpec(2, 150, box_utils.SSDBoxSizes(240, 285), [2, 3]), + box_utils.SSDSpec(1, 300, box_utils.SSDBoxSizes(285, 330), [2, 3])] + +self.gen_priors = box_utils.generate_ssd_priors(self.specs, self.image_size) + +# register layers in source_layer_indexes by adding them to a module list +self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes + if isinstance(t, tuple) and not isinstance(t, GraphPath)]) + +if device: + self.device = device +else: + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") +if is_test: + self.priors = self.gen_priors.to(self.device) +``` +4. Change line #93 in vision/ssd/ssd.py as follows: +``` +boxes = box_utils.convert_locations_to_boxes( + locations.cpu(), self.priors.cpu(), self.center_variance, self.size_variance +) +``` + +## Obtaining model checkpoint and dataset +- The original MobileNetV2-SSD-lite checkpoint can be downloaded here: + - https://storage.googleapis.com/models-hao/mb2-ssd-lite-mp-0_686.pth +- Optimized checkpoint can be downloaded from the [Releases](/../../releases). +- Pascal VOC2007 dataset can be downloaded here: + - http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html + +## Usage +- To run evaluation with QuantSim in AIMET, use the following +```bash +python eval_ssd.py \ + --net \ + --trained_model \ + --dataset \ + --label_file \ + --eval_dir +``` + +## Quantization Configuration +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- TF_enhanced was used as quantization scheme +- Cross-layer-Equalization and Adaround have been applied on optimized checkpoint diff --git a/zoo_torch/Docs/MobilenetV2.md b/zoo_torch/Docs/MobilenetV2.md new file mode 100644 index 0000000..2ace385 --- /dev/null +++ b/zoo_torch/Docs/MobilenetV2.md @@ -0,0 +1,70 @@ +# PyTorch-MobileNetV2 + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Model modifications +1. Clone the [MobileNetV2 repo](https://github.com/tonylins/pytorch-mobilenet-v2) +``` +git clone https://github.com/tonylins/pytorch-mobilenet-v2 +cd pytorch-mobilenet-v2/ +git checkout 99f213657e97de463c11c9e0eaca3bda598e8b3f +``` +2. Place model definition under model directory +``` +mkdir ../aimet-model-zoo/zoo_torch/examples/model +mv MobileNetV2.py ../aimet-model-zoo/zoo_torch/examples/model/ +``` +3. Download Optimized MobileNetV2 checkpoint from [Releases](/../../releases) and place under the model directory. +4. Replace all ReLU6 activations with ReLU +5. 
The following changes have been made or appended to the original model definition for our suite + - Change line #87 as follows in MobileNetV2.py +``` +self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel +``` + - Change line #91 as follows in MobileNetV2.py +``` +output_channel = int(c * width_mult) +``` + - Append line #100 as follows in MobileNetV2.py +``` +self.features.append(nn.AvgPool2d(input_size // 32)) +``` + - Change line #104 as follows in MobileNetV2.py +``` +self.classifier = nn.Sequential( + nn.Dropout(dropout), + nn.Linear(self.last_channel, n_class), + ) +``` + - Change line #110 as follows in MobileNetV2.py +``` +x = x.squeeze() +``` +## Obtaining model checkpoint and dataset + +- The original MobileNetV2 checkpoint can be downloaded here: + - https://github.com/tonylins/pytorch-mobilenet-v2 +- Optimized MobileNetV2 checkpoint can be downloaded from [Releases](/../../releases) +- ImageNet can be downloaded here: + - http://www.image-net.org/ + +## Usage +- To run evaluation with QuantSim in AIMET, use the following +```bash +python eval_mobilenetv2.py \ + --model-path \ + --images-dir \ + --quant-scheme \ + --input-shape \ + --default-output-bw \ + --default-param-bw +``` + +## Quantization Configuration +- Weight quantization: 8 bits, asymmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are not quantized +- TF_enhanced was used as quantization scheme +- Data Free Quantization and Quantization aware Training have been performed on the optimized checkpoint diff --git a/zoo_torch/Docs/PoseEstimation.md b/zoo_torch/Docs/PoseEstimation.md new file mode 100644 index 0000000..1b27c4d --- /dev/null +++ b/zoo_torch/Docs/PoseEstimation.md @@ -0,0 +1,41 @@ +# Pose Estimation + +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further. + +## Additional Dependencies + +| Package | Version | +| :---------: | :-----: | +| pycocotools | 2.0.2 | +| scipy | 1.1.0 | + +### Adding dependencies within Docker Image + +- If you are using a docker image, e.g. AIMET development docker, please add the following lines to the Dockerfile and rebuild the Docker image + +```dockerfile +RUN pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI +RUN pip install scipy==1.1.0 +``` + +## Obtaining model weights and dataset +- The pose estimation model can be downloaded here: + - + Pose Estimation pytorch model + +- COCO dataset can be downloaded here: + - COCO 2014 Val images + - + COCO 2014 Train/Val annotations + + +## Usage +- The program requires two arguments to run: model_dir, coco_path. These are positional arguments so you must specify the arguments in order. + ```bash + python ./examples/pose_estimation_quanteval.py + ``` + +- We only support evaluation on COCO 2014 val images with person keypoints. + +- The results reported were evaluated on the whole dataset, which contains over 40k images and takes 15+ hours on a single RTX 2080Ti GPU. In case you want to run a faster evaluation, specify a small number for the num_imgs argument in the second call to evaluate_session so that evaluation runs only on a partial dataset, as shown in the sketch below.
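
A minimal sketch of that change, assuming the script exposes an evaluate_session helper that accepts a num_imgs argument (the call shape below is an assumption; adapt the names to the actual script):

```python
# Sketch only: run the final accuracy evaluation on a 100-image subset of
# COCO val2014 instead of the full set, for a quick sanity check.
eval_results = evaluate_session(session,        # quantized model / session under test
                                coco_path,      # top-level COCO directory
                                num_imgs=100)   # small subset instead of ~40k images
```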
diff --git a/zoo_torch/Docs/SRGAN.md b/zoo_torch/Docs/SRGAN.md
new file mode 100644
index 0000000..1c3c7c0
--- /dev/null
+++ b/zoo_torch/Docs/SRGAN.md
@@ -0,0 +1,73 @@
+# SRGAN (Super Resolution)
+
+## Setup AI Model Efficiency Toolkit (AIMET)
+Please [install and setup AIMET](../../README.md#install-aimet) before proceeding further.
+
+### Setup Super-resolution repo
+
+- Clone the mmsr repo and check out the pinned commit:
+  `git clone https://github.com/andreas128/mmsr.git`
+  `cd mmsr`
+  `git checkout a73b318f0f07feb6505ef5cb1abf0db33e33807a`
+
+- Append the repo location to your `PYTHONPATH` with the following:
+  `export PYTHONPATH=<path to mmsr>:<path to mmsr>/codes:$PYTHONPATH`
+
+  Note that here we add both mmsr and the subdirectory mmsr/codes to our path.
+
+  - Find mmsr/codes/models/archs/arch_util.py and make the following changes:
+    1. In \_\_init__ append the line ```self.relu = nn.ReLU()``` after
+    ```self.conv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)``` as shown below:
+
+    ```python
+    super(ResidualBlock_noBN, self).__init__()
+    self.conv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+    self.conv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
+    self.relu = nn.ReLU()
+    ```
+
+    2. In forward, replace ```out = F.relu(self.conv1(x), inplace=True)```
+    with ```out = self.relu(self.conv1(x))``` as shown below:
+
+    ```python
+    identity = x
+    # out = F.relu(self.conv1(x), inplace=True)
+    out = self.relu(self.conv1(x))
+    out = self.conv2(out)
+    ```
+
+  These changes are necessary because AIMET currently does not support some PyTorch
+  functionals.
+
+## Obtaining model weights and dataset
+
+- The SRGAN model can be downloaded from:
+  - mmediting
+
+- Three benchmark datasets can be downloaded here:
+  - [Set5](https://uofi.box.com/shared/static/kfahv87nfe8ax910l85dksyl2q212voc.zip)
+  - [Set14](https://uofi.box.com/shared/static/igsnfieh4lz68l926l8xbklwsnnk8we9.zip)
+  - [BSD100](https://uofi.box.com/shared/static/qgctsplb8txrksm9to9x01zfa4m61ngq.zip)
+
+  Our benchmark results use images under the **image_SRF_4** directory, which corresponds to 4x
+  super-resolution as the suffix number indicates. You can also use other scales.
+  See the usage instructions below.
+
+## Usage
+
+- The `srgan_quanteval.py` script requires you to specify a .yml file which contains the locations of your dataset and .pth model together with some config parameters. You can pass mmsr/codes/options/test/test_SRGAN.yml as your .yml file. Remember to edit the file so that:
+  - dataroot_GT points to your directory of HR images
+  - dataroot_LQ points to your directory of LR images
+  - pretrain_model_G points to where you store your SRGAN .pth file
+  - scale matches the scale of the super-resolution images
+
+Run the script as follows:
+  ```bash
+  python ./zoo_torch/examples/srgan_quanteval.py [--options] -opt 
+  ```
+
+## Quantizer Op Assumptions
+In the evaluation script included, we have used the default config file, which configures the quantizer ops with the following assumptions:
+- Weight quantization: 8 bits, asymmetric quantization
+- Bias parameters are not quantized
+- Activation quantization: 8 bits, asymmetric quantization
+- Model inputs are not quantized
diff --git a/zoo_torch/examples/deepspeech2_quanteval.py b/zoo_torch/examples/deepspeech2_quanteval.py
new file mode 100755
index 0000000..9977b92
--- /dev/null
+++ b/zoo_torch/examples/deepspeech2_quanteval.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3.6
+# -*- mode: python -*-
+# =============================================================================
+# @@-COPYRIGHT-START-@@
+#
+# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +''' +This script will run AIMET QuantSim and evaluate WER using the DeepSpeech2 model +from the SeanNaren repo with manual configuration of quantization ops. +''' + +import os +import sys +import json +import argparse + +import torch +from tqdm import tqdm + +from deepspeech_pytorch.configs.inference_config import EvalConfig, LMConfig +from deepspeech_pytorch.decoder import GreedyDecoder +from deepspeech_pytorch.loader.data_loader import SpectrogramDataset, AudioDataLoader +from deepspeech_pytorch.utils import load_model, load_decoder +from deepspeech_pytorch.testing import run_evaluation + +import aimet_torch +from aimet_common.defs import QuantScheme +from aimet_torch.pro.quantsim import QuantizationSimModel + +def run_quantsim_evaluation(args): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + import deepspeech_pytorch.model + + def wrapped_forward_function(self, x, lengths=None): + if lengths is None: + lengths = torch.IntTensor([_x.shape[0] for _x in x]) + return self.infer(x, lengths) + + deepspeech_pytorch.model.DeepSpeech.infer = deepspeech_pytorch.model.DeepSpeech.forward + deepspeech_pytorch.model.DeepSpeech.forward = wrapped_forward_function + + model = load_model(device=device, + model_path=args.model_path, + use_half=False) + + + + decoder = load_decoder(labels=model.labels, + cfg=LMConfig) + + target_decoder = GreedyDecoder(model.labels, + blank_index=model.labels.index('_')) + + def eval_func(model, iterations=None, device=device): + test_dataset = SpectrogramDataset(audio_conf=model.audio_conf, + manifest_filepath=args.test_manifest, + labels=model.labels, + normalize=True) + + if iterations is not None: + test_dataset.size = iterations + + test_loader = AudioDataLoader(test_dataset, + batch_size=args.batch_size, + num_workers=args.num_workers) + + wer, cer, output_data = run_evaluation(test_loader=test_loader, + device=device, + model=model, + decoder=decoder, + target_decoder=target_decoder, + save_output=False, + verbose=True, + use_half=False) + return wer, cer, output_data + + + quant_scheme = QuantScheme.post_training_tf_enhanced + + sim = QuantizationSimModel(model.cpu(), + input_shapes=tuple([1, 1, 161, 500]), + quant_scheme=quant_scheme, + default_param_bw=args.default_param_bw, + default_output_bw=args.default_output_bw, + config_file=args.quantsim_config_file + ) + + manually_configure_quant_ops(sim) + + sim.model.to(device) + sim.compute_encodings(eval_func, forward_pass_callback_args=args.encodings_iterations) + + wer, cer, output_data = eval_func(sim.model, None) + print('Average WER {:.4f}'.format(wer)) + +def manually_configure_quant_ops(sim): + ''' + Manually configure Quantization Ops. Please see documentation for further explanation of quant op placement. 
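+    The manual_config dictionary below maps each quantized module name to boolean
+    flags: the 'input_quantizer' / 'output_quantizer' entries control the activation
+    quantizers of that wrapper, while the remaining '<name>_quantizer' entries
+    control the corresponding parameter (or recurrent input/output) quantizers.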
+ ''' + + manual_config = { + 'conv.seq_module.0': { # Conv2d + 'input_quantizer': True, + 'output_quantizer': False, + 'weight_quantizer': True, + 'bias_quantizer': False, + }, + 'conv.seq_module.1': { # BatchNorm + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'conv.seq_module.2': { # HardTanh + 'input_quantizer': True, + 'output_quantizer': False, + }, + 'conv.seq_module.3': { # Conv2d + 'input_quantizer': True, + 'output_quantizer': False, + 'weight_quantizer': True, + 'bias_quantizer': False, + }, + 'conv.seq_module.4': { # BatchNorm + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'conv.seq_module.5': { # HardTanh + 'input_quantizer': True, + 'output_quantizer': False, + }, + 'rnns.0.rnn': { + 'input_l0_quantizer': True, + 'initial_h_l0_quantizer': False, + 'initial_c_l0_quantizer': False, + 'h_l0_quantizer': True, + 'c_l0_quantizer': False, + 'weight_ih_l0_quantizer': True, + 'weight_hh_l0_quantizer': True, + 'bias_ih_l0_quantizer': False, + 'bias_hh_l0_quantizer': False, + 'weight_ih_l0_reverse_quantizer': True, + 'weight_hh_l0_reverse_quantizer': True, + 'bias_ih_l0_reverse_quantizer': False, + 'bias_hh_l0_reverse_quantizer': False, + }, + 'rnns.1.batch_norm.module': { + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'rnns.1.rnn': { + 'input_l0_quantizer': True, + 'initial_h_l0_quantizer': False, + 'initial_c_l0_quantizer': False, + 'h_l0_quantizer': True, + 'c_l0_quantizer': False, + 'weight_ih_l0_quantizer': True, + 'weight_hh_l0_quantizer': True, + 'bias_ih_l0_quantizer': False, + 'bias_hh_l0_quantizer': False, + 'weight_ih_l0_reverse_quantizer': True, + 'weight_hh_l0_reverse_quantizer': True, + 'bias_ih_l0_reverse_quantizer': False, + 'bias_hh_l0_reverse_quantizer': False, + }, + 'rnns.2.batch_norm.module': { + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'rnns.2.rnn': { + 'input_l0_quantizer': True, + 'initial_h_l0_quantizer': False, + 'initial_c_l0_quantizer': False, + 'h_l0_quantizer': True, + 'c_l0_quantizer': False, + 'weight_ih_l0_quantizer': True, + 'weight_hh_l0_quantizer': True, + 'bias_ih_l0_quantizer': False, + 'bias_hh_l0_quantizer': False, + 'weight_ih_l0_reverse_quantizer': True, + 'weight_hh_l0_reverse_quantizer': True, + 'bias_ih_l0_reverse_quantizer': False, + 'bias_hh_l0_reverse_quantizer': False, + }, + 'rnns.3.batch_norm.module': { + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'rnns.3.rnn': { + 'input_l0_quantizer': True, + 'initial_h_l0_quantizer': False, + 'initial_c_l0_quantizer': False, + 'h_l0_quantizer': True, + 'c_l0_quantizer': False, + 'weight_ih_l0_quantizer': True, + 'weight_hh_l0_quantizer': True, + 'bias_ih_l0_quantizer': False, + 'bias_hh_l0_quantizer': False, + 'weight_ih_l0_reverse_quantizer': True, + 'weight_hh_l0_reverse_quantizer': True, + 'bias_ih_l0_reverse_quantizer': False, + 'bias_hh_l0_reverse_quantizer': False, + }, + 'rnns.4.batch_norm.module': { + 'input_quantizer': False, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'rnns.4.rnn': { + 'input_l0_quantizer': True, + 'initial_h_l0_quantizer': False, + 'initial_c_l0_quantizer': False, + 'h_l0_quantizer': True, + 'c_l0_quantizer': False, + 'weight_ih_l0_quantizer': True, + 
'weight_hh_l0_quantizer': True, + 'bias_ih_l0_quantizer': False, + 'bias_hh_l0_quantizer': False, + 'weight_ih_l0_reverse_quantizer': True, + 'weight_hh_l0_reverse_quantizer': True, + 'bias_ih_l0_reverse_quantizer': False, + 'bias_hh_l0_reverse_quantizer': False, + }, + 'fc.0.module.0': { + 'input_quantizer': True, + 'output_quantizer': False, + 'weight_quantizer': False, + 'bias_quantizer': False, + }, + 'fc.0.module.1': { + 'input_quantizer': True, + 'output_quantizer': False, + 'weight_quantizer': True, + }, + 'inference_softmax': { + 'input_quantizer': False, + 'output_quantizer': True, + } + } + + quant_ops = QuantizationSimModel._get_qc_quantized_layers(sim.model) + for name, op in quant_ops: + mc = manual_config[name] + if isinstance(op, aimet_torch.qc_quantize_op.QcPostTrainingWrapper): + op.input_quantizer.enabled = mc['input_quantizer'] + op.output_quantizer.enabled = mc['output_quantizer'] + for q_name, param_quantizer in op.param_quantizers.items(): + param_quantizer.enabled = mc[q_name + '_quantizer'] + elif isinstance(op, aimet_torch.pro.qc_quantize_recurrent.QcQuantizeRecurrent): + for q_name, input_quantizer in op.input_quantizers.items(): + input_quantizer.enabled = mc[q_name + '_quantizer'] + for q_name, output_quantizer in op.output_quantizers.items(): + output_quantizer.enabled = mc[q_name + '_quantizer'] + for q_name, param_quantizer in op.param_quantizers.items(): + param_quantizer.enabled = mc[q_name + '_quantizer'] + + +def parse_args(args): + """ Parse the arguments. + """ + parser = argparse.ArgumentParser(description='Evaluation script for an DeepSpeech2 network.') + + parser.add_argument('--model-path', help='Path to .pth to load from.') + parser.add_argument('--test-manifest', help='Path to csv to do eval on.') + parser.add_argument('--batch-size', help='Batch size.', type=int, default=20) + parser.add_argument('--num-workers', help='Number of workers.', type=int, default=1) + + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf') + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', type=int, default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', type=int, default=8) + parser.add_argument('--quantsim-config-file', help='Quantsim configuration file.', default=None) + parser.add_argument('--encodings-iterations', help='Number of iterations to use for compute encodings during quantization.', type=int, default=500) + + return parser.parse_args(args) + +def main(args=None): + args = parse_args(args) + run_quantsim_evaluation(args) + +if __name__ == '__main__': + main() diff --git a/zoo_torch/examples/eval_deeplabv3.py b/zoo_torch/examples/eval_deeplabv3.py new file mode 100755 index 0000000..b4cdbb0 --- /dev/null +++ b/zoo_torch/examples/eval_deeplabv3.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +''' AIMET Quantsim code for DeepLabV3 ''' + +import random +import numpy as np +import torch +from modeling.deeplab import DeepLab +from tqdm import tqdm +import argparse +from metrics import Evaluator +from dataloaders import make_data_loader + +def work_init(work_id): + seed = torch.initial_seed() % 2**32 + random.seed(seed + work_id) + np.random.seed(seed + work_id) + +def model_eval(args, data_loader): + def func_wrapper(model, arguments): + evaluator = Evaluator(args.num_classes) + evaluator.reset() + model.eval() + model.cuda() + threshold, use_cuda = arguments[0], arguments[1] + total_samples = 0 + for sample in tqdm(data_loader): + images, label = sample['image'], sample['label'] + images, label = images.cuda(), label.cpu().numpy() + output = model(images) + pred = torch.argmax(output, 1).data.cpu().numpy() + evaluator.add_batch(label, pred) + total_samples += images.size()[0] + if total_samples > threshold: + break + mIoU = evaluator.Mean_Intersection_over_Union()*100. + print("mIoU : {:0.2f}".format(mIoU)) + return mIoU + return func_wrapper + + + + +def arguments(): + parser = argparse.ArgumentParser(description='Evaluation script for PyTorch ImageNet networks.') + + parser.add_argument('--checkpoint-path', help='Path to optimized checkpoint directory to load from.', default = None, type=str) + parser.add_argument('--base-size', help='Base size for Random Crop', default=513) + parser.add_argument('--crop-size', help='Crop size for Random Crop', default=513) + parser.add_argument('--num-classes', help='Number of classes in a dataset', default=21) + parser.add_argument('--dataset', help='dataset used for evaluation', default='pascal') + + parser.add_argument('--seed', help='Seed number for reproducibility', default=0) + parser.add_argument('--use-sbd', help='Use SBD data for data augmentation during training', default=False) + + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf', choices = ['tf', 'tf_enhanced', 'range_learning_tf', 'range_learning_tf_enhanced']) + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', default=8) + parser.add_argument('--config-file', help='Quantsim configuration file.', default=None, type=str) + parser.add_argument('--cuda', help='Enable cuda for a model', default=True) + + parser.add_argument('--batch-size', help='Data batch size for a model', default=16) + args = parser.parse_args() + return args + +def seed(args): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + + +def main(): + args = arguments() + seed(args) + + model = DeepLab(backbone='mobilenet', output_stride=16, num_classes=21, + sync_bn=False) + model.eval() + + from aimet_torch import batch_norm_fold + from aimet_torch import utils + args.input_shape = (1,3,513,513) + batch_norm_fold.fold_all_batch_norms(model, args.input_shape) + utils.replace_modules_of_type1_with_type2(model, torch.nn.ReLU6, torch.nn.ReLU) + # from IPython import embed; embed() + if args.checkpoint_path: + 
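+        # Load the optimized checkpoint weights (a state_dict) into the
+        # batch-norm-folded, ReLU6->ReLU-converted model prepared above.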
model.load_state_dict(torch.load(args.checkpoint_path)) + else: + raise ValueError('checkpoint path {} must be specified'.format(args.checkpoint_path)) + + data_loader_kwargs = { 'worker_init_fn':work_init, 'num_workers' : 0} + train_loader, val_loader, test_loader, num_class = make_data_loader(args, **data_loader_kwargs) + eval_func_quant = model_eval(args, val_loader) + eval_func = model_eval(args, val_loader) + + from aimet_common.defs import QuantScheme + from aimet_torch.pro.quantsim import QuantizationSimModel + if hasattr(args, 'quant_scheme'): + if args.quant_scheme == 'range_learning_tf': + quant_scheme = QuantScheme.training_range_learning_with_tf_init + elif args.quant_scheme == 'range_learning_tfe': + quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init + elif args.quant_scheme == 'tf': + quant_scheme = QuantScheme.post_training_tf + elif args.quant_scheme == 'tf_enhanced': + quant_scheme = QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme) + kwargs = { + 'quant_scheme': quant_scheme, + 'default_param_bw': args.default_param_bw, + 'default_output_bw': args.default_output_bw, + 'config_file': args.config_file + } + print(kwargs) + sim = QuantizationSimModel(model.cpu(), input_shapes=args.input_shape, **kwargs) + sim.compute_encodings(eval_func_quant, (1024, True)) + post_quant_top1 = eval_func(sim.model.cuda(), (99999999, True)) + print("Post Quant mIoU :", post_quant_top1) + +if __name__ == '__main__': + main() diff --git a/zoo_torch/examples/eval_efficientnetlite0.py b/zoo_torch/examples/eval_efficientnetlite0.py new file mode 100755 index 0000000..8e45717 --- /dev/null +++ b/zoo_torch/examples/eval_efficientnetlite0.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +''' AIMET Post Quantization code for EfficientNet-Lite0 ''' + +import random +import numpy as np +import torch +import geffnet +from torch.utils.data import DataLoader +from torchvision import transforms, datasets +from tqdm import tqdm +import argparse + +from aimet_torch import utils +from aimet_torch import cross_layer_equalization +from aimet_torch import batch_norm_fold +from aimet_common.defs import QuantScheme +from aimet_torch.pro.quantsim import QuantizationSimModel +from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters +from aimet_torch.onnx_utils import onnx_pytorch_conn_graph_type_pairs +from aimet_common.utils import AimetLogger +import logging +AimetLogger.set_level_for_all_areas(logging.DEBUG) +onnx_pytorch_conn_graph_type_pairs.append([["Clip"], ["hardtanh"]]) + +def work_init(work_id): + seed = torch.initial_seed() % 2**32 + random.seed(seed + work_id) + np.random.seed(seed + work_id) + +def model_eval(data_loader, image_size, batch_size=64, quant = False): + def func_wrapper_quant(model, arguments): + top1_acc = 0.0 + total_num = 0 + idx = 0 + iterations , use_cuda = arguments[0], arguments[1] + if use_cuda: + model.cuda() + for sample, label in tqdm(data_loader): + total_num += sample.size()[0] + if use_cuda: + sample = sample.cuda() + label = label.cuda() + logits = model(sample) + pred = torch.argmax(logits, dim = 1) + correct = sum(torch.eq(pred, label)).cpu().numpy() + top1_acc += correct + idx += 1 + if idx > iterations: + break + avg_acc = top1_acc * 100. / total_num + print("Top 1 ACC : {:0.2f}".format(avg_acc)) + return avg_acc + + def func_wrapper(model, arguments): + top1_acc = 0.0 + total_num = 0 + iterations , use_cuda = arguments[0], arguments[1] + if use_cuda: + model.cuda() + for sample, label in tqdm(data_loader): + total_num += sample.size()[0] + if use_cuda: + sample = sample.cuda() + label = label.cuda() + logits = model(sample) + pred = torch.argmax(logits, dim = 1) + correct = sum(torch.eq(pred, label)).cpu().numpy() + top1_acc += correct + avg_acc = top1_acc * 100. 
/ total_num + print("Top 1 ACC : {:0.2f}".format(avg_acc)) + return avg_acc + if quant: + func = func_wrapper_quant + else: + func = func_wrapper + return func + +def seed(args): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + + +def load_model(pretrained = True): + model = getattr(geffnet, 'efficientnet_lite0')(pretrained) + return model + +def run_pytorch_bn_fold(config, model): + folded_pairs = batch_norm_fold.fold_all_batch_norms(model.cpu(), config.input_shape) + conv_bn_pairs = {} + for conv_bn in folded_pairs: + conv_bn_pairs[conv_bn[0]] = conv_bn[1] + return model, conv_bn_pairs + +def run_pytorch_cross_layer_equalization(config, model): + cross_layer_equalization.equalize_model(model.cpu(), config.input_shape) + return model + +def run_pytorch_adaround(config, model, data_loaders): + if hasattr(config, 'quant_scheme'): + if config.quant_scheme == 'range_learning_tf': + quant_scheme = QuantScheme.post_training_tf + elif config.quant_scheme == 'range_learning_tfe': + quant_scheme = QuantScheme.post_training_tf_enhanced + elif config.quant_scheme == 'tf': + quant_scheme = QuantScheme.post_training_tf + elif config.quant_scheme == 'tf_enhanced': + quant_scheme = QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + config.quant_scheme) + + params = AdaroundParameters(data_loader = data_loaders, num_batches = config.num_batches, default_num_iterations = config.num_iterations, + default_reg_param = 0.01, default_beta_range = (20, 2)) + ada_model = Adaround.apply_adaround(model.cuda(), params, default_param_bw= config.default_param_bw, + default_quant_scheme = quant_scheme, + default_config_file = config.config_file + ) + return ada_model + + +def arguments(): + parser = argparse.ArgumentParser(description='Evaluation script for PyTorch EfficientNet-lite0 networks.') + + parser.add_argument('--images-dir', help='Imagenet eval image', default='./ILSVRC2012_PyTorch/', type=str) + parser.add_argument('--input-shape', help='Model to an input image shape, (ex : [batch, channel, width, height]', default=(1,3,224,224)) + parser.add_argument('--seed', help='Seed number for reproducibility', default=0) + + parser.add_argument('--quant-tricks', help='Preprocessing prior to Quantization', choices=['BNfold', 'CLE', 'adaround'], nargs = "+") + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf', choices = ['tf', 'tf_enhanced', 'range_learning_tf', 'range_learning_tf_enhanced']) + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', default=8) + parser.add_argument('--config-file', help='Quantsim configuration file.', default=None, type=str) + parser.add_argument('--cuda', help='Enable cuda for a model', default=True) + + parser.add_argument('--batch-size', help='Data batch size for a model', default=64) + parser.add_argument('--num-workers', help='Number of workers to run data loader in parallel', default=16) + + parser.add_argument('--num-iterations', help='Number of iterations used for adaround optimization', default=10000, type = int) + 
parser.add_argument('--num-batches', help='Number of batches used for adaround optimization', default=16, type = int) + + args = parser.parse_args() + return args + + +def main(): + args = arguments() + seed(args) + + model = load_model() + model.eval() + + image_size = args.input_shape[-1] + + data_loader_kwargs = { 'worker_init_fn':work_init, 'num_workers' : args.num_workers} + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + val_transforms = transforms.Compose([ + transforms.Resize(image_size + 24), + transforms.CenterCrop(image_size), + transforms.ToTensor(), + normalize]) + val_data = datasets.ImageFolder(args.images_dir + '/val/', val_transforms) + val_dataloader = DataLoader(val_data, args.batch_size, shuffle = False, pin_memory = True, **data_loader_kwargs) + + eval_func_quant = model_eval(val_dataloader, image_size, batch_size=args.batch_size, quant = True) + eval_func = model_eval(val_dataloader, image_size, batch_size=args.batch_size) + + if 'BNfold' in args.quant_tricks: + print("BN fold") + model, conv_bn_pairs = run_pytorch_bn_fold(args, model) + if 'CLE' in args.quant_tricks: + print("CLE") + model = run_pytorch_cross_layer_equalization(args, model) + print(model) + if 'adaround' in args.quant_tricks: + model = run_pytorch_adaround(args, model, val_dataloader) + + if hasattr(args, 'quant_scheme'): + if args.quant_scheme == 'range_learning_tf': + quant_scheme = QuantScheme.training_range_learning_with_tf_init + elif args.quant_scheme == 'range_learning_tfe': + quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init + elif args.quant_scheme == 'tf': + quant_scheme = QuantScheme.post_training_tf + elif args.quant_scheme == 'tf_enhanced': + quant_scheme = QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme) + kwargs = { + 'quant_scheme': quant_scheme, + 'default_param_bw': args.default_param_bw, + 'default_output_bw': args.default_output_bw, + 'config_file': args.config_file + } + print(kwargs) + sim = QuantizationSimModel(model.cpu(), input_shapes=args.input_shape, **kwargs) + + # Manually Config Super group, AIMET currently does not support [Conv-ReLU6] in a supergroup + from aimet_torch.qc_quantize_op import QcPostTrainingWrapper + for quant_wrapper in sim.model.modules(): + if isinstance(quant_wrapper, QcPostTrainingWrapper): + if isinstance(quant_wrapper._module_to_wrap, torch.nn.Conv2d): + quant_wrapper.output_quantizer.enabled = False + + sim.model.blocks[0][0].conv_pw.output_quantizer.enabled = True + sim.model.blocks[1][0].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[1][1].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[2][0].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[2][1].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[3][0].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[3][1].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[3][2].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[4][0].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[4][1].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[4][2].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[5][0].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[5][1].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[5][2].conv_pwl.output_quantizer.enabled = True + sim.model.blocks[5][3].conv_pwl.output_quantizer.enabled = True + 
sim.model.blocks[6][0].conv_pwl.output_quantizer.enabled = True + + sim.compute_encodings(eval_func_quant, (32, True)) + print(sim) + post_quant_top1 = eval_func(sim.model.cuda(), (0, True)) + print("Post Quant Top1 :", post_quant_top1) + +if __name__ == '__main__': + main() diff --git a/zoo_torch/examples/eval_mobilenetv2.py b/zoo_torch/examples/eval_mobilenetv2.py new file mode 100755 index 0000000..acfe366 --- /dev/null +++ b/zoo_torch/examples/eval_mobilenetv2.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. +# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +''' AIMET Quantsim code for MobileNetV2 ''' + +import random +import numpy as np +import torch +from model.MobileNetV2 import mobilenet_v2 +from torch.utils.data import DataLoader +from torchvision import transforms, datasets +from tqdm import tqdm +import argparse +def work_init(work_id): + seed = torch.initial_seed() % 2**32 + random.seed(seed + work_id) + np.random.seed(seed + work_id) + +def model_eval(images_dir, image_size, batch_size=64, num_workers=16, quant = False): + + data_loader_kwargs = { 'worker_init_fn':work_init, 'num_workers' : num_workers} + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + val_transforms = transforms.Compose([ + transforms.Resize(image_size + 24), + transforms.CenterCrop(image_size), + transforms.ToTensor(), + normalize]) + val_data = datasets.ImageFolder(images_dir, val_transforms) + val_dataloader = DataLoader(val_data, batch_size, shuffle = False, pin_memory = True, **data_loader_kwargs) + def func_wrapper_quant(model, arguments): + top1_acc = 0.0 + total_num = 0 + idx = 0 + iterations , use_cuda = arguments[0], arguments[1] + if use_cuda: + model.cuda() + for sample, label in tqdm(val_dataloader): + total_num += sample.size()[0] + if use_cuda: + sample = sample.cuda() + label = label.cuda() + logits = model(sample) + pred = torch.argmax(logits, dim = 1) + correct = sum(torch.eq(pred, label)).cpu().numpy() + top1_acc += correct + idx += 1 + if idx > iterations: + break + avg_acc = top1_acc * 100. / total_num + print("Top 1 ACC : {:0.2f}".format(avg_acc)) + return avg_acc + + def func_wrapper(model, arguments): + top1_acc = 0.0 + total_num = 0 + iterations , use_cuda = arguments[0], arguments[1] + if use_cuda: + model.cuda() + for sample, label in tqdm(val_dataloader): + total_num += sample.size()[0] + if use_cuda: + sample = sample.cuda() + label = label.cuda() + logits = model(sample) + pred = torch.argmax(logits, dim = 1) + correct = sum(torch.eq(pred, label)).cpu().numpy() + top1_acc += correct + avg_acc = top1_acc * 100. 
/ total_num + print("Top 1 ACC : {:0.2f}".format(avg_acc)) + return avg_acc + if quant: + func = func_wrapper_quant + else: + func = func_wrapper + return func + + +def arguments(): + parser = argparse.ArgumentParser(description='Evaluation script for PyTorch ImageNet networks.') + + parser.add_argument('--model-path', help='Path to checkpoint directory to load from.', default = "./model/mv2qat_modeldef.pth", type=str) + parser.add_argument('--images-dir', help='Imagenet eval image', default='./ILSVRC2012/', type=str) + parser.add_argument('--input-shape', help='Model to an input image shape, (ex : [batch, channel, width, height]', default=(1,3,224,224)) + parser.add_argument('--seed', help='Seed number for reproducibility', default=0) + + parser.add_argument('--quant-tricks', help='Preprocessing prior to Quantization', choices=['BNfold', 'CLS', 'HBF', 'CLE', 'BC', 'adaround'], nargs = "+") + parser.add_argument('--quant-scheme', help='Quant scheme to use for quantization (tf, tf_enhanced, range_learning_tf, range_learning_tf_enhanced).', default='tf', choices = ['tf', 'tf_enhanced', 'range_learning_tf', 'range_learning_tf_enhanced']) + parser.add_argument('--round-mode', help='Round mode for quantization.', default='nearest') + parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', default=8) + parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', default=8) + parser.add_argument('--config-file', help='Quantsim configuration file.', default=None, type=str) + parser.add_argument('--cuda', help='Enable cuda for a model', default=True) + + parser.add_argument('--batch-size', help='Data batch size for a model', default=64) + + + args = parser.parse_args() + return args + +def seed(args): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(args.seed) + torch.cuda.manual_seed(args.seed) + torch.cuda.manual_seed_all(args.seed) + + +def main(): + args = arguments() + seed(args) + + if args.model_path: + model = torch.load(args.model_path) + else: + raise ValueError('Model path {} must be specified'.format(args.model_path)) + + model.eval() + image_size = args.input_shape[-1] + eval_func_quant = model_eval(args.images_dir + '/val/', image_size, batch_size=args.batch_size, num_workers=0, quant = True) + eval_func = model_eval(args.images_dir + '/val/', image_size, batch_size=args.batch_size, num_workers=16) + + from aimet_common.defs import QuantScheme + from aimet_torch.pro.quantsim import QuantizationSimModel + if hasattr(args, 'quant_scheme'): + if args.quant_scheme == 'range_learning_tf': + quant_scheme = QuantScheme.training_range_learning_with_tf_init + elif args.quant_scheme == 'range_learning_tfe': + quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init + elif args.quant_scheme == 'tf': + quant_scheme = QuantScheme.post_training_tf + elif args.quant_scheme == 'tf_enhanced': + quant_scheme = QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme) + kwargs = { + 'quant_scheme': quant_scheme, + 'default_param_bw': args.default_param_bw, + 'default_output_bw': args.default_output_bw, + 'config_file': args.config_file + } + print(kwargs) + sim = QuantizationSimModel(model.cpu(), input_shapes=args.input_shape, **kwargs) + sim.compute_encodings(eval_func_quant, (32, True)) + post_quant_top1 = eval_func(sim.model.cuda(), (0, True)) + print("Post Quant Top1 :", post_quant_top1) + +if 
__name__ == '__main__': + main() diff --git a/zoo_torch/examples/pose_estimation_quanteval.py b/zoo_torch/examples/pose_estimation_quanteval.py new file mode 100644 index 0000000..fc4af80 --- /dev/null +++ b/zoo_torch/examples/pose_estimation_quanteval.py @@ -0,0 +1,499 @@ +#!/usr/bin/env python3.6 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved. +# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +""" +This script applies and evaluates a compressed pose estimation model which has a similar +structure with https://github.com/CMU-Perceptual-Computing-Lab/openpose. Evaluation is +done on 2014 val dataset with person keypoints only. This model is quantization-friendly +so no post-training methods or QAT were applied. For instructions please refer to +zoo_torch/Docs/PoseEstimation.md +""" + + +import os +import math +import argparse +from functools import partial +from tqdm import tqdm + +import cv2 +from scipy.ndimage.filters import gaussian_filter +import torch +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +from aimet_torch import quantsim + + +def non_maximum_suppression(map, thresh): + map_s = gaussian_filter(map, sigma=3) + + map_left = np.zeros(map_s.shape) + map_left[1:, :] = map_s[:-1, :] + map_right = np.zeros(map_s.shape) + map_right[:-1, :] = map_s[1:, :] + map_up = np.zeros(map_s.shape) + map_up[:, 1:] = map_s[:, :-1] + map_down = np.zeros(map_s.shape) + map_down[:, :-1] = map_s[:, 1:] + + peaks_binary = np.logical_and.reduce((map_s >= map_left, map_s >= map_right, map_s >= map_up, + map_s >= map_down, + map_s > thresh)) + + peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse + peaks_with_score = [x + (map[x[1], x[0]],) for x in peaks] + + return peaks_with_score + + +def pad_image(img, stride, padding): + h = img.shape[0] + w = img.shape[1] + + pad = 4 * [None] + pad[0] = 0 # up + pad[1] = 0 # left + pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down + pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right + + img_padded = img + pad_up = np.tile(img_padded[0:1, :, :] * 0 + padding, (pad[0], 1, 1)) + img_padded = np.concatenate((pad_up, img_padded), axis=0) + pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padding, (1, pad[1], 1)) + img_padded = np.concatenate((pad_left, img_padded), axis=1) + pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padding, (pad[2], 1, 1)) + img_padded = np.concatenate((img_padded, pad_down), axis=0) + pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padding, (1, pad[3], 1)) + img_padded = np.concatenate((img_padded, pad_right), axis=1) + + return img_padded, pad + + +def encode_input(image, scale, stride, padding): + image_scaled = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) + image_scaled_padded, pad = pad_image(image_scaled, stride, padding) + + return image_scaled_padded, pad + + +def decode_output(data, stride, padding, input_shape, image_shape): + output = np.transpose(np.squeeze(data), (1, 2, 0)) + output = cv2.resize(output, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) + output = output[:input_shape[0] - padding[2], :input_shape[1] - padding[3], :] + output = cv2.resize(output, (image_shape[1], image_shape[0]), interpolation=cv2.INTER_CUBIC) + + return output + + +def 
preprocess(image, transforms): + mean_bgr = [34.282957728666474, 32.441979567868017, 24.339757511312481] + + image = image.astype(np.float32) + + if 'bgr' in transforms: + if image.shape[0] == 3: + image = image[::-1, :, :] + elif image.shape[2] == 3: + image = image[:, :, ::-1] + + if 'tr' in transforms: + image = image.transpose((2, 0, 1)) + + if 'mean' in transforms: + image[0, :, :] -= mean_bgr[0] + image[1, :, :] -= mean_bgr[1] + image[2, :, :] -= mean_bgr[2] + + if 'addchannel' in transforms: + image = image[np.newaxis, :, :, :] + + if 'normalize' in transforms: + image = image / 256 - 0.5 + + return image + + +def run_model(model, image, fast=False): + scale_search = [1.] + crop = 368 + stride = 8 + padValue = 128 + + if fast: + scales = scale_search + else: + scales = [x * crop / image.shape[0] for x in scale_search] + + heatmaps, pafs = [], [] + for scale in scales: + if fast: + horiz = image.shape[0] < image.shape[1] + sz = (496, 384) if horiz else (384, 496) + image_encoded = cv2.resize(image, dsize=(int(sz[0] * scale), int(sz[1] * scale))) + else: + image_encoded, pad = encode_input(image, scale, stride, + padValue) + image_encoded_ = preprocess(image_encoded, + ['addchannel', 'normalize', 'bgr']) + image_encoded_ = np.transpose(image_encoded_, (0, 3, 1, 2)) + with torch.no_grad(): + input_image = torch.FloatTensor(torch.from_numpy(image_encoded_).float()) + if next(model.parameters()).is_cuda: + input_image = input_image.to(device='cuda') + output = model(input_image) + paf = output[2].cpu().data.numpy().transpose((0, 2, 3, 1)) + heatmap = output[3].cpu().data.numpy().transpose((0, 2, 3, 1)) + if fast: + paf = cv2.resize(paf[0], (image.shape[1], image.shape[0])) + heatmap = cv2.resize(heatmap[0], dsize=(image.shape[1], image.shape[0])) + else: + # paf = paf.transpose((0, 3, 1, 2)) + # heatmap = heatmap.transpose((0, 3, 1, 2)) + paf = decode_output(paf, stride, pad, image_encoded.shape, + image.shape) + heatmap = decode_output(heatmap, stride, pad, image_encoded.shape, + image.shape) + + pafs.append(paf) + heatmaps.append(heatmap) + + return np.asarray(heatmaps).mean(axis=0), np.asarray(pafs).mean(axis=0) + + +def get_keypoints(heatmap): + thre1 = 0.1 + keypoints_all = [] + keypoints_cnt = 0 + for part in range(19 - 1): + keypoints = non_maximum_suppression(heatmap[:, :, part], thre1) + id = range(keypoints_cnt, keypoints_cnt + len(keypoints)) + keypoints = [keypoints[i] + (id[i],) for i in range(len(id))] + keypoints_all.append(keypoints) + keypoints_cnt += len(keypoints) + return keypoints_all + + +def get_limb_consistency(paf, start_keypoint, end_keypoint, image_h, div_num=10): + vec_key = np.subtract(end_keypoint[:2], start_keypoint[:2]) + vec_key_norm = math.sqrt(vec_key[0] * vec_key[0] + vec_key[1] * vec_key[1]) + if vec_key_norm == 0: + vec_key_norm = 1 + vec_key = np.divide(vec_key, vec_key_norm) + + vec_paf = list(zip(np.linspace(start_keypoint[0], end_keypoint[0], num=div_num).astype(int), + np.linspace(start_keypoint[1], end_keypoint[1], num=div_num).astype(int))) + + vec_paf_x = np.array([paf[vec_paf[k][1], vec_paf[k][0], 0] for k in range(div_num)]) + vec_paf_y = np.array([paf[vec_paf[k][1], vec_paf[k][0], 1] for k in range(div_num)]) + + # To see how well the direction of the prediction over the line connecting the limbs aligns + # with the vec_key we compute the integral of the dot product of the "affinity vector at point + # 'u' on the line" and the "vec_key". 
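+    # Formally: E = integral over u in [0, 1] of PAF(p(u)) . vec_key du, where p(u)
+    # linearly interpolates between start_keypoint and end_keypoint.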
+ # In discrete form, this integral is done as below: + vec_sims = np.multiply(vec_paf_x, vec_key[0]) + np.multiply(vec_paf_y, vec_key[1]) + + # this is just a heuristic approach to punish very long predicted limbs + vec_sims_prior = vec_sims.mean() + min(0.5 * image_h / vec_key_norm - 1, 0) + + return vec_sims, vec_sims_prior + + +def connect_keypoints(image_shape, keypoints, paf, limbs, limbsInds): + thre2 = 0.05 + connections = [] + small_limb_list = [1, 15, 16, 17, 18] + for k in range(len(limbsInds)): + paf_limb = paf[:, :, limbsInds[k]] + limb_strs = keypoints[limbs[k][0]] + limb_ends = keypoints[limbs[k][1]] + + if len(limb_strs) != 0 and len(limb_ends) != 0: + cands = [] + for i, limb_str in enumerate(limb_strs): + for j, limb_end in enumerate(limb_ends): + # for each potential pair of keypoints which can have a limb in between we + # measure a score using the get_limb_consistency function + if limbs[k][0] in small_limb_list or limbs[k][1] in small_limb_list: + sims, sims_p = get_limb_consistency(paf_limb, limb_str, limb_end, + image_shape[0], div_num=10) + else: + sims, sims_p = get_limb_consistency(paf_limb, limb_str, limb_end, + image_shape[0], div_num=10) + if len(np.where(sims > thre2)[0]) > int(0.80 * len(sims)) and sims_p > 0: + cands.append([i, j, sims_p]) + cands = sorted(cands, key=lambda x: x[2], reverse=True) + connection = np.zeros((0, 3)) + visited_strs, visited_ends = [], [] + for cand in cands: + i, j, s = cand + if i not in visited_strs and j not in visited_ends: + connection = np.vstack([connection, [limb_strs[i][3], limb_ends[j][3], s]]) + visited_strs.append(i) + visited_ends.append(j) + + if len(connection) >= min(len(limb_strs), len(limb_ends)): + break + connections.append(connection) + else: + connections.append([]) + return connections + + +def create_skeletons(keypoints, connections, limbs): + # last number in each row is the total parts number of that person + # the second last number in each row is the score of the overall configuration + skeletons = -1 * np.ones((0, 20)) + keypoints_flatten = np.array([item for sublist in keypoints for item in sublist]) + + for k in range(len(limbs)): + if len(connections[k]) > 0: + detected_str = connections[k][:, 0] + detected_end = connections[k][:, 1] + limb_str, limb_end = np.array(limbs[k]) + + for i in range(len(connections[k])): + found = 0 + subset_idx = [-1, -1] + for j in range(len(skeletons)): + if skeletons[j][limb_str] == detected_str[i] or \ + skeletons[j][limb_end] == detected_end[i]: + subset_idx[found] = j + found += 1 + + if found == 1: + j = subset_idx[0] + if skeletons[j][limb_end] != detected_end[i]: + skeletons[j][limb_end] = detected_end[i] + skeletons[j][-1] += 1 + skeletons[j][-2] += keypoints_flatten[detected_end[i].astype(int), 2] + \ + connections[k][i][2] + elif found == 2: # if found 2 and disjoint, merge them + j1, j2 = subset_idx + + membership = ((skeletons[j1] >= 0).astype(int) + + (skeletons[j2] >= 0).astype(int))[:-2] + if len(np.nonzero(membership == 2)[0]) == 0: # merge + skeletons[j1][:-2] += (skeletons[j2][:-2] + 1) + skeletons[j1][-2:] += skeletons[j2][-2:] + skeletons[j1][-2] += connections[k][i][2] + skeletons = np.delete(skeletons, j2, 0) + else: # as like found == 1 + skeletons[j1][limb_end] = detected_end[i] + skeletons[j1][-1] += 1 + skeletons[j1][-2] += keypoints_flatten[detected_end[i].astype(int), 2] + \ + connections[k][i][2] + + # if find no partA in the subset, create a new subset + elif not found and k < 17: + row = -1 * np.ones(20) + row[limb_str] = detected_str[i] 
+ row[limb_end] = detected_end[i] + row[-1] = 2 + row[-2] = sum(keypoints_flatten[connections[k][i, :2].astype(int), 2]) + \ + connections[k][i][2] + skeletons = np.vstack([skeletons, row]) + + # delete some rows of subset which has few parts occur + deleteIdx = [] + for i in range(len(skeletons)): + if skeletons[i][-1] < 4 or skeletons[i][-2] / skeletons[i][-1] < 0.4: + deleteIdx.append(i) + skeletons = np.delete(skeletons, deleteIdx, axis=0) + return {'keypoints': skeletons[:, :18], 'scores': skeletons[:, 18]} + + +def estimate_pose(image_shape, heatmap, paf): + # limbs as pair of keypoints: [start_keypoint, end_keypoint] keypoints index to heatmap matrix + limbs = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], + [11, 12], [12, 13], + [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17]] + # index where each limb stands in paf matrix. Two consecutive indices for x and y component + # of paf + limbsInd = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3], [4, 5], + [6, 7], [8, 9], + [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], [36, 37], [18, 19], [26, 27]] + + # Computing the keypoints using non-max-suppression + keypoints = get_keypoints(heatmap) + + # Computing which pairs of joints should be connected based on the paf. + connections = connect_keypoints(image_shape, keypoints, paf, limbs, limbsInd) + + skeletons = create_skeletons(keypoints, connections, limbs) + + return skeletons, np.array([item for sublist in keypoints for item in sublist]) + + +def parse_results(skeletons, points): + coco_indices = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3] + + skeletons_out, scores = [], [] + for score, keypoints in zip(skeletons['scores'], skeletons['keypoints']): + skeleton = [] + for p in range(len(keypoints)): + if p == 1: + continue + ind = int(keypoints[p]) + if ind >= 0: + point = {'x': points[ind, 0], 'y': points[ind, 1], 'score': points[ind, 2], + 'id': coco_indices[p]} + skeleton.append(point) + + skeletons_out.append(skeleton) + scores.append(score) + return {'skeletons': skeletons_out, 'scores': scores} + + +class COCOWrapper: + def __init__(self, coco_path, num_imgs=None): + self.coco_path = coco_path + self.num_imgs = num_imgs + # sys.path.append(self.coco_apth + "codes/PythonAPI") + + def get_images(self): + imgs = self.cocoGT.imgs.values() + + image_ids = sorted(map(lambda x: x['id'], self.cocoGT.imgs.values())) + if self.num_imgs: + image_ids = image_ids[:self.num_imgs] + imgs = list(filter(lambda x: x['id'] in image_ids, imgs)) + + return imgs + + def evaluate_json(self, obj): + # initialize COCO detections api + cocoDT = self.cocoGT.loadRes(obj) + + imgIds = sorted(self.cocoGT.getImgIds()) + if self.num_imgs: + imgIds = imgIds[:self.num_imgs] + + # running evaluation + cocoEval = COCOeval(self.cocoGT, cocoDT, 'keypoints') + cocoEval.params.imgIds = imgIds + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + return cocoEval.stats[0::5] + + def get_results_json(self, results, imgs): + results_obj = [] + for img, result in list(zip(imgs, results)): + for score, skeleton in list(zip(result['scores'], result['skeletons'])): + obj = {'image_id': img['id'], 'category_id': 1, 'keypoints': np.zeros(shape=(3, 17))} + + for keypoint in skeleton: + obj['keypoints'][0, keypoint['id']] = keypoint['x'] - 0.5 + obj['keypoints'][1, keypoint['id']] = keypoint['y'] - 0.5 + obj['keypoints'][2, keypoint['id']] = 1 + obj['keypoints'] = list(np.reshape(obj['keypoints'], newshape=(51,), 
order='F')) + obj['score'] = score / len(skeleton) + + results_obj.append(obj) + + return results_obj + + @property + def cocoGT(self): + annType = 'keypoints' + prefix = 'person_keypoints' + print('Initializing demo for *%s* results.' % (annType)) + + # initialize COCO ground truth api + dataType = 'val2014' + annFile = os.path.join(self.coco_path, 'annotations/%s_%s.json' % (prefix, dataType)) + cocoGT = COCO(annFile) + + if not cocoGT: + raise AttributeError('COCO ground truth demo failed to initialize!') + + return cocoGT + + +def evaluate_model(model, + coco_path, + num_imgs=None, + fast=True): + coco = COCOWrapper(coco_path, num_imgs) + + results = [] + image_path = os.path.join(coco.coco_path, 'images/val2014/') + imgs = coco.get_images() + print("Running extended evaluation on the validation set") + for i, img in tqdm(enumerate(imgs)): + image = cv2.imread(image_path + img['file_name']) # B,G,R order + + heatmap, paf = run_model(model, image, fast) + + skeletons, keypoints = estimate_pose(image.shape, heatmap, paf) + results.append(parse_results(skeletons, keypoints)) + + try: + ans = coco.evaluate_json(coco.get_results_json(results, imgs)) + return ans + except: + return [0, 0] + + +def parse_args(): + parser = argparse.ArgumentParser(prog='pose_estimation_quanteval', + description='Evaluate the post quantized SRGAN model') + + parser.add_argument('model_dir', + help='The location where the the .pth file is saved,' + 'the whole model should be saved by torch.save()', + type=str) + parser.add_argument('coco_path', + help='The location coco images and annotations are saved. ' + 'It assumes a folder structure containing two subdirectorys ' + '`images/val2014` and `annotations`. Right now only val2014 ' + 'dataset with person_keypoints are supported', + type=str) + parser.add_argument('--representative-datapath', + '-reprdata', + help='The location where representative data are stored. 
' + 'The data will be used for computation of encodings', + type=str) + parser.add_argument('--quant-scheme', + '-qs', + help='Support two schemes for quantization: [`tf` or `tf_enhanced`],' + '`tf_enhanced` is used by default', + default='tf_enhanced', + choices=['tf', 'tf_enhanced'], + type=str) + + return parser.parse_args() + + +def pose_estimation_quanteval(args): + # load the model checkpoint from meta + model = torch.load(args.model_dir) + + # create quantsim object which inserts quant ops between layers + sim = quantsim.QuantizationSimModel(model, + input_shapes=(1, 3, 128, 128), + quant_scheme=args.quant_scheme) + + evaluate = partial(evaluate_model, + num_imgs=100 + ) + sim.compute_encodings(evaluate, args.coco_path) + + eval_num = evaluate_model(sim.model, + args.coco_path + ) + print(f'The [mAP, mAR] results are: {eval_num}') + + +if __name__ == '__main__': + args = parse_args() + pose_estimation_quanteval(args) diff --git a/zoo_torch/examples/pytorch-deeplab-xception-zoo.patch b/zoo_torch/examples/pytorch-deeplab-xception-zoo.patch new file mode 100644 index 0000000..de7f477 --- /dev/null +++ b/zoo_torch/examples/pytorch-deeplab-xception-zoo.patch @@ -0,0 +1,176 @@ +diff --git a/modeling/aspp.py b/modeling/aspp.py +index 5a97879..770e60f 100644 +--- a/modeling/aspp.py ++++ b/modeling/aspp.py +@@ -68,7 +68,7 @@ class ASPP(nn.Module): + x3 = self.aspp3(x) + x4 = self.aspp4(x) + x5 = self.global_avg_pool(x) +- x5 = F.interpolate(x5, size=x4.size()[2:], mode='bilinear', align_corners=True) ++ x5 = F.interpolate(x5, size=x4.size()[2:], mode='nearest', align_corners=None) + x = torch.cat((x1, x2, x3, x4, x5), dim=1) + + x = self.conv1(x) +diff --git a/modeling/backbone/mobilenet.py b/modeling/backbone/mobilenet.py +index 6fff541..9edce54 100644 +--- a/modeling/backbone/mobilenet.py ++++ b/modeling/backbone/mobilenet.py +@@ -5,22 +5,21 @@ import math + from modeling.sync_batchnorm.batchnorm import SynchronizedBatchNorm2d + import torch.utils.model_zoo as model_zoo + ++from aimet_torch.defs import PassThroughOp + def conv_bn(inp, oup, stride, BatchNorm): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + BatchNorm(oup), + nn.ReLU6(inplace=True) + ) +- +- +-def fixed_padding(inputs, kernel_size, dilation): +- kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1) +- pad_total = kernel_size_effective - 1 +- pad_beg = pad_total // 2 +- pad_end = pad_total - pad_beg +- padded_inputs = F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end)) +- return padded_inputs +- ++def _make_divisible(v, divisor=8, min_value=None): ++ if min_value is None: ++ min_value = divisor ++ new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) ++ # Make sure that round down does not go down by more than 10%. ++ if new_v < 0.9 * v: ++ new_v += divisor ++ return new_v + + class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, dilation, expand_ratio, BatchNorm): +@@ -33,10 +32,15 @@ class InvertedResidual(nn.Module): + self.kernel_size = 3 + self.dilation = dilation + ++ # More generally: padding = (ks // 2) * dilation for odd kernel sizes. 
ks is fixed to 3, ++ # ks // 2 == 1, so (ks // 2) * dilation = dilation ++ padding = dilation + if expand_ratio == 1: + self.conv = nn.Sequential( + # dw +- nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False), ++ nn.Conv2d( ++ hidden_dim, hidden_dim, 3, stride, ++ padding, dilation, groups=hidden_dim, bias=False), + BatchNorm(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear +@@ -46,11 +50,13 @@ class InvertedResidual(nn.Module): + else: + self.conv = nn.Sequential( + # pw +- nn.Conv2d(inp, hidden_dim, 1, 1, 0, 1, bias=False), ++ # It is stupid to pad here, but we need it for backwards compatibility ++ nn.Conv2d(inp, hidden_dim, 1, 1, padding, 1, bias=False), + BatchNorm(hidden_dim), + nn.ReLU6(inplace=True), + # dw +- nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, groups=hidden_dim, bias=False), ++ nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 0, dilation, ++ groups=hidden_dim, bias=False), + BatchNorm(hidden_dim), + nn.ReLU6(inplace=True), + # pw-linear +@@ -59,14 +65,12 @@ class InvertedResidual(nn.Module): + ) + + def forward(self, x): +- x_pad = fixed_padding(x, self.kernel_size, dilation=self.dilation) + if self.use_res_connect: +- x = x + self.conv(x_pad) ++ x = x + self.conv(x) + else: +- x = self.conv(x_pad) ++ x = self.conv(x) + return x + +- + class MobileNetV2(nn.Module): + def __init__(self, output_stride=8, BatchNorm=None, width_mult=1., pretrained=True): + super(MobileNetV2, self).__init__() +@@ -87,7 +91,8 @@ class MobileNetV2(nn.Module): + + # building first layer + input_channel = int(input_channel * width_mult) +- self.features = [conv_bn(3, input_channel, 2, BatchNorm)] ++ # self.features = [conv_bn(3, input_channel, 2, BatchNorm)] ++ features = [conv_bn(3, input_channel, 2, BatchNorm)] + current_stride *= 2 + # building inverted residual blocks + for t, c, n, s in interverted_residual_setting: +@@ -102,18 +107,24 @@ class MobileNetV2(nn.Module): + output_channel = int(c * width_mult) + for i in range(n): + if i == 0: +- self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm)) ++ features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm)) ++ # self.features.append(block(input_channel, output_channel, stride, dilation, t, BatchNorm)) + else: +- self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm)) ++ features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm)) ++ # self.features.append(block(input_channel, output_channel, 1, dilation, t, BatchNorm)) ++ + input_channel = output_channel +- self.features = nn.Sequential(*self.features) ++ # self.features = nn.Sequential(*self.features) + self._initialize_weights() + ++ ++ # self.low_level_features = self.features[0:4] ++ # self.high_level_features = self.features[4:] + if pretrained: + self._load_pretrained_model() ++ self.low_level_features = nn.Sequential(*features[0:4]) + +- self.low_level_features = self.features[0:4] +- self.high_level_features = self.features[4:] ++ self.high_level_features = nn.Sequential(*features[4:]) + + def forward(self, x): + low_level_feat = self.low_level_features(x) +@@ -141,8 +152,8 @@ class MobileNetV2(nn.Module): + m.bias.data.zero_() + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) +- m.bias.data.zero_() +- ++ m.bias.data.zero_() ++ + if __name__ == "__main__": + input = torch.rand(1, 3, 512, 512) + model = MobileNetV2(output_stride=16, BatchNorm=nn.BatchNorm2d) +diff --git a/modeling/decoder.py 
b/modeling/decoder.py
+index 5ed41d0..ec4485e 100644
+--- a/modeling/decoder.py
++++ b/modeling/decoder.py
+@@ -36,7 +36,7 @@ class Decoder(nn.Module):
+ low_level_feat = self.bn1(low_level_feat)
+ low_level_feat = self.relu(low_level_feat)
+
+- x = F.interpolate(x, size=low_level_feat.size()[2:], mode='bilinear', align_corners=True)
++ x = F.interpolate(x, size=low_level_feat.size()[2:], mode='nearest', align_corners=None)
+ x = torch.cat((x, low_level_feat), dim=1)
+ x = self.last_conv(x)
+
+diff --git a/modeling/deeplab.py b/modeling/deeplab.py
+index 91907f8..8308934 100644
+--- a/modeling/deeplab.py
++++ b/modeling/deeplab.py
+@@ -28,7 +28,7 @@ class DeepLab(nn.Module):
+ x, low_level_feat = self.backbone(input)
+ x = self.aspp(x)
+ x = self.decoder(x, low_level_feat)
+- x = F.interpolate(x, size=input.size()[2:], mode='bilinear', align_corners=True)
++ x = F.interpolate(x, size=input.size()[2:], mode='nearest', align_corners=None)
+
+ return x
+
diff --git a/zoo_torch/examples/srgan_quanteval.py b/zoo_torch/examples/srgan_quanteval.py
new file mode 100644
index 0000000..929e752
--- /dev/null
+++ b/zoo_torch/examples/srgan_quanteval.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3.6
+# -*- mode: python -*-
+# =============================================================================
+# @@-COPYRIGHT-START-@@
+#
+# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# @@-COPYRIGHT-END-@@
+# =============================================================================
+
+"""
+This script applies and evaluates a pre-trained SRGAN model taken from
+https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan.
+Evaluation metrics are computed on the Y channel by default. This model is quantization-
+friendly, so no post-training methods or QAT were applied. For instructions please refer
+to zoo_torch/Docs/SRGAN.md
+"""
+
+import os
+import argparse
+from functools import partial
+from collections import OrderedDict
+
+import torch
+import numpy as np
+from aimet_torch import quantsim
+
+import codes.options.options as option
+import codes.utils.util as util
+from codes.data.util import bgr2ycbcr
+from codes.data import create_dataset, create_dataloader
+from codes.models import create_model
+
+
+def evaluate_generator(generator,
+ test_loader,
+ options,
+ mode='y_channel',
+ output_dir=None):
+ '''
+ :param generator: an SRGAN model's generator part; must be an nn.Module
+ :param test_loader: a PyTorch dataloader
+ :param options: a dictionary which contains options for the dataloader
+ :param mode: a string indicating in which space to evaluate the PSNR & SSIM metrics.
+ Accepted values are ['y_channel', 'rgb']
+ :param output_dir: if specified, super-resolved images will be saved under this path
+ :return: a tuple containing the computed sequences of (PSNR, SSIM) values
+ '''
+ if mode == 'rgb':
+ print('Testing on RGB channels...')
+ elif mode == 'y_channel':
+ print('Testing on Y channel...')
+ else:
+ raise ValueError('evaluation mode not supported! '
+ 'Must be one of `rgb` or `y_channel`')
+
+ device = torch.device('cuda' if options['gpu_ids'] is not None else 'cpu')
+
+ psnr_values = []
+ ssim_values = []
+
+ for data in test_loader:
+ need_GT = False if test_loader.dataset.opt['dataroot_GT'] is None else True
+ var_L = data['LQ'].to(device)
+ if need_GT:
+ real_H = data['GT'].to(device)
+ img_path = data['GT_path'][0] if need_GT else data['LQ_path'][0]
+ img_name = os.path.splitext(os.path.basename(img_path))[0]
+
+ generator.eval()
+ with torch.no_grad():
+ fake_H = generator(var_L)
+ generator.train()
+
+ out_dict = OrderedDict()
+ out_dict['LQ'] = var_L.detach()[0].float().cpu()
+ out_dict['rlt'] = fake_H.detach()[0].float().cpu()
+ if need_GT:
+ out_dict['GT'] = real_H.detach()[0].float().cpu()
+ visuals = out_dict
+
+ sr_img = util.tensor2img(visuals['rlt']) # uint8
+
+ # save images if output_dir is specified
+ if output_dir:
+ save_img_path = os.path.join(output_dir, img_name + '.png')
+ util.save_img(sr_img, save_img_path)
+
+
+ # calculate PSNR and SSIM
+ if need_GT:
+ gt_img = util.tensor2img(visuals['GT'])
+ sr_img, gt_img = util.crop_border([sr_img, gt_img], options['scale'])
+
+ if mode == 'rgb':
+ psnr = util.calculate_psnr(sr_img, gt_img)
+ ssim = util.calculate_ssim(sr_img, gt_img)
+ psnr_values.append(psnr)
+ ssim_values.append(ssim)
+
+ if mode == 'y_channel' and gt_img.shape[2] == 3: # RGB image
+ sr_img_y = bgr2ycbcr(sr_img / 255., only_y=True)
+ gt_img_y = bgr2ycbcr(gt_img / 255., only_y=True)
+
+ psnr = util.calculate_psnr(sr_img_y * 255, gt_img_y * 255)
+ ssim = util.calculate_ssim(sr_img_y * 255, gt_img_y * 255)
+ psnr_values.append(psnr)
+ ssim_values.append(ssim)
+
+ return psnr_values, ssim_values
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(prog='srgan_quanteval',
+ description='Evaluate the SRGAN model before and after quantization')
+
+ parser.add_argument('--options-file',
+ '-opt',
+ help='Path to the yaml options file',
+ required=True,
+ type=str)
+ parser.add_argument('--quant-scheme',
+ '-qs',
+ help='Quantization scheme to use: [`tf` or `tf_enhanced`], '
+ '`tf_enhanced` is used by default',
+ default='tf_enhanced',
+ choices=['tf', 'tf_enhanced'],
+ type=str)
+ parser.add_argument('--default-output-bw',
+ '-bout',
+ help='Default bitwidth (4-31) to use for quantizing layer inputs and outputs',
+ default=8,
+ choices=range(4, 32),
+ type=int)
+ parser.add_argument('--default-param-bw',
+ '-bparam',
+ help='Default bitwidth (4-31) to use for quantizing layer parameters',
+ default=8,
+ choices=range(4, 32),
+ type=int)
+ parser.add_argument('--output-dir',
+ '-outdir',
+ help='If specified, output images of the quantized model '
+ 'will be saved under this directory',
+ default=None,
+ type=str)
+
+ return parser.parse_args()
+
+
+def main(args):
+ # parse the options file
+ print(f'Parsing file {args.options_file}...')
+ opt = option.parse(args.options_file, is_train=False)
+ opt = option.dict_to_nonedict(opt)
+
+ print('Loading test images...')
+ test_loaders = []
+ for phase, dataset_opt in sorted(opt['datasets'].items()):
+ test_set = create_dataset(dataset_opt)
+ test_loader = create_dataloader(test_set, dataset_opt)
+ test_loaders.append(test_loader)
+
+ model = create_model(opt)
+ generator = model.netG.module
+
+ for test_loader in test_loaders:
+ test_set_name = test_loader.dataset.opt['name']
+ print(f'Testing on dataset {test_set_name}')
+ psnr_vals, ssim_vals = evaluate_generator(generator, test_loader, opt)
+ psnr_val = np.mean(psnr_vals)
+ ssim_val = np.mean(ssim_vals)
+ print(f'Mean PSNR and SSIM for {test_set_name} on original model are: [{psnr_val}, {ssim_val}]')
+
+ # The input shape is chosen arbitrarily to generate dummy input for creating the quantsim object
+ input_shapes = (1, 3, 24, 24)
+ sim = quantsim.QuantizationSimModel(generator,
+ input_shapes=input_shapes,
+ quant_scheme=args.quant_scheme,
+ default_output_bw=args.default_output_bw,
+ default_param_bw=args.default_param_bw)
+
+ evaluate_func = partial(evaluate_generator, options=opt)
+ sim.compute_encodings(evaluate_func, test_loaders[0])
+
+ for test_loader in test_loaders:
+ test_set_name = test_loader.dataset.opt['name']
+ print(f'Testing on dataset {test_set_name}')
+ psnr_vals, ssim_vals = evaluate_generator(sim.model, test_loader, opt, output_dir=args.output_dir)
+ psnr_val = np.mean(psnr_vals)
+ ssim_val = np.mean(ssim_vals)
+ print(f'Mean PSNR and SSIM for {test_set_name} on quantized model are: [{psnr_val}, {ssim_val}]')
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ main(args)
\ No newline at end of file
diff --git a/zoo_torch/examples/ssd_utils.py b/zoo_torch/examples/ssd_utils.py
new file mode 100755
index 0000000..8552327
--- /dev/null
+++ b/zoo_torch/examples/ssd_utils.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3.6
+# -*- mode: python -*-
+# =============================================================================
+# @@-COPYRIGHT-START-@@
+#
+# Copyright (c) 2020 of Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# @@-COPYRIGHT-END-@@
+# =============================================================================
+
+''' AIMET Quantsim helper functions '''
+''' Calibration wrapper functions for range estimation '''
+
+from tqdm import tqdm
+from torch.utils.data import Dataset
+from vision.ssd.data_preprocessing import PredictionTransform
+from torch.utils.data import DataLoader
+import torch
+import random
+import numpy as np
+
+class VoCdataset(Dataset):
+ def __init__(self, data_dict):
+ """
+ Args:
+ data_dict: a VOC dataset object exposing image ids via `ids`, images via `get_image()` and labels via `get_annotation()`
+ """
+ self.data = data_dict
+
+ def __len__(self):
+ return len(self.data.ids)
+
+ def __getitem__(self, idx):
+ image = self.data.get_image(idx)
+ label = self.data.get_annotation(idx)
+ return image, label
+
+def work_init(work_id):
+ seed = torch.initial_seed() % 2**32
+ random.seed(seed + work_id)
+ np.random.seed(seed + work_id)
+
+def model_eval(args, predictor, dataset):
+ import copy
+ aimet_dataset = copy.deepcopy(dataset)
+ aimet_dataset.ids = aimet_dataset.ids[:1000]
+ calib_dataset = VoCdataset(aimet_dataset)
+ data_loader_kwargs = {'worker_init_fn': work_init, 'num_workers': 0}
+ batch_size = 1
+ calib_dataloader = DataLoader(calib_dataset, batch_size, shuffle=False, pin_memory=True, **data_loader_kwargs)
+ calib = tqdm(calib_dataloader)
+ def func_quant(model, iterations, use_cuda=True):
+ for i, samples in enumerate(calib):
+ image = samples[0]
+ image = predictor.transform(image.squeeze(0).numpy())
+ image = image.unsqueeze(0).cuda()
+ model(image)
+ return func_quant
+
+def get_simulations(model, args):
+ from aimet_common.defs import QuantScheme
+ from aimet_torch.pro.quantsim import QuantizationSimModel
+ if hasattr(args, 'quant_scheme'):
+ if args.quant_scheme == 'range_learning_tf':
+ quant_scheme = QuantScheme.training_range_learning_with_tf_init
+ elif args.quant_scheme == 'range_learning_tfe':
+ quant_scheme = QuantScheme.training_range_learning_with_tf_enhanced_init
+ elif args.quant_scheme == 'tf':
+ quant_scheme = 
QuantScheme.post_training_tf + elif args.quant_scheme == 'tf_enhanced': + quant_scheme = QuantScheme.post_training_tf_enhanced + else: + raise ValueError("Got unrecognized quant_scheme: " + args.quant_scheme) + kwargs = { + 'quant_scheme': quant_scheme, + 'default_param_bw': args.default_param_bw, + 'default_output_bw': args.default_output_bw, + 'config_file': args.config_file + } + print(kwargs) + sim = QuantizationSimModel(model.cpu(), input_shapes=args.input_shape, **kwargs) + return sim \ No newline at end of file diff --git a/zoo_torch/examples/torch_ssd_eval.patch b/zoo_torch/examples/torch_ssd_eval.patch new file mode 100644 index 0000000..8baed03 --- /dev/null +++ b/zoo_torch/examples/torch_ssd_eval.patch @@ -0,0 +1,123 @@ +diff --git a/eval_ssd.py b/eval_ssd.py +index 5923915..e09c7e4 100644 +--- a/eval_ssd.py ++++ b/eval_ssd.py +@@ -1,3 +1,8 @@ ++#!/usr/bin/env python3.6 ++ ++''' AIMET QuantSim script on MobileNetV2-SSD Lite ''' ++''' Currently We apply QuantSIm on Batch Norm folded model ''' ++ + import torch + from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor + from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor +@@ -141,7 +146,7 @@ if __name__ == '__main__': + elif args.net == 'sq-ssd-lite': + net = create_squeezenet_ssd_lite(len(class_names), is_test=True) + elif args.net == 'mb2-ssd-lite': +- net = create_mobilenetv2_ssd_lite(len(class_names), width_mult=args.mb2_width_mult, is_test=True) ++ net = torch.load(args.trained_model) + elif args.net == 'mb3-large-ssd-lite': + net = create_mobilenetv3_large_ssd_lite(len(class_names), is_test=True) + elif args.net == 'mb3-small-ssd-lite': +@@ -150,10 +155,20 @@ if __name__ == '__main__': + logging.fatal("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.") + parser.print_help(sys.stderr) + sys.exit(1) ++ net.eval() ++ # from IPython import embed; embed() ++ args.input_shape = (1, 3, 300, 300) ++ args.quant_scheme = "tf_enhanced" ++ args.config_file = None ++ args.default_param_bw = 8 ++ args.default_output_bw = 8 + + timer.start("Load Model") +- net.load(args.trained_model) + net = net.to(DEVICE) ++ ++ from ssd_utils import model_eval, get_simulations ++ sim = get_simulations(net, args) ++ + print(f'It took {timer.end("Load Model")} seconds to load the model.') + if args.net == 'vgg16-ssd': + predictor = create_vgg_ssd_predictor(net, nms_method=args.nms_method, device=DEVICE) +@@ -164,12 +179,15 @@ if __name__ == '__main__': + elif args.net == 'sq-ssd-lite': + predictor = create_squeezenet_ssd_lite_predictor(net,nms_method=args.nms_method, device=DEVICE) + elif args.net == 'mb2-ssd-lite' or args.net == "mb3-large-ssd-lite" or args.net == "mb3-small-ssd-lite": +- predictor = create_mobilenetv2_ssd_lite_predictor(net, nms_method=args.nms_method, device=DEVICE) ++ predictor = create_mobilenetv2_ssd_lite_predictor(sim.model, nms_method=args.nms_method, device=DEVICE) + else: + logging.fatal("The net type is wrong. 
It should be one of vgg16-ssd, mb1-ssd and mb1-ssd-lite.") + parser.print_help(sys.stderr) + sys.exit(1) + ++ eval_func = model_eval(args, predictor, dataset) ++ sim.compute_encodings(eval_func, (sim.model, 3000, True)) ++ + results = [] + for i in range(len(dataset)): + print("process image", i) +diff --git a/vision/ssd/ssd.py b/vision/ssd/ssd.py +index 962b9a2..d5e6676 100644 +--- a/vision/ssd/ssd.py ++++ b/vision/ssd/ssd.py +@@ -24,7 +24,24 @@ class SSD(nn.Module): + self.classification_headers = classification_headers + self.regression_headers = regression_headers + self.is_test = is_test +- self.config = config ++ #self.config = config ++ ++ self.image_size = 300 ++ self.image_mean = np.array([127, 127, 127]) # RGB layout ++ self.image_std = 128.0 ++ self.iou_threshold = 0.45 ++ self.center_variance = 0.1 ++ self.size_variance = 0.2 ++ ++ self.specs = [box_utils.SSDSpec(19, 16, box_utils.SSDBoxSizes(60, 105), [2, 3]), ++ box_utils.SSDSpec(10, 32, box_utils.SSDBoxSizes(105, 150), [2, 3]), ++ box_utils.SSDSpec(5, 64, box_utils.SSDBoxSizes(150, 195), [2, 3]), ++ box_utils.SSDSpec(3, 100, box_utils.SSDBoxSizes(195, 240), [2, 3]), ++ box_utils.SSDSpec(2, 150, box_utils.SSDBoxSizes(240, 285), [2, 3]), ++ box_utils.SSDSpec(1, 300, box_utils.SSDBoxSizes(285, 330), [2, 3])] ++ ++ ++ self.gen_priors = box_utils.generate_ssd_priors(self.specs, self.image_size) + + # register layers in source_layer_indexes by adding them to a module list + self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes +@@ -34,8 +51,9 @@ class SSD(nn.Module): + else: + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + if is_test: +- self.config = config +- self.priors = config.priors.to(self.device) ++ #self.config = config ++ #self.priors = config.priors.to(self.device) ++ self.priors = self.gen_priors.to(self.device) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + confidences = [] +@@ -90,7 +108,7 @@ class SSD(nn.Module): + if self.is_test: + confidences = F.softmax(confidences, dim=2) + boxes = box_utils.convert_locations_to_boxes( +- locations, self.priors, self.config.center_variance, self.config.size_variance ++ locations.cpu(), self.priors.cpu(), self.center_variance, self.size_variance + ) + boxes = box_utils.center_form_to_corner_form(boxes) + return confidences, boxes +@@ -109,7 +127,9 @@ class SSD(nn.Module): + return confidence, location + + def init_from_base_net(self, model): +- self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True) ++ state_dict = torch.load(model, map_location=lambda storage, loc: storage) ++ state_dict = {k[9:]: v for k, v in state_dict.items() if k.startswith('features')} ++ self.base_net.load_state_dict(state_dict, strict=True) + self.source_layer_add_ons.apply(_xavier_init_) + self.extras.apply(_xavier_init_) + self.classification_headers.apply(_xavier_init_)
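
All three example scripts in this diff (pose estimation, SRGAN and SSD) follow the same AIMET pattern: wrap the floating-point model in a QuantizationSimModel, run a calibration forward pass through compute_encodings, then evaluate sim.model with the task metric. Below is a minimal, self-contained sketch of that flow, assuming the AIMET 1.x API the scripts call (input_shapes, string quant_scheme, positional callback arguments); the toy model and random calibration batches are placeholders, not part of the zoo code.

```python
import torch
import torch.nn as nn
from aimet_torch import quantsim

# Toy stand-in for the task model; the scripts above load real checkpoints instead.
model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
                      nn.Conv2d(8, 3, 3, padding=1))

# A few random batches standing in for the calibration data.
calib_batches = [torch.randn(1, 3, 128, 128) for _ in range(4)]

def forward_pass(model, batches):
    # Callback invoked by compute_encodings to collect activation ranges.
    model.eval()
    with torch.no_grad():
        for batch in batches:
            model(batch)

sim = quantsim.QuantizationSimModel(model,
                                    input_shapes=(1, 3, 128, 128),
                                    quant_scheme='tf_enhanced',
                                    default_param_bw=8,
                                    default_output_bw=8)
sim.compute_encodings(forward_pass, calib_batches)

# sim.model now contains simulated-quantization ops; evaluate it like the original model.
with torch.no_grad():
    out = sim.model(torch.randn(1, 3, 128, 128))
print(out.shape)
```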
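The pytorch-deeplab-xception patch above changes every F.interpolate call from mode='bilinear', align_corners=True to mode='nearest'. A short standalone check (plain PyTorch, not part of the patch) of why align_corners is set to None at the same time: PyTorch only accepts align_corners with the (bi/tri)linear and bicubic modes, so it has to be dropped when switching to nearest-neighbour upsampling.

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 8, 16, 16)

# Original call: bilinear upsampling with aligned corners.
up_bilinear = F.interpolate(x, size=(64, 64), mode='bilinear', align_corners=True)

# Patched call: nearest-neighbour upsampling; align_corners must stay None here,
# since passing it together with mode='nearest' raises a ValueError.
up_nearest = F.interpolate(x, size=(64, 64), mode='nearest')

print(up_bilinear.shape, up_nearest.shape)  # both torch.Size([1, 8, 64, 64])
```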
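The MobileNetV2 patch above removes the dynamic fixed_padding helper and instead passes padding = dilation directly to the 3x3 depthwise convolutions, following the comment in the patch: for odd kernel sizes padding = (ks // 2) * dilation, and ks // 2 == 1 when ks == 3. A quick numerical check of that equivalence, written against plain PyTorch rather than the zoo code:

```python
import torch
import torch.nn.functional as F

def fixed_padding(inputs, kernel_size, dilation):
    # Same arithmetic as the helper the patch removes.
    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    return F.pad(inputs, (pad_beg, pad_end, pad_beg, pad_end))

torch.manual_seed(0)
x = torch.randn(1, 4, 16, 16)
w = torch.randn(4, 1, 3, 3)  # 3x3 depthwise kernel (groups == channels)

for dilation in (1, 2, 4):
    # Old path: explicit zero-padding followed by a padding-0 depthwise conv.
    y_old = F.conv2d(fixed_padding(x, 3, dilation), w, stride=1, padding=0,
                     dilation=dilation, groups=4)
    # New path: padding folded into the conv, padding = (ks // 2) * dilation = dilation.
    y_new = F.conv2d(x, w, stride=1, padding=dilation, dilation=dilation, groups=4)
    assert torch.allclose(y_old, y_new)
```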
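evaluate_generator in srgan_quanteval.py reports PSNR/SSIM on the Y channel by default: both images are converted with bgr2ycbcr(..., only_y=True) and the metrics are computed on the luma plane rescaled to [0, 255]. A rough standalone illustration of that computation follows; the BT.601 coefficients are an assumption about what the repo's bgr2ycbcr helper does, and psnr below is a generic reimplementation, not the repo's util.calculate_psnr.

```python
import numpy as np

def bgr_to_y(img01):
    # Assumed BT.601 luma for a BGR image scaled to [0, 1]; result lies in [16, 235].
    return np.dot(img01, [24.966, 128.553, 65.481]) + 16.0

def psnr(img1, img2, max_val=255.0):
    # Generic PSNR on the [0, 255] scale.
    mse = np.mean((img1.astype(np.float64) - img2.astype(np.float64)) ** 2)
    return float('inf') if mse == 0 else 20 * np.log10(max_val / np.sqrt(mse))

# Toy uint8 BGR images standing in for the cropped SR and GT outputs.
rng = np.random.default_rng(0)
sr, gt = rng.integers(0, 256, size=(2, 32, 32, 3), dtype=np.uint8)

print(psnr(bgr_to_y(sr / 255.), bgr_to_y(gt / 255.)))
```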