From b5869cba9b8e67fd7cb601ac60cb4ed620116253 Mon Sep 17 00:00:00 2001 From: Bharath Ramaswamy Date: Wed, 16 Nov 2022 14:10:08 -0800 Subject: [PATCH] Added PyTorch FFNet model, added INT4 to several models Added the following new model: PyTorch FFNet Added INT4 quantization support to the following models: - Pytorch Classification (regnet_x_3_2gf, resnet18, resnet50) - PyTorch HRNet Posenet - PyTorch HRNet - PyTorch EfficientNet Lite0 - PyTorch DeeplabV3-MobileNetV2 Signed-off-by: Bharath Ramaswamy --- README.md | 40 ++-- zoo_tensorflow/Docs/SRGAN.md | 2 +- zoo_torch/Docs/Classification.md | 60 +++-- zoo_torch/Docs/DeepLabV3.md | 15 +- zoo_torch/Docs/EfficientNet-lite0.md | 5 +- zoo_torch/Docs/FFNet.md | 56 +++++ zoo_torch/Docs/HRNet-w48.md | 23 +- zoo_torch/Docs/Hrnet-posenet.md | 40 +++- .../classification_quanteval.py | 80 ++++--- .../image_net_data_loader.py | 211 ------------------ .../examples/deeplabv3/deeplabv3_quanteval.py | 26 +-- .../efficientnetlite0_quanteval.py | 26 +-- zoo_torch/examples/ffnet/ffnet_quanteval.py | 203 +++++++++++++++++ .../hrnet-posenet/hrnet_posenet_quanteval.py | 103 +++++---- .../examples/hrnet-w48/hrnet-w48_quanteval.py | 61 +++-- 15 files changed, 550 insertions(+), 401 deletions(-) create mode 100755 zoo_torch/Docs/FFNet.md delete mode 100644 zoo_torch/examples/classification/classification_utils/image_net_data_loader.py create mode 100755 zoo_torch/examples/ffnet/ffnet_quanteval.py diff --git a/README.md b/README.md index 0d5078c..3b6adce 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ An original FP32 source model is quantized either using post-training quantizati RetinaNet GitHub Repo Pretrained Model - See Example + See Example (COCO) mAP
FP32: 0.35
INT8: 0.349
Detailed Results RetinaNet.md 1.15 @@ -250,7 +250,7 @@ An original FP32 source model is quantized either using post-training quantizati Pytorch Torchvision Pytorch Torchvision Quantized Model - (ImageNet) Top-1 Accuracy
FP32: 69.75%
INT8: 69.54%
+ (ImageNet) Top-1 Accuracy
FP32: 69.75%
INT8: 69.54%
INT4: 69.1%
Classification.md @@ -258,7 +258,7 @@ An original FP32 source model is quantized either using post-training quantizati Pytorch Torchvision Pytorch Torchvision Quantized Model - (ImageNet) Top-1 Accuracy
FP32: 76.14%
INT8: 75.81%
+ (ImageNet) Top-1 Accuracy
FP32: 76.14%
INT8: 75.81%
INT4: 75.63%
Classification.md @@ -266,30 +266,30 @@ An original FP32 source model is quantized either using post-training quantizati Pytorch Torchvision Pytorch Torchvision Quantized Model - (ImageNet) Top-1 Accuracy
FP32: 78.36%
INT8: 78.10%
+ (ImageNet) Top-1 Accuracy
FP32: 78.36%
INT8: 78.10%
INT4: 77.70%
Classification.md EfficientNet-lite0 GitHub Repo Pretrained Model - Quantized Model - (ImageNet) Top-1 Accuracy
FP32: 75.40%
INT8: 75.36% + Quantized Model + (ImageNet) Top-1 Accuracy
FP32: 75.40%
INT8: 75.36%
INT4: 74.46% EfficientNet-lite0.md DeepLabV3+ GitHub Repo Pretrained Model - Quantized Model - (PascalVOC) mIOU
FP32: 72.91%
INT8: 72.44% + Quantized Model + (PascalVOC) mIOU
FP32: 72.91%
INT8: 72.44%
INT4: 72.18% DeepLabV3.md MobileNetV2-SSD-Lite GitHub Repo Pretrained Model - Quantized Model + Quantized Model (PascalVOC) mAP
FP32: 68.7%
INT8: 68.6% MobileNetV2-SSD-lite.md @@ -306,7 +306,7 @@ An original FP32 source model is quantized either using post-training quantizati Based on Ref. FP32 Model Quantized Model - (COCO) mAP
FP32: 0.765
INT8: 0.763
mAR
FP32: 0.793
INT8: 0.792 + (COCO) mAP
FP32: 0.765
INT8: 0.763
INT4: 0.762
mAR
FP32: 0.793
INT8: 0.792
INT4: 0.791 Hrnet-posenet.md SRGAN @@ -320,8 +320,8 @@ An original FP32 source model is quantized either using post-training quantizati DeepSpeech2 GitHub Repo Pretrained Model - See Example - (Librispeech Test Clean) WER
FP32
9.92%
INT8: 10.22% + See Example + (Librispeech Test Clean) WER
FP32: 9.92%
INT8: 10.22% DeepSpeech2.md @@ -353,7 +353,7 @@ An original FP32 source model is quantized either using post-training quantizati GitHub Repo Original model weight not available See Example - (Cityscapes) mIOU
FP32
81.04%
INT8: 80.78% + (Cityscapes) mIOU
FP32: 81.04%
INT8: 80.65%
INT4: 80.07% HRNet-w48.md @@ -361,7 +361,7 @@ An original FP32 source model is quantized either using post-training quantizati GitHub Repo Pretrained Model See Example - (Cityscapes) mIOU
FP32
77.81%
INT8: 77.17% + (Cityscapes) mIOU
FP32: 77.81%
INT8: 77.17% InverseForm.md @@ -369,9 +369,17 @@ An original FP32 source model is quantized either using post-training quantizati GitHub Repo Pretrained Model See Example - (Cityscapes) mIOU
FP32
86.31%
INT8: 86.21% + (Cityscapes) mIOU
FP32: 86.31%
INT8: 86.21% InverseForm.md + + FFNets + GitHub Repo + Prepared Models (5 in total) + See Example + (Cityscapes) mIOU<br>
segmentation_ffnet78S_dBBB_mobile
FP32: 81.3% INT8: 80.7%
segmentation_ffnet54S_dBBB_mobile
FP32: 80.8% INT8: 80.1%
segmentation_ffnet40S_dBBB_mobile
FP32: 79.2% INT8: 78.9%
segmentation_ffnet78S_BCC_mobile_pre_down
FP32: 80.6% INT8: 80.4%
segmentation_ffnet122NS_CCC_mobile_pre_down
FP32: 79.3% INT8: 79.0% + FFNet.md + *[1]* Original FP32 model source @@ -479,7 +487,7 @@ All results below used a *Scaling factor (LR-to-HR upscaling) of 2x* and the *Se ### Install AIMET Before you can run the example script for a specific model, you need to install the AI Model Efficiency ToolKit (AIMET) software. Please see this [Getting Started](https://github.com/quic/aimet#getting-started) page for an overview. Then install AIMET and its dependencies using these [Installation instructions](https://github.com/quic/aimet/blob/develop/packaging/install.md). -> **NOTE:** To obtain the exact version of AIMET software that was used to test this model zoo, please install release [1.13.0](https://github.com/quic/aimet/releases/tag/1.13.0) when following the above instructions *except where specified otherwise within the individual model documentation markdown file*. +> **NOTE:** To obtain the exact version of AIMET software that was used to test this model zoo, please install release [1.22.2](https://github.com/quic/aimet/releases/tag/1.22.2) when following the above instructions *except where specified otherwise within the individual model documentation markdown file*. ### Running the scripts Download the necessary datasets and code required to run the example for the model of interest. The examples run quantized evaluation and if necessary apply AIMET techniques to improve quantized model performance. They generate the final accuracy results noted in the table above. Refer to the Docs for [TensorFlow](zoo_tensorflow/Docs) or [PyTorch](zoo_torch/Docs) folder to access the documentation and procedures for a specific model. diff --git a/zoo_tensorflow/Docs/SRGAN.md b/zoo_tensorflow/Docs/SRGAN.md index a94a4d6..7227213 100644 --- a/zoo_tensorflow/Docs/SRGAN.md +++ b/zoo_tensorflow/Docs/SRGAN.md @@ -26,7 +26,7 @@ pip install tensorflow-gpu==2.4.0 ## Model Weights - The original SRGAN model is available at: - - [krasserm](https://github.com/krasserm/super-resolution") + - [krasserm](https://github.com/krasserm/super-resolution) ## Usage ```bash diff --git a/zoo_torch/Docs/Classification.md b/zoo_torch/Docs/Classification.md index d3a9016..c49cb92 100644 --- a/zoo_torch/Docs/Classification.md +++ b/zoo_torch/Docs/Classification.md @@ -1,13 +1,9 @@ # PyTorch Classification models This document describes evaluation of optimized checkpoints for Resnet18, Resnet50 and Regnet_x_3_2gf. -## AIMET installation and setup -Please [install and setup AIMET](https://github.com/quic/aimet/blob/release-aimet-1.21/packaging/install.md) (*Torch GPU* variant) before proceeding further. - -**NOTE** -- All AIMET releases are available here: https://github.com/quic/aimet/releases -- This model has been tested using AIMET version *1.21.0* (i.e. set `release_tag="1.21.0"` in the above instructions). -- This model is compatible with the PyTorch GPU variant of AIMET (i.e. set `AIMET_VARIANT="torch_gpu"` in the above instructions). +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](https://github.com/quic/aimet/blob/release-aimet-1.22/packaging/install.md) before proceeding further. +This model was tested with the `torch_gpu` variant of AIMET 1.22.2. 
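A quick, optional sanity check after installation (a minimal sketch; it only assumes the AIMET `torch_gpu` packages and PyTorch are importable) is to import the quantization module used by these examples and confirm a CUDA device is visible:

```python
# Optional install check for the torch_gpu AIMET variant (sketch, not part of the evaluation flow)
import torch
from aimet_torch.quantsim import QuantizationSimModel  # import fails if the AIMET torch packages are missing

print("AIMET torch import OK")
print("CUDA available:", torch.cuda.is_available())  # the torch_gpu variant expects a CUDA-capable GPU
```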
## Additional Setup Dependencies ``` @@ -15,22 +11,41 @@ sudo -H pip install torchvision==0.11.2 --no-deps sudo -H chmod 777 -R /dist-packages/* ``` -## Obtaining model checkpoint, ImageNet validation dataset and calibration dataset -- [Pytorch Torchvision hub](https://pytorch.org/vision/0.11/models.html#classification) instances of Resnet18, Resnet50 and Regnet_x_3_2gf are used as refernce FP32 models. These instances are optimized using AIMET to obtain quantized optimized checkpoints. -- Optimized Resnet18, Resnet50 and Regnet_x_3_2gf checkpoint can be downloaded from the [Releases](/../../releases) page. -- ImageNet can be downloaded from here: - - http://www.image-net.org/ -- Use standard validation set of ImageNet dataset (50k images set) for evaluting performance of FP32 and quantized models. +## Obtain the Original Model for Comparison +- [Pytorch Torchvision hub](https://pytorch.org/vision/0.11/models.html#classification) instances of Resnet18, Resnet50 and Regnet_x_3_2gf are used as reference FP32 models. These instances are optimized using AIMET to obtain quantized optimized checkpoints. + +## Experiment setup +```python +export PYTHONPATH=$PYTHONPATH:/aimet-model-zoo +``` -For the quantization task, we require the model path, evaluation dataset path and calibration dataset path - which is a subset of validation dataset to be used for computing the encodings and AdaRound optimizaiton. +## Dataset +This evaluation was designed for the 2012 ImageNet Large Scale Visual Recognition Challenge (ILSVRC2012), which can be obtained from: http://www.image-net.org/ +The dataset directory is expected to have 3 subdirectories: train, valid, and test (only the valid test is used, hence if the other subdirectories are missing that is ok). +Each of the {train, valid, test} directories is then expected to have 1000 subdirectories, each containing the images from the 1000 classes present in the ILSVRC2012 dataset, such as in the example below: + +``` + train/ + ├── n01440764 + │ ├── n01440764_10026.JPEG + │ ├── n01440764_10027.JPEG + │ ├── ...... + ├── ...... + val/ + ├── n01440764 + │ ├── ILSVRC2012_val_00000293.JPEG + │ ├── ILSVRC2012_val_00002138.JPEG + │ ├── ...... + ├── ...... 
+``` ## Usage -- To run evaluation with QuantSim in AIMET, use the following +To run evaluation with QuantSim in AIMET, use the following ```bash cd classification python classification_quanteval.py\ --fp32-model \ - --default-param-bw \ + --default-param-bw \ --default-output-bw \ --use-cuda \ --evaluation-dataset @@ -40,6 +55,8 @@ python classification_quanteval.py --fp32-model=resnet18 --default-weight-bw=8 - ``` ## Quantization Configuration +INT8 optimization + The following configuration has been used for the above models for INT8 quantization: - Weight quantization: 8 bits, symmetric quantization - Bias parameters are not quantized @@ -48,3 +65,14 @@ The following configuration has been used for the above models for INT8 quantiza - 2000 images from the calibration dataset were used for computing encodings - TF_enhanced was used as quantization scheme - Cross layer equalization and Adaround in per channel mode has been applied for all the models to get the best INT8 optimized checkpoint + +INT4 optimization + +The following configuration has been used for the above models for INT4 quantization: +- Weight quantization: 4 bits, symmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are quantized +- 2000 images from the calibration dataset were used for computing encodings +- TF_enhanced was used as quantization scheme +- Cross layer equalization and Adaround in per channel mode has been applied for all the models to get the best INT4 optimized checkpoint diff --git a/zoo_torch/Docs/DeepLabV3.md b/zoo_torch/Docs/DeepLabV3.md index 55e66e3..33d0bd6 100644 --- a/zoo_torch/Docs/DeepLabV3.md +++ b/zoo_torch/Docs/DeepLabV3.md @@ -47,7 +47,9 @@ python deeplabv3_quanteval.py \ --batch-size ``` -## Quantization Configuration (INT8) +## Quantization Configuration +INT8 optimization +The following configuration has been used for the above model for INT8 quantization - Weight quantization: 8 bits, per tensor symmetric quantization - Bias parameters are not quantized - Activation quantization: 8 bits, asymmetric quantization @@ -55,3 +57,14 @@ python deeplabv3_quanteval.py \ - TF-Enhanced was used as quantization scheme - Cross layer equalization and Adaround has been applied on optimized checkpoint - Data Free Quantization has been performed on the optimized checkpoint + +INT4 optimization +The following configuration has been used for the above model for W4A8 quantization +- Weight quantization: 4 bits, per channel symmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are quantized +- TF-Enhanced was used as quantization scheme +- Cross layer equalization and Adaround has been applied on optimized checkpoint +- Data Free Quantization has been performed on the optimized checkpoint +- Quantization Aware Traning has been performed on the optimized checkpoint diff --git a/zoo_torch/Docs/EfficientNet-lite0.md b/zoo_torch/Docs/EfficientNet-lite0.md index 1bae044..0fde441 100644 --- a/zoo_torch/Docs/EfficientNet-lite0.md +++ b/zoo_torch/Docs/EfficientNet-lite0.md @@ -43,6 +43,7 @@ Each of the {train, valid, test} directories is then expected to have 1000 subdi To run evaluation with QuantSim in AIMET, use the following ```bash python3 efficientnetlite0_quanteval.py \ + --default-param-bw \ --dataset-path < path to validation dataset> \ --batch-size \ --use-cuda @@ -50,7 +51,7 @@ To run evaluation with QuantSim in AIMET, use the following ``` 
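The encodings for the quantized model are computed on a small calibration set drawn from the ImageNet training split with a fixed number of images per class (see the configuration below). A minimal sketch of building such a loader, assuming the model zoo's common `ImageNetDataLoader` keeps the constructor shown elsewhere in this patch (`images_dir`, `image_size`, `batch_size`, `is_training`, `num_samples_per_class`):

```python
# Sketch of a calibration dataloader: a few images per class from the ImageNet training split.
# Assumes the zoo's common ImageNetDataLoader matches the classification loader in this patch.
from zoo_torch.examples.common.image_net_data_loader import ImageNetDataLoader

calib_loader = ImageNetDataLoader(images_dir='<path to ImageNet train split>',
                                  image_size=224,          # EfficientNet-lite0 input resolution
                                  batch_size=64,
                                  is_training=False,        # deterministic validation-style transforms
                                  num_samples_per_class=4,  # ~4K images in total
                                  ).data_loader

# The resulting torch DataLoader can then be passed to sim.compute_encodings(...) as calibration data.
```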
## Quantization Configuration
-- Weight quantization: 8 bits per channel symmetric quantization
+- Weight quantization: 8 or 4 bits per channel symmetric quantization
- Bias parameters are not quantized
- Activation quantization: 8 bits, asymmetric quantization
- Model inputs are quantized
@@ -58,5 +59,5 @@ To run evaluation with QuantSim in AIMET, use the following
- TF was used for activation quantization scheme
- Batch norm folding and Adaround have been applied on optimized efficientnet-lite checkpoint
- [Conv - Relu6] layers has been fused as one operation via manual configurations
-- 2K Images from ImageNet validation dataset (2 images per class) are used as calibration dataset
+- 4K Images from ImageNet training dataset (4 images per class) are used as calibration dataset
- Standard ImageNet validation dataset is used as evaluation dataset
diff --git a/zoo_torch/Docs/FFNet.md b/zoo_torch/Docs/FFNet.md
new file mode 100755
index 0000000..5e2847b
--- /dev/null
+++ b/zoo_torch/Docs/FFNet.md
@@ -0,0 +1,56 @@
+# PyTorch-FFNet
+
+## Setup AI Model Efficiency Toolkit (AIMET)
+Please [install and setup AIMET](https://github.com/quic/aimet/blob/release-aimet-1.22/packaging/install.md) before proceeding further.
+This model was tested with the `torch_gpu` variant of AIMET 1.22.2.
+
+## Additional Dependencies
+1. Install scikit-image as follows
+```
+pip install scikit-image
+```
+
+## Model modifications & Experiment Setup
+1. Clone the [FFNet repo](https://github.com/Qualcomm-AI-research/FFNet)
+```
+git clone https://github.com/Qualcomm-AI-research/FFNet
+```
+2. Copy the two folders below into the working directory for the dataloader and model evaluation imports
+```
+datasets/cityscapes/dataloader
+datasets/cityscapes/utils
+```
+3. Add AIMET Model Zoo to the pythonpath
+```bash
+export PYTHONPATH=$PYTHONPATH:
+```
+
+## Dataset
+The Cityscapes dataset can be downloaded from here:
+  - https://www.cityscapes-dataset.com/
+
+In the `datasets/cityscapes/dataloader/base_loader.py` script, change the Cityscapes dataset path to point to the path where the dataset was downloaded.
+
+## Model checkpoint and configuration
+
+- The original prepared FFNet checkpoint can be downloaded from here:
+  - https://github.com/quic/aimet-model-zoo/releases/tag/torch_segmentation_ffnet
+- The Quantization Simulation (*Quantsim*) Configuration file can be downloaded from here: [default_config_per_channel.json](https://github.com/quic/aimet/blob/release-aimet-1.22/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json) (Please see [this page](https://quic.github.io/aimet-pages/releases/1.22.2/user_guide/quantization_configuration.html) for more information on this file).
+
+## Usage
+To run evaluation with QuantSim in AIMET, use the following
+```bash
+python ffnet_quanteval.py \
+    --model-name \
+    --use-cuda \
+    --batch-size
+```
+
+## Quantization Configuration (INT8)
+- Weight quantization: 8 bits, per channel symmetric quantization
+- Bias parameters are not quantized
+- Activation quantization: 8 bits, asymmetric quantization
+- Model inputs are quantized
+- TF-Enhanced was used as quantization scheme
+- Cross layer equalization (CLE) has been applied on optimized checkpoint
+- For low-resolution models with the pre_down suffix, the GaussianConv2D smoothing layer is excluded from quantization (see the sketch below).
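The last bullet is implemented in `ffnet_quanteval.py` (added later in this patch) by disabling the quantizers attached to the model's `smoothing` layer on the Quantsim object. A minimal sketch, assuming a prepared `pre_down` checkpoint and the per-channel config file referenced above:

```python
# Sketch: exclude the GaussianConv2D smoothing layer from quantization for "pre_down" FFNet models,
# mirroring ffnet_quanteval.py in this patch (checkpoint and config file names as downloaded above).
import torch
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel

model = torch.load("prepared_segmentation_ffnet78S_BCC_mobile_pre_down.pth")
model.eval()

dummy_input = torch.rand(1, 3, 1024, 2048)  # Cityscapes-sized input used by the evaluation script
sim = QuantizationSimModel(model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           default_param_bw=8,
                           default_output_bw=8,
                           dummy_input=dummy_input,
                           config_file="./default_config_per_channel.json")

# Disable both the output and weight quantizers of the Gaussian smoothing layer
sim.model.smoothing.output_quantizer.enabled = False
sim.model.smoothing.param_quantizers['weight'].enabled = False
```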
\ No newline at end of file diff --git a/zoo_torch/Docs/HRNet-w48.md b/zoo_torch/Docs/HRNet-w48.md index f329e69..f7bb1e3 100644 --- a/zoo_torch/Docs/HRNet-w48.md +++ b/zoo_torch/Docs/HRNet-w48.md @@ -14,20 +14,21 @@ This model was tested with the `torch_gpu` variant of AIMET 1.21.0. pip install pyyaml pip install 'yacs>=0.1.5' ``` -- Loading AIMET model zoo libraries -```bash +- Loading AIMET model zoo libraries +```bash export PYTHONPATH=$PYTHONPATH: ``` ## Model checkpoints and configuration -- Downloading checkpoints and Quantization configuration file are handled through evaluation script. +- The original HRNet-W48 checkpoint can be downloaded from links provided at [HRNet pytorch-v1.1](https://github.com/HRNet/HRNet-Semantic-Segmentation/blob/pytorch-v1.1). - Optimized HRNet checkpoint can be downloaded from the [Releases](/../../releases) page. - The Quantization Simulation (*Quantsim*) Configuration file can be downloaded from here: [default_config_per_channel.json](https://github.com/quic/aimet/blob/17bcc525d6188f177837bbb789ccf55a81f6a1b5/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json) (Please see [this page](https://quic.github.io/aimet-pages/releases/1.21.0/user_guide/quantization_configuration.html) for more information on this file). +- Downloading optimized checkpoints and quantization configuration file are also handled through evaluation script. ## Dataset -- This evaluation was designed for Cityscapes dataset, which can be downloaded through registration on https://www.cityscapes-dataset.com/. +- This evaluation was designed for Cityscapes dataset, which can be downloaded through registration on https://www.cityscapes-dataset.com/. - After registration, go to https://www.cityscapes-dataset.com/downloads/ to download gtFine_trainvaltest.zip and leftImg8bit_trainvaltest.zip -- Copy leftImg8bit_trainvaltest.zip and gtFine_trainvaltest.zip to $SEG_ROOT/data/cityscapes/ ($SEG_ROOT denotes path to HRNet-Semantic-Segmentation ) +- Copy leftImg8bit_trainvaltest.zip and gtFine_trainvaltest.zip to $SEG_ROOT/data/cityscapes/ ($SEG_ROOT denotes path to HRNet-Semantic-Segmentation) NOTE! 
Data has to be organized in the following way: @@ -53,22 +54,22 @@ $SEG_ROOT/data │ │ └── valList.txt ``` -## Usage -To run evaluation with QuantSim in AIMET, use the following +## Usage +To run evaluation with QuantSim in AIMET, use the following ```bash python hrnet-w48_quanteval.py \ + --default-param-bw <8|4> \ --hrnet-path \ - --use-cuda < Use GPU for evaluation> + --use-cuda ``` ## Quantization Configuration -- Weight quantization: 8 bits, per channel symmetric quantization +- Weight quantization: 8 or 4 bits, per channel symmetric quantization - Bias parameters are not quantized -- Activation quantization: 8 bits, asymmetric quantization +- Activation quantization: 8 bits asymmetric quantization - Model inputs are quantized - TF_enhanced was used for weight quantization scheme - TF_enhanced was used for activation quantization scheme - Cross layer equalization and Adaround have been applied on optimized checkpoint - 2K Images from Cityscapes test dataset are used as calibration dataset - diff --git a/zoo_torch/Docs/Hrnet-posenet.md b/zoo_torch/Docs/Hrnet-posenet.md index bb30d9f..1ba3fd4 100644 --- a/zoo_torch/Docs/Hrnet-posenet.md +++ b/zoo_torch/Docs/Hrnet-posenet.md @@ -5,15 +5,9 @@ This document describes evaluation of optimized checkpoint for Hrnet-posenet Clone the AIMET Model Zoo repo into your workspace: `git clone https://github.com/quic/aimet-model-zoo.git` -## AIMET installation and setup -Install the *Torch GPU* variant of AIMET package *and* setup the environment using the instructions here: -https://github.com/quic/aimet/blob/develop/packaging/install.md - ---- -**NOTE** -- All AIMET releases are available here: https://github.com/quic/aimet/releases -- This model has been tested using AIMET version *1.21.0* (i.e. set `release_tag="1.21.0"` in the above instructions). -- This model is compatible with the PyTorch GPU variant of AIMET (i.e. set `AIMET_VARIANT="torch_gpu"` in the above instructions). +## Setup AI Model Efficiency Toolkit (AIMET) +Please [install and setup AIMET](https://github.com/quic/aimet/blob/release-aimet-1.22/packaging/install.md) before proceeding further. +This model was tested with the `torch_gpu` variant of AIMET 1.22.2. ## Additional Setup Dependencies ```bash @@ -50,18 +44,29 @@ if on_cuda: input=input.cuda() ``` -## Obtaining model checkpoint and dataset +## Model checkpoints and configuration +- Downloading checkpoints and Quantization configuration file are handled through evaluation script. - FP32 and Optimized checkpoint of HRNET-posenet can be downloaded from the [Releases](/../../releases) page. +- The Quantization Simulation (*Quantsim*) Configuration file can be downloaded from here: [default_config_per_channel.json](https://github.com/quic/aimet/blob/17bcc525d6188f177837bbb789ccf55a81f6a1b5/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json) (Please see [this page](https://quic.github.io/aimet-pages/releases/1.21.0/user_guide/quantization_configuration.html) for more information on this file). + +## Experiment setup +```python +export PYTHONPATH=$PYTHONPATH:/aimet-model-zoo +``` + +## Dataset +- This evaluation script is built to evaluate on COCO2014 validation images with person keypoints. - COCO dataset can be downloaded from here: - [COCO 2014 Val images](http://images.cocodataset.org/zips/val2014.zip) - [COCO 2014 Train/Val annotations](http://images.cocodataset.org/annotations/annotations_trainval2014.zip) +- The COCO dataset path should include coco images and annotations. 
It assumes a folder structure containing two subdirectories: `images/val2014` and `annotations`. Corresponding images and annotations should be put into the two subdirectories. ## Usage - To run evaluation with QuantSim in AIMET, use the following ```bash cd /zoo_torch/examples/hrnet-posenet python hrnet_posenet_quanteval.py - --default-param-bw \ + --default-param-bw \ --default-output-bw \ --use-cuda \ --evaluation-dataset @@ -71,11 +76,22 @@ python hrnet_posenet_quanteval.py --default-param-bw=8 --default-output-bw=8 --u ``` ## Quantization Configuration -The following configuration has been used for the above model for INT8 quantization +INT8 optimization +The following configuration has been used for the above model for INT8 quantization - Weight quantization: 8 bits, symmetric quantization - Bias parameters are not quantized - Activation quantization: 8 bits, asymmetric quantization - Model inputs are quantized - 320 images (10 batches) from the validation dataloader was used for compute encodings - Batchnorm folding and "TF" quantscheme in per channel mode has been applied to get the INT8 optimized checkpoint + +INT4 optimization + +The following configuration has been used for the above model for INT4 quantization +- Weight quantization: 4 bits, symmetric quantization +- Bias parameters are not quantized +- Activation quantization: 8 bits, asymmetric quantization +- Model inputs are quantized +- 320 images (10 batches) from the validation dataloader was used for compute encodings +- Batchnorm folding and "TF" quantscheme in per channel mode has been applied to get the INT4 optimized checkpoint diff --git a/zoo_torch/examples/classification/classification_quanteval.py b/zoo_torch/examples/classification/classification_quanteval.py index 95e41cf..58bef52 100644 --- a/zoo_torch/examples/classification/classification_quanteval.py +++ b/zoo_torch/examples/classification/classification_quanteval.py @@ -11,8 +11,9 @@ #General Imports import argparse +import random +import sys, os, tarfile import urllib.request -import wget #Torch related imports import torch @@ -21,13 +22,29 @@ from torch.utils.data import DataLoader from torchvision import models + #AIMET torch related imports from aimet_torch.quantsim import QuantizationSimModel -from classification_utils.image_net_data_loader import ImageNetDataLoader +from zoo_torch.examples.common.image_net_data_loader import ImageNetDataLoader +from aimet_model_zoo.zoo_torch.common.utils import get_device + + +QUANTSIM_CONFIG_URL = "https://raw.githubusercontent.com/quic/aimet/release-aimet-1.22.1/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json" +OPTIMIZED_CHECKPOINT_URL = "https://github.com/quic/aimet-model-zoo/releases/download/torchvision_classification_INT4%2F8/" +def download_weights(prefix): + # Download config file + if not os.path.exists("./default_config_per_channel.json"): + urllib.request.urlretrieve(QUANTSIM_CONFIG_URL, "default_config_per_channel.json") + # Download optimized model + if not os.path.exists(f"./{prefix}.pth"): + urllib.request.urlretrieve(f"{OPTIMIZED_CHECKPOINT_URL}/{prefix}.pth", f"{prefix}.pth") + if not os.path.exists(f"./{prefix}.encodings"): + urllib.request.urlretrieve(f"{OPTIMIZED_CHECKPOINT_URL}/{prefix}.encodings",f"{prefix}.encodings") -def get_imagenet_dataloader(image_dir, BATCH_SIZE=64): + +def get_imagenet_dataloader(image_dir, BATCH_SIZE=128): ''' Helper function to get imagenet dataloader from dataset directory ''' @@ -96,9 +113,15 @@ def 
forward_pass(model, dataloader): data, label = data.cuda(), label.cuda() output = model(data) - del dataloader +def seed(seed_num, args): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(seed_num) + if args.use_cuda: + torch.cuda.manual_seed(seed_num) + torch.cuda.manual_seed_all(seed_num) # add arguments def arguments(): @@ -108,6 +131,8 @@ def arguments(): parser.add_argument('--default-output-bw', help='output bitwidth for quantization', default=8, type=int) parser.add_argument('--use-cuda', help='Use cuda', default=True, type=bool) parser.add_argument('--evaluation-dataset', help='path to evaluation dataset',type=str, required=True) + parser.add_argument('--batch-size', help='Data batch size for a model', type = int, default=128) + parser.add_argument('--config-file', help='Data batch size for a model', type = str, default='./default_config_per_channel.json') args = parser.parse_args() return args @@ -115,39 +140,31 @@ def arguments(): def main(): #Load parameters from arguments args = arguments() - + + # Set seed value + seed(0, args) + # Get fp32 model and convert to eval mode model = getattr(torchvision.models,args.fp32_model)(pretrained=True) model.eval() - #updata use-cuda args based on availability of cuda devices - use_cuda = args.use_cuda and torch.cuda.is_available() - + #get device + device = get_device(args) + #Define prefix prefix = f'{args.fp32_model}_W{args.default_param_bw}A{args.default_output_bw}' # Download weights for optimized model and load optimized model and encodings - print('Downloading optimized model weights') - URL = f"https://github.com/quic/aimet-model-zoo/releases/download/torchvision_classification_INT4%2F8/" - wget.download(URL+f'{prefix}.pth', f'./{prefix}.pth') - wget.download(URL+f'{prefix}.encodings', f'./{prefix}.encodings') - - #Download aimet config file - URL = 'https://raw.githubusercontent.com/quic/aimet/develop/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json' - wget.download(URL,'./default.json') - - args.aimet_config_file = './default.json' - + download_weights(prefix) + optimized_model = torch.load(f"./{prefix}.pth") optimized_encodings_path = f"./{prefix}.encodings" - - if use_cuda: - model.cuda() + + model.to(device) #Print FP32 accuracy - fp32_acc = eval_func(model, args.evaluation_dataset) - print(f'FP32 accuracy: {fp32_acc:0.3f}%') + fp32_acc = eval_func(model, args.evaluation_dataset, args.batch_size) #create quantsim from checkpoint #Define dummy input for quantsim @@ -156,12 +173,12 @@ def main(): #Move Optimized model to eval mode optimized_model.eval() - if use_cuda: - optimized_model.cuda() - dummy_input = dummy_input.cuda() + + optimized_model.to(device) + dummy_input = dummy_input.to(device) #Create quantsim using appropriate weight bitwidth for quantization - sim = QuantizationSimModel(optimized_model, quant_scheme='tf_enhanced',default_param_bw=args.default_param_bw,default_output_bw=args.default_output_bw, dummy_input=dummy_input, config_file=args.aimet_config_file) + sim = QuantizationSimModel(optimized_model, quant_scheme='tf_enhanced',default_param_bw=args.default_param_bw,default_output_bw=args.default_output_bw, dummy_input=dummy_input, config_file=args.config_file) #Set and freeze optimized weight encodings sim.set_and_freeze_param_encodings(encoding_path=optimized_encodings_path) @@ -174,7 +191,12 @@ def main(): sim.compute_encodings(forward_pass, forward_pass_callback_args=encoding_dataloader.data_loader) 
quant_acc = eval_func(sim.model.cuda(), args.evaluation_dataset) - print(f'Quantized quantized accuracy: {quant_acc:0.3f}%') + + #Print accuracy stats + print("Evaluation Summary:") + print(f"Original Model | Accuracy on 32-bit device: {fp32_acc:.4f}") + print(f"Optimized Model | Accuracy on {args.default_param_bw}-bit device: {quant_acc:.4f}") + if __name__ == '__main__': main() diff --git a/zoo_torch/examples/classification/classification_utils/image_net_data_loader.py b/zoo_torch/examples/classification/classification_utils/image_net_data_loader.py deleted file mode 100644 index 73f7879..0000000 --- a/zoo_torch/examples/classification/classification_utils/image_net_data_loader.py +++ /dev/null @@ -1,211 +0,0 @@ -# !/usr/bin/env python -# ============================================================================= -# @@-COPYRIGHT-START-@@ -# -# Copyright (c) 2021, Qualcomm Innovation Center, Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# SPDX-License-Identifier: BSD-3-Clause -# -# @@-COPYRIGHT-END-@@ -# ============================================================================= - -""" -Creates data-loader for Image-Net dataset -""" -import logging -import os - -from torchvision import transforms -from torchvision.datasets.folder import default_loader, has_file_allowed_extension -from torch.utils.data import Dataset -import torch.utils.data as torch_data - - -logger = logging.getLogger('Dataloader') - -IMG_EXTENSIONS = '.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif' - - -def make_dataset(directory: str, class_to_idx: dict, extensions: tuple, num_samples_per_class: int) -> list: - """ - Creates a dataset of images with num_samples_per_class images in each class - :param directory: The string path to the data directory. - :param class_to_idx: A dictionary mapping the name of the class to the index (label) - :param extensions: list of valid extensions to load data - :param num_samples_per_class: Number of samples to use per class. - :return: list of images containing the entire dataset. 
- """ - images = [] - num_classes = 0 - directory = os.path.expanduser(directory) - for class_name in sorted(class_to_idx.keys()): - class_path = os.path.join(directory, class_name) - if os.path.isdir(class_path): - class_idx = class_to_idx[class_name] - class_images = add_images_for_class(class_path, extensions, num_samples_per_class, class_idx) - images.extend(class_images) - num_classes += 1 - - logger.info("Dataset consists of %d images in %d classes", len(images), num_classes) - return images - - -def add_images_for_class(class_path: str, extensions: tuple, num_samples_per_class: int, class_idx: int) -> list: - """ - For a given class, adds num_samples_per_class images to a list. - :param class_path: The string path to the class directory. - :param extensions: List of valid extensions to load data - :param num_samples_per_class: Number of samples to use per class. - :param class_idx: numerical index of class. - :return: list of images for given class. - """ - class_images = [] - count = 0 - for file_name in os.listdir(class_path): - if num_samples_per_class and count >= num_samples_per_class: - break - if has_file_allowed_extension(file_name, extensions): - image_path = os.path.join(class_path, file_name) - item = (image_path, class_idx) - class_images.append(item) - count += 1 - - return class_images - - -class ImageFolder(Dataset): - """ - Dataset class inspired by torchvision.datasets.folder.DatasetFolder for images organized as - individual files grouped by category. - """ - - def __init__(self, root: str, transform=None, target_transform=None, - num_samples_per_class: int = None): - - """ - :param root: The path to the data directory. - :param transform: The required processing to be applied on the sample. - :param target_transform: The required processing to be applied on the target. - :param num_samples_per_class: Number of samples to use per class. - """ - Dataset.__init__(self) - classes, class_to_idx = self._find_classes(root) - self.samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS, num_samples_per_class) - if not self.samples: - raise (RuntimeError( - "Found 0 files in sub folders of: {}\nSupported extensions are: {}".format( - root, ",".join(IMG_EXTENSIONS)))) - - self.root = root - self.loader = default_loader - self.extensions = IMG_EXTENSIONS - - self.classes = classes - self.class_to_idx = class_to_idx - self.targets = [s[1] for s in self.samples] - - self.transform = transform - self.target_transform = target_transform - - self.imgs = self.samples - - @staticmethod - def _find_classes(directory: str): - classes = [d for d in os.listdir(directory) if - os.path.isdir(os.path.join(directory, d))] - classes.sort() - class_to_idx = {classes[i]: i for i in range(len(classes))} - return classes, class_to_idx - - def __getitem__(self, index: int): - path, target = self.samples[index] - sample = self.loader(path) - if self.transform is not None: - sample = self.transform(sample) - if self.target_transform is not None: - target = self.target_transform(target) - - return sample, target - - def __len__(self): - return len(self.samples) - - -class ImageNetDataLoader: - """ - For loading Validation data from the ImageNet dataset. 
- """ - - def __init__(self, images_dir: str, image_size: int, batch_size: int = 128, - is_training: bool = False, num_workers: int = 8, num_samples_per_class: int = None): - """ - :param images_dir: The path to the data directory - :param image_size: The length of the image - :param batch_size: The batch size to use for training and validation - :param is_training: Indicates whether to load the training or validation data - :param num_workers: Indiicates to the data loader how many sub-processes to use for data loading. - :param num_samples_per_class: Number of samples to use per class. - """ - - # For normalization, mean and std dev values are calculated per channel - # and can be found on the web. - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - self.train_transforms = transforms.Compose([ - transforms.RandomResizedCrop(image_size), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize]) - - self.val_transforms = transforms.Compose([ - transforms.Resize(image_size + 24), - transforms.CenterCrop(image_size), - transforms.ToTensor(), - normalize]) - - if is_training: - data_set = ImageFolder( - root=images_dir, transform=self.train_transforms, - num_samples_per_class=num_samples_per_class) - else: - data_set = ImageFolder( - root=images_dir, transform=self.val_transforms, - num_samples_per_class=num_samples_per_class) - - self._data_loader = torch_data.DataLoader( - data_set, batch_size=batch_size, shuffle=is_training, - num_workers=num_workers, pin_memory=True) - - @property - def data_loader(self) -> torch_data.DataLoader: - """ - Returns the data-loader - """ - return self._data_loader diff --git a/zoo_torch/examples/deeplabv3/deeplabv3_quanteval.py b/zoo_torch/examples/deeplabv3/deeplabv3_quanteval.py index 66549ed..ba12523 100644 --- a/zoo_torch/examples/deeplabv3/deeplabv3_quanteval.py +++ b/zoo_torch/examples/deeplabv3/deeplabv3_quanteval.py @@ -32,8 +32,8 @@ from aimet_torch.quantsim import QuantizationSimModel QUANTSIM_CONFIG_URL = "https://raw.githubusercontent.com/quic/aimet/release-aimet-1.22.1/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json" -OPTIMIZED_WEIGHTS_URL = "https://github.com/quic/aimet-model-zoo/releases/download/torch_dlv3_w8a8_pc/deeplabv3+w8a8_tfe_perchannel.pth" -OPTIMIZED_ENCODINGS_URL = "https://github.com/quic/aimet-model-zoo/releases/download/torch_dlv3_w8a8_pc/deeplabv3+w8a8_tfe_perchannel_param.encodings" +OPTIMIZED_WEIGHTS_URL = "https://github.com/quic/aimet-model-zoo/releases/download/torch_dlv3_w8a8_pc/deeplabv3+w8a8_tfe_perchannel.pth" #NeedToReplace +OPTIMIZED_ENCODINGS_URL = "https://github.com/quic/aimet-model-zoo/releases/download/torch_dlv3_w8a8_pc/deeplabv3+w8a8_tfe_perchannel_param.encodings" #NeedToReplace ORIGINAL_MODEL_URL = 'https://drive.google.com/uc?id=1G9mWafUAj09P4KvGSRVzIsV_U5OqFLdt' def download_weights(): @@ -42,9 +42,9 @@ def download_weights(): urllib.request.urlretrieve(QUANTSIM_CONFIG_URL, "default_config_per_channel.json") # Download optimized model - if not os.path.exists("./deeplabv3+w8a8_tfe_perchannel.pth"): + if not os.path.exists("./deeplabv3+w8a8_tfe_perchannel.pth"): #NeedToReplace urllib.request.urlretrieve(OPTIMIZED_WEIGHTS_URL, "deeplabv3+w8a8_tfe_perchannel.pth") - if not os.path.exists("./deeplabv3+w8a8_tfe_perchannel_param.encodings"): + if not os.path.exists("./deeplabv3+w8a8_tfe_perchannel_param.encodings"): #NeedToReplace 
urllib.request.urlretrieve(OPTIMIZED_ENCODINGS_URL,"deeplabv3+w8a8_tfe_perchannel_param.encodings") # Download original model @@ -105,8 +105,8 @@ def __init__(self, args): self.input_shape = (1, 3, 513, 513) self.crop_size = 513 self.base_size = 513 - self.checkpoint_path = './deeplabv3+w8a8_tfe_perchannel.pth' - self.encodings_path = './deeplabv3+w8a8_tfe_perchannel_param.encodings' + self.checkpoint_path = './deeplabv3+w8a8_tfe_perchannel.pth' #NeedToReplace w4a8 + self.encodings_path = './deeplabv3+w8a8_tfe_perchannel_param.encodings' #NeedToReplace self.config_file = './default_config_per_channel.json' for arg in vars(args): setattr(self, arg, getattr(args, arg)) @@ -127,7 +127,7 @@ def main(): model_orig.eval() # Load optimized model - model_optim = torch.load(config.checkpoint_path) + model_optim = DeepLab(backbone='mobilenet') model_optim = model_optim.to(device) model_optim.eval() @@ -157,19 +157,15 @@ def main(): print('Evaluating Optimized Model') sim_optim = QuantizationSimModel(model_optim, **kwargs) - sim_optim.set_and_freeze_param_encodings(encoding_path=config.encodings_path) # use AdaRound encodings for the optimized model - sim_optim.compute_encodings(eval_func, [val_loader, config, device]) - mIoU_optim_fp32 = eval_func(model_optim, [val_loader, config, device]) - del model_optim - torch.cuda.empty_cache() - mIoU_optim_int8 = eval_func(sim_optim.model, [val_loader, config, device]) + sim_optim.model = load_checkpoint(config.checkpoint_path) # load QAT model directly here + mIoU_optim_int4 = eval_func(sim_optim.model, [val_loader, config, device]) del sim_optim torch.cuda.empty_cache() print(f'Original Model | 32-bit Environment | mIoU: {mIoU_orig_fp32:.4f}') print(f'Original Model | {config.default_param_bw}-bit Environment | mIoU: {mIoU_orig_int8:.4f}') - print(f'Optimized Model | 32-bit Environment | mIoU: {mIoU_optim_fp32:.4f}') - print(f'Optimized Model | {config.default_param_bw}-bit Environment | mIoU: {mIoU_optim_int8:.4f}') + + print(f'Optimized Model | {config.default_param_bw}-bit Environment | mIoU: {mIoU_optim_int4:.4f}') if __name__ == '__main__': download_weights() diff --git a/zoo_torch/examples/efficientnetlite0/efficientnetlite0_quanteval.py b/zoo_torch/examples/efficientnetlite0/efficientnetlite0_quanteval.py index fd2d23a..b8006c3 100755 --- a/zoo_torch/examples/efficientnetlite0/efficientnetlite0_quanteval.py +++ b/zoo_torch/examples/efficientnetlite0/efficientnetlite0_quanteval.py @@ -141,24 +141,24 @@ def seed(seednum, use_cuda): torch.cuda.manual_seed(seednum) torch.cuda.manual_seed_all(seednum) -def download_weights(): - if not os.path.exists("./default_config_per_channel.json"): - url_checkpoint = 'https://raw.githubusercontent.com/quic/aimet/17bcc525d6188f177837bbb789ccf55a81f6a1b5/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json' - urllib.request.urlretrieve(url_checkpoint, "default_config_per_channel.json") - if not os.path.exists("./efficientnetlite0_w8a8_pc.encodings"): - url_encoding = "https://github.com/quic/aimet-model-zoo/releases/download/torch_effnet_lite0_w8a8_pc/efficientnetlite0_w8a8_pc.encodings" - urllib.request.urlretrieve(url_encoding, "efficientnetlite0_w8a8_pc.encodings") - if not os.path.exists("model_efficientnetlite0_w8a8_pc_checkpoint.pth"): - url_config = "https://github.com/quic/aimet-model-zoo/releases/download/torch_effnet_lite0_w8a8_pc/model_efficientnetlite0_w8a8_pc_checkpoint.pth" - urllib.request.urlretrieve(url_config, 
"model_efficientnetlite0_w8a8_pc_checkpoint.pth") +def download_weights(config): + if not os.path.exists(config.config_file): + url_checkpoint = "https://raw.githubusercontent.com/quic/aimet/17bcc525d6188f177837bbb789ccf55a81f6a1b5/TrainingExtensions/common/src/python/aimet_common/quantsim_config/" + config.config_file + urllib.request.urlretrieve(url_checkpoint, config.config_file) + if not os.path.exists(config.encoding): + url_encoding = "https://github.com/quic/aimet-model-zoo/releases/download/torch_effnet_lite0_w8a8_pc/" + config.encoding + urllib.request.urlretrieve(url_encoding, config.encoding) + if not os.path.exists(config.checkpoint): + url_config = "https://github.com/quic/aimet-model-zoo/releases/download/torch_effnet_lite0_w8a8_pc/" + config.checkpoint + urllib.request.urlretrieve(url_config, config.checkpoint) # adding hardcoded values into args from parseargs() and return config object class ModelConfig(): def __init__(self, args): self.seed=23 self.input_shape=(1,3,224,224) - self.checkpoint='model_efficientnetlite0_w8a8_pc_checkpoint.pth' - self.encoding='efficientnetlite0_w8a8_pc.encodings' + self.checkpoint="model_efficientnetlite0_w" + str(args.default_param_bw) + "a" + str(args.default_output_bw) + "_pc_checkpoint.pth" + self.encoding="efficientnetlite0_w" + str(args.default_param_bw) + "a" + str(args.default_output_bw) + "_pc.encodings" self.quant_scheme='tf_enhanced' self.config_file='default_config_per_channel.json' for arg in vars(args): @@ -171,7 +171,7 @@ def main(): # Adding hardcoded values to config on top of args config=ModelConfig(args) - download_weights() + download_weights(config) device=utils.get_device(args) diff --git a/zoo_torch/examples/ffnet/ffnet_quanteval.py b/zoo_torch/examples/ffnet/ffnet_quanteval.py new file mode 100755 index 0000000..4a7ab25 --- /dev/null +++ b/zoo_torch/examples/ffnet/ffnet_quanteval.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python3 +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2022 of Qualcomm Innovation Center, Inc. All rights reserved. 
+# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +''' AIMET Quantsim code for FFNet ''' + +# General Python related imports +from __future__ import absolute_import +from __future__ import division +import os +import sys +sys.path.append(os.path.dirname(sys.path[0])) +import argparse +import urllib.request +from tqdm import tqdm +from functools import partial +from zoo_torch.examples.common.utils import get_device + +# Torch related imports +import torch + +# Dataloader and Model Evaluation imports +from datasets.cityscapes.utils.misc import eval_metrics +from datasets.cityscapes.utils.trnval_utils import eval_minibatch +from datasets.cityscapes.dataloader.get_dataloaders import return_dataloader + +# AIMET related imports +from aimet_torch.model_validator.model_validator import ModelValidator +from aimet_common.defs import QuantScheme +from aimet_torch.quantsim import QuantizationSimModel + + +def download_weights(args): + # Download original model + FILE_NAME = f"prepared_{args.model_name}.pth" + ORIGINAL_MODEL_URL = f"https://github.com/quic/aimet-model-zoo/releases/download/ffnet/{FILE_NAME}" + if not os.path.exists(FILE_NAME): + urllib.request.urlretrieve(ORIGINAL_MODEL_URL, FILE_NAME) + + # Download config file + QUANTSIM_CONFIG_URL = "https://raw.githubusercontent.com/quic/aimet/release-aimet-1.22.2/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json" + if not os.path.exists("./default_config_per_channel.json"): + urllib.request.urlretrieve(QUANTSIM_CONFIG_URL, "default_config_per_channel.json") + + # Download optimized weights + FILE_NAME = f"{args.model_name}_W{args.default_param_bw}A{args.default_output_bw}_CLE_tfe_perchannel.pth" + OPTIMIZED_WEIGHTS_URL = f"https://github.com/quic/aimet-model-zoo/releases/download/ffnet/{FILE_NAME}" + if not os.path.exists(FILE_NAME): + urllib.request.urlretrieve(OPTIMIZED_WEIGHTS_URL, FILE_NAME) + + # Download optimized encodings + FILE_NAME = f"{args.model_name}_W{args.default_param_bw}A{args.default_output_bw}_CLE_tfe_perchannel.encodings" + OPTIMIZED_ENCODINGS_URL = f"https://github.com/quic/aimet-model-zoo/releases/download/ffnet/{FILE_NAME}" + if not os.path.exists(FILE_NAME): + urllib.request.urlretrieve(OPTIMIZED_ENCODINGS_URL, FILE_NAME) + + +# Set seed for reproducibility +def seed(seed_number): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = True + torch.manual_seed(seed_number) + torch.cuda.manual_seed(seed_number) + torch.cuda.manual_seed_all(seed_number) + + +# Define evaluation func to evaluate model with data_loader +def eval_func(model, dataloader): + model.eval() + iou_acc = 0 + + for data in tqdm(dataloader, desc='evaluate'): + _iou_acc = eval_minibatch(data, model, True, 0, False, False) + iou_acc += _iou_acc + mean_iou = eval_metrics(iou_acc, model) + + return mean_iou + + +# Forward pass for compute encodings +def forward_pass(device, model, data_loader): + model = model.to(device) + model.eval() + + for data in tqdm(data_loader): + images, gt_image, edge, img_names, scale_float = data + assert isinstance(images, torch.Tensor) + assert len(images.size()) == 4 and len(gt_image.size()) == 3 + assert images.size()[2:] == gt_image.size()[1:] + batch_pixel_size = images.size(0) * images.size(2) * images.size(3) + input_size = images.size(2), images.size(3) + + with torch.no_grad(): + inputs = images + _pred = model(inputs.to(device)) + + +def arguments(): + parser = 
argparse.ArgumentParser(description='Evaluation script for PyTorch FFNet models.')
+    parser.add_argument('--model-name', help='Select the model configuration', type=str, default="segmentation_ffnet78S_dBBB_mobile", choices=[
+        "segmentation_ffnet78S_dBBB_mobile",
+        "segmentation_ffnet54S_dBBB_mobile",
+        "segmentation_ffnet40S_dBBB_mobile",
+        "segmentation_ffnet78S_BCC_mobile_pre_down",
+        "segmentation_ffnet122NS_CCC_mobile_pre_down"])
+    parser.add_argument('--batch-size', help='Data batch size for a model', type=int, default=8)
+    parser.add_argument('--default-output-bw', help='Default output bitwidth for quantization.', type=int, default=8)
+    parser.add_argument('--default-param-bw', help='Default parameter bitwidth for quantization.', type=int, default=8)
+    parser.add_argument('--use-cuda', help='Run evaluation on GPU.', type=bool, default=True)
+    args = parser.parse_args()
+    return args
+
+
+class ModelConfig():
+    def __init__(self, args):
+        self.input_shape = (1, 3, 1024, 2048)
+        self.prepared_checkpoint_path = f"prepared_{args.model_name}.pth"
+        self.optimized_checkpoint_path = f"{args.model_name}_W{args.default_param_bw}A{args.default_output_bw}_CLE_tfe_perchannel.pth"
+        self.encodings_path = f"{args.model_name}_W{args.default_param_bw}A{args.default_output_bw}_CLE_tfe_perchannel.encodings"
+        self.config_file = "./default_config_per_channel.json"
+        for arg in vars(args):
+            setattr(self, arg, getattr(args, arg))
+
+
+def main(args):
+    seed(1234)
+    config = ModelConfig(args)
+    device = get_device(args)
+    print(f'device: {device}')
+
+    # Load original model
+    model_orig = torch.load(config.prepared_checkpoint_path)
+    model_orig = model_orig.to(device)
+    model_orig.eval()
+
+    # Load optimized model
+    model_optim = torch.load(config.optimized_checkpoint_path)
+    model_optim = model_optim.to(device)
+    model_optim.eval()
+
+    # Get Dataloader
+    data_loader_kwargs = { 'num_workers':4 }
+    val_loader = return_dataloader(data_loader_kwargs['num_workers'], args.batch_size)
+
+    # Initialize Quantized model
+    dummy_input = torch.rand(config.input_shape, device=device)
+    kwargs = {
+        'quant_scheme': QuantScheme.post_training_tf_enhanced,
+        'default_param_bw': config.default_param_bw,
+        'default_output_bw': config.default_output_bw,
+        'config_file': config.config_file,
+        'dummy_input': dummy_input
+    }
+
+    print("Validate Original Model")
+    ModelValidator.validate_model(model_orig, dummy_input)
+
+    print('Evaluating Original Model')
+    sim_orig = QuantizationSimModel(model_orig, **kwargs)
+    if "pre_down" in config.prepared_checkpoint_path:
+        sim_orig.model.smoothing.output_quantizer.enabled = False
+        sim_orig.model.smoothing.param_quantizers['weight'].enabled = False
+    forward_func = partial(forward_pass, device)
+    sim_orig.compute_encodings(forward_func, forward_pass_callback_args=val_loader)
+
+    mIoU_orig_fp32 = eval_func(model_orig, val_loader)
+    del model_orig
+    torch.cuda.empty_cache()
+    mIoU_orig_int8 = eval_func(sim_orig.model, val_loader)
+    del sim_orig
+    torch.cuda.empty_cache()
+
+    print('Evaluating Optimized Model')
+    sim_optim = QuantizationSimModel(model_optim, **kwargs)
+    if "pre_down" in config.prepared_checkpoint_path:
+        sim_optim.model.smoothing.output_quantizer.enabled = False
+        sim_optim.model.smoothing.param_quantizers['weight'].enabled = False
+    forward_func = partial(forward_pass, device)
+    sim_optim.compute_encodings(forward_func, forward_pass_callback_args=val_loader)
+
+    mIoU_optim_fp32 = eval_func(model_optim, val_loader)
+    del model_optim
+    torch.cuda.empty_cache()
+
mIoU_optim_int8 = eval_func(sim_optim.model, val_loader) + del sim_optim + torch.cuda.empty_cache() + + print(f'Original Model | 32-bit Environment | mIoU: {mIoU_orig_fp32:.4f}') + print(f'Original Model | {config.default_param_bw}-bit Environment | mIoU: {mIoU_orig_int8:.4f}') + print(f'Optimized Model | 32-bit Environment | mIoU: {mIoU_optim_fp32:.4f}') + print(f'Optimized Model | {config.default_param_bw}-bit Environment | mIoU: {mIoU_optim_int8:.4f}') + + +if __name__ == '__main__': + args = arguments() + download_weights(args) + main(args) \ No newline at end of file diff --git a/zoo_torch/examples/hrnet-posenet/hrnet_posenet_quanteval.py b/zoo_torch/examples/hrnet-posenet/hrnet_posenet_quanteval.py index 10a87ba..a14ca1d 100644 --- a/zoo_torch/examples/hrnet-posenet/hrnet_posenet_quanteval.py +++ b/zoo_torch/examples/hrnet-posenet/hrnet_posenet_quanteval.py @@ -23,6 +23,10 @@ #General Imports import _init_paths import argparse +import argparse +import random +import sys, os, tarfile +import urllib.request from config import cfg from config import update_config from core.function import validate @@ -37,6 +41,25 @@ #AIMET related imports from aimet_torch.quantsim import QuantizationSimModel +from aimet_model_zoo.zoo_torch.common.utils import get_device + + + +QUANTSIM_CONFIG_URL = "https://raw.githubusercontent.com/quic/aimet/release-aimet-1.22.1/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json" +OPTIMIZED_CHECKPOINT_URL = "https://github.com/quic/aimet-model-zoo/releases/download/hrnet-posenet/" + +def download_weights(prefix): + # Download config file + if not os.path.exists("./default_config_per_channel.json"): + urllib.request.urlretrieve(QUANTSIM_CONFIG_URL, "default_config_per_channel.json") + + # Download optimized and FP32 model + if not os.path.exists(f"./{prefix}.pth"): + urllib.request.urlretrieve(f"{OPTIMIZED_CHECKPOINT_URL}/{prefix}.pth", f"{prefix}.pth") + if not os.path.exists(f"./{prefix}.encodings"): + urllib.request.urlretrieve(f"{OPTIMIZED_CHECKPOINT_URL}/{prefix}.encodings",f"{prefix}.encodings") + if not os.path.exists(f"./hrnet_posenet_FP32.pth"): + urllib.request.urlretrieve(f"{OPTIMIZED_CHECKPOINT_URL}/hrnet_posenet_FP32.pth",f"hrnet_posenet_FP32.pth") #Add Arguments def parse_args(): @@ -46,55 +69,62 @@ def parse_args(): parser.add_argument('--default-output-bw', help='output bitwidth for quantization', default=8, type=int) parser.add_argument('--use-cuda', help='Use cuda', default=True, type=bool) parser.add_argument('--evaluation-dataset', help='path to evaluation dataset',type=str, required=True) + parser.add_argument('--batch-size', help='Data batch size for a model', type = int, default=32) + parser.add_argument('--config-file', help='Data batch size for a model', type = str, default='./default_config_per_channel.json') parser.add_argument('opts',help="Modify config options using the command-line",default=None,nargs=argparse.REMAINDER) args = parser.parse_args() return args +class ModelConfig(): + def __init__(self, args): + self.modelDir = './' + self.logDir = './' + self.dataDir = './' + for arg in vars(args): + setattr(self, arg, getattr(args, arg)) + +def seed(seed_num, args): + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + torch.manual_seed(seed_num) + if args.use_cuda: + torch.cuda.manual_seed(seed_num) + torch.cuda.manual_seed_all(seed_num) + def main(): #Load parameters from arguments args = parse_args() - #Set dir args to default - args.modelDir = './' - 
args.logDir = './' - args.dataDir = './' + # Set seed value + seed(0, args) - update_config(cfg, args) - logger, final_output_dir, tb_log_dir = create_logger( - cfg, args.cfg, 'valid') - - # Download weights for optimized model and load optimized model and encodings - print('Downloading optimized model weights') - prefix = f'hrnet_posenet_W{args.default_param_bw}A{args.default_output_bw}' + config = ModelConfig(args) - URL = "https://github.com/quic/aimet-model-zoo/releases/download/hrnet-posenet/" + update_config(cfg, config) + logger, final_output_dir, tb_log_dir = create_logger( + cfg, config.cfg, 'valid') - wget.download(URL+f'{prefix}.pth', f'./{prefix}.pth') - wget.download(URL+f'{prefix}.encodings', f'./{prefix}.encodings') - wget.download(URL+'hrnet_posenet_FP32.pth', f'./hrnet_posenet_FP32.pth') + #Define prefix + prefix = f'hrnet_posenet_W{config.default_param_bw}A{config.default_output_bw}' - #Download aimet config file - URL = 'https://raw.githubusercontent.com/quic/aimet/develop/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json' - wget.download(URL,'./default.json') - - args.aimet_config_file = './default.json' + # Download weights for optimized model and load optimized model and encodings + download_weights(prefix) #Load FP32 model model = torch.load('./hrnet_posenet_FP32.pth') model.eval() - - #updata use-cuda args based on availability of cuda devices - use_cuda = args.use_cuda and torch.cuda.is_available() + + # get device + device = get_device(args) #Define criterion criterion = JointsMSELoss( use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT ) - if use_cuda: - criterion = criterion.cuda() + criterion.to(device) #Create validation dataloader based on dataset pre-processing @@ -102,7 +132,7 @@ def main(): mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) valid_dataset = eval('dataset.coco')( - cfg, args.evaluation_dataset, 'val2017', False, + cfg, config.evaluation_dataset, 'val2017', False, transforms.Compose([ transforms.ToTensor(), normalize, @@ -110,18 +140,16 @@ def main(): ) valid_loader = torch.utils.data.DataLoader( valid_dataset, - batch_size=32, + batch_size=config.batch_size, shuffle=False, num_workers=8, pin_memory=True ) - if use_cuda: - model = model.cuda() + model.to(device) - print(f'FP32 evaluation:') - # validate(cfg, valid_loader, valid_dataset, model) + print(f"Original Model | Accuracy on 32-bit device:") validate(cfg, valid_loader, valid_dataset, model, criterion, final_output_dir, tb_log_dir) @@ -129,8 +157,7 @@ def main(): def forward_pass(model, batch=10): with torch.no_grad(): for i, (input, target, target_weight, meta) in enumerate(valid_loader): - if use_cuda: - input = input.cuda() + input = input.to(device) output = model(input) del input @@ -144,12 +171,11 @@ def forward_pass(model, batch=10): input_shape = (1, 3, 256, 192) dummy_input = torch.randn(input_shape) - if use_cuda: - optimized_model.cuda() - dummy_input = dummy_input.cuda() + optimized_model.to(device) + dummy_input = dummy_input.to(device) #Create QUantsim object - sim = QuantizationSimModel(optimized_model, quant_scheme='tf', default_param_bw=args.default_param_bw, default_output_bw=args.default_output_bw, dummy_input=dummy_input, config_file=args.aimet_config_file) + sim = QuantizationSimModel(optimized_model, quant_scheme='tf', default_param_bw=config.default_param_bw, default_output_bw=config.default_output_bw, dummy_input=dummy_input, config_file=args.config_file) # Set and freeze optimized weight encodings 
     sim.set_and_freeze_param_encodings(encoding_path=optimized_encodings_path)
@@ -158,8 +184,7 @@ def forward_pass(model, batch=10):
     sim.compute_encodings(forward_pass, forward_pass_callback_args=10)
 
     #Evalaute optimized quantized checkpoint
-    print(f'Optimized checkpoint evaluation')
-    # validate(cfg, valid_loader, valid_dataset, sim.model)
+    print(f"Optimized Model | Accuracy on {args.default_param_bw}-bit device:")
     validate(cfg, valid_loader, valid_dataset, sim.model, criterion,
              final_output_dir, tb_log_dir)
 
diff --git a/zoo_torch/examples/hrnet-w48/hrnet-w48_quanteval.py b/zoo_torch/examples/hrnet-w48/hrnet-w48_quanteval.py
index 3174a75..fa0acce 100644
--- a/zoo_torch/examples/hrnet-w48/hrnet-w48_quanteval.py
+++ b/zoo_torch/examples/hrnet-w48/hrnet-w48_quanteval.py
@@ -27,28 +27,28 @@
 # AIMET imports
 from aimet_torch.quantsim import QuantizationSimModel
 
-# AIMET model zoo imports 
+# AIMET model zoo imports
 from zoo_torch.examples.common import utils
 
 # Get evaluation func to evaluate the model
-def model_eval(args, num_samples=None): 
+def model_eval(args, num_samples=None):
     """
     Load HRnet libraries and loaded dataset through HRnet libraries
     :param args
     :param num_samples number of images for computing encoding
     :return: wrapper function for data forward pass
-    """ 
+    """
     # =========HRNet imports=================
     # adding HRNet lib into path system path
-
     if os.path.exists(args.hrnet_path):
         lib_path = os.path.join(args.hrnet_path, "lib")
         sys.path.insert(0, lib_path)
     else:
         raise ValueError('HRNet github must be cloned first')
+
     # import from HRNet lib path
     import datasets
     from config import config
@@ -94,7 +94,6 @@ def eval_func(model, use_cuda):
     return eval_func
 
-
 # Parse command line arguments
 def arguments():
     parser = argparse.ArgumentParser(description='Evaluation script for HRNet')
@@ -114,45 +113,45 @@ def seed(seednum, use_cuda):
         torch.cuda.manual_seed(seednum)
         torch.cuda.manual_seed_all(seednum)
 
-def download_weights():
-    if not os.path.exists("./default_config_per_channel.json"):
-        url_checkpoint = 'https://raw.githubusercontent.com/quic/aimet/17bcc525d6188f177837bbb789ccf55a81f6a1b5/TrainingExtensions/common/src/python/aimet_common/quantsim_config/default_config_per_channel.json'
-        urllib.request.urlretrieve(url_checkpoint, "default_config_per_channel.json")
-    if not os.path.exists("./hrnet_w8a8_pc.encodings"):
-        url_encoding = "https://github.com/quic/aimet-model-zoo/releases/download/torch_hrnet_w8a8_pc/hrnet_w8a8_pc.encodings"
-        urllib.request.urlretrieve(url_encoding, "hrnet_w8a8_pc.encodings")
-    if not os.path.exists("./hrnet_w8a8_pc.pth"):
-        url_config = "https://github.com/quic/aimet-model-zoo/releases/download/torch_hrnet_w8a8_pc/hrnet_w8a8_pc.pth"
-        urllib.request.urlretrieve(url_config, "hrnet_w8a8_pc.pth")
+def download_weights(config):
+    # Download config file
+    if not os.path.exists(config.config_file):
+        url = "https://raw.githubusercontent.com/quic/aimet/release-aimet-1.22.1/TrainingExtensions/common/src/python/aimet_common/quantsim_config/" + config.config_file
+        urllib.request.urlretrieve(url, config.config_file)
+    # Download optimized model
+    if not os.path.exists(config.prefix + ".pth"):
+        url = "https://github.com/quic/aimet-model-zoo/releases/download/torch_" + config.prefix + "/" + config.prefix + ".pth"
+        urllib.request.urlretrieve(url, config.prefix + ".pth")
+    if not os.path.exists(config.prefix + ".encodings"):
+        url = "https://github.com/quic/aimet-model-zoo/releases/download/torch_" + config.prefix + "/" + config.prefix + ".encodings"
+        urllib.request.urlretrieve(url, config.prefix + ".encodings")
 
 # adding hardcoded values into args from parseargs() and return config object
 class ModelConfig():
     def __init__(self, args):
-        self.cfg=args.hrnet_path+'/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml'
+        self.cfg = args.hrnet_path+'/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml'
         self.opts = ['TEST.FLIP_TEST', False, 'DATASET.ROOT', args.hrnet_path + '/data/']
         self.seed = 0
-        self.checkpoint = "hrnet_w8a8_pc.pth"
-        self.encoding = "hrnet_w8a8_pc.encodings"
+        self.prefix = "hrnet_w" + str(args.default_param_bw) + "a" + str(args.default_output_bw) + "_pc"
         self.quant_scheme = "tf_enhanced"
         self.config_file = "default_config_per_channel.json"
         for arg in vars(args):
             setattr(self, arg, getattr(args, arg))
 
 def main():
-    args = arguments()
-    # Adding hardcoded values to config on top of args
-    config=ModelConfig(args)
-
-    download_weights()
+    args = arguments()
+    # Adding hardcoded values to config on top of args
+    config = ModelConfig(args)
+
+    download_weights(config)
 
     device = utils.get_device(args)
-
+
     seed(config.seed, config.use_cuda)
 
     # Get quantized model by loading checkpoint
-    model = torch.load(config.checkpoint)
+    model = torch.load(config.prefix + ".pth")
     model.eval()
     model.to(device)
 
@@ -160,11 +159,9 @@ def main():
     eval_func = model_eval(config)
 
     # Quantization related variables
-    dummy_input = torch.randn((1, 3, 512, 1024),device=device)
-
+    dummy_input = torch.randn((1, 3, 512, 1024), device=device)
 
     # Compute encodings and eval
-
     sim = QuantizationSimModel(model, dummy_input=dummy_input,
                                default_param_bw=config.default_param_bw,
                                default_output_bw=config.default_output_bw,
@@ -172,20 +169,14 @@ def main():
                                config_file=config.config_file)
 
     # Set and freeze encodings to use same quantization grid and then invoke compute encodings
-    sim.set_and_freeze_param_encodings(encoding_path=config.encoding)
+    sim.set_and_freeze_param_encodings(encoding_path=config.prefix + ".encodings")
     sim.compute_encodings(forward_pass_callback=eval_func_calibration, forward_pass_callback_args=config)
 
     # Evaluate quantized model on 8 bit device
-
     mIoU = eval_func(sim.model, config.use_cuda)
     print(f"=======Quantized Model | mIoU on {config.default_param_bw}-bit device: {mIoU:.4f}")
-
-
-
 if __name__ == '__main__':
     main()
-
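
Note (not part of the patch): every *_quanteval.py script touched above follows the same AIMET QuantSim recipe: load the released checkpoint, wrap it in a QuantizationSimModel with the per-channel quantsim config file, freeze the released parameter encodings, run a short calibration forward pass to compute the remaining activation encodings, and evaluate sim.model. The sketch below is illustrative only; quantsim_evaluate, load paths, calibration_fn, and eval_fn are hypothetical placeholders for the model-specific pieces each script supplies, not functions from this repository.

# Illustrative sketch of the shared QuantSim evaluation flow (assumptions noted above).
import torch
from aimet_torch.quantsim import QuantizationSimModel

def quantsim_evaluate(checkpoint_path, encodings_path, config_file, input_shape,
                      calibration_fn, eval_fn, param_bw=8, output_bw=8):
    # Released optimized FP32 checkpoint (torch.save'd full model, as in the scripts above)
    model = torch.load(checkpoint_path)
    model.eval()

    dummy_input = torch.randn(input_shape)
    sim = QuantizationSimModel(model,
                               dummy_input=dummy_input,
                               quant_scheme='tf_enhanced',
                               default_param_bw=param_bw,
                               default_output_bw=output_bw,
                               config_file=config_file)

    # Reuse the released weight encodings, then calibrate activation encodings.
    # AIMET calls calibration_fn(sim.model, forward_pass_callback_args).
    sim.set_and_freeze_param_encodings(encoding_path=encodings_path)
    sim.compute_encodings(forward_pass_callback=calibration_fn,
                          forward_pass_callback_args=None)

    # Evaluate the simulated quantized model
    return eval_fn(sim.model)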