diff --git a/TrainingExtensions/torch/src/python/aimet_torch/adaround/activation_sampler.py b/TrainingExtensions/torch/src/python/aimet_torch/adaround/activation_sampler.py index a19b4d70a5b..484eb61be21 100644 --- a/TrainingExtensions/torch/src/python/aimet_torch/adaround/activation_sampler.py +++ b/TrainingExtensions/torch/src/python/aimet_torch/adaround/activation_sampler.py @@ -38,18 +38,138 @@ """ Sample input to quantized wrapper module and output from original module for Adaround feature """ -from typing import Tuple, Union, List, Callable, Any +from typing import Tuple, Union, List, Callable, Any, Dict import torch from torch.utils.data import Dataset # Import AIMET specific modules from aimet_common.utils import AimetLogger -from aimet_torch.utils import ModuleData +from aimet_torch.utils import CachedDataset, ModuleData, get_named_module, cache_intermediate_datasets,\ + change_tensor_device_placement, in_eval_mode, save_to_cache from aimet_torch.qc_quantize_op import QcQuantizeWrapper +from aimet_torch.quantsim import QuantizationSimModel logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.Quant) +def create_modulelist_for_group_modules(model: torch.nn.Module, sim: QuantizationSimModel, grouped_modules: Dict)\ + -> Tuple[List[torch.nn.ModuleList], List[torch.nn.ModuleList]]: + """ + Use torch.nn.ModuleList to group modules from a single block. + + :param model: FP32 model + :param sim: QuantizationSimModel object + :param grouped_modules: Group modules + :return: List of modulelist for FP32 and quant models + """ + sub_fp_models = [] + sub_sim_models = [] + for _, modules in grouped_modules.items(): + fp_modulelist = torch.nn.ModuleList() + quant_modulelist = torch.nn.ModuleList() + for name in modules: + fp_modulelist.append(get_named_module(model, name)) + quant_modulelist.append(get_named_module(sim.model, name)) + sub_fp_models.append(fp_modulelist) + sub_sim_models.append(quant_modulelist) + + return sub_fp_models, sub_sim_models + + +def get_block_inputs(model: torch.nn.Module, sim: QuantizationSimModel, + breakpoint_module_name: str, cached_dataset: CachedDataset, + cache_on_cpu: bool, forward_fn: Callable, num_batches: int, working_dir: str)\ + -> Union[Tuple[List, List], Tuple[CachedDataset, CachedDataset]]: + """ + Get inputs to block/module from FP32 and QuantizationSimModel models + + :param model: FP32 model + :param sim: QuantizationSimModel object + :param breakpoint_module_name: Breakpoint block/module name + :param cached_dataset: Cached dataset + :param cache_on_cpu: Whether to cache intermediate data on CPU or store to disk + :param forward_fn: adapter function that performs forward pass given a model and inputs + yielded from the data loader. The function expects model as first argument and inputs to model + as second argument. 
+    :param num_batches: Number of batches
+    :param working_dir: Working directory to save block inputs data to disk
+    :return: Inputs to block from FP32 and QuantizationSimModel models
+    """
+    # Cache input data to first block from both FP32 and quant models
+    if cache_on_cpu:
+        cached_fp_dataset = cache_intermediate_datasets(cached_dataset, cache_on_cpu, model,
+                                                        breakpoint_module_name, forward_fn)
+        cached_quant_dataset = cache_intermediate_datasets(cached_dataset, cache_on_cpu,
+                                                           sim.model, breakpoint_module_name, forward_fn)
+    else:
+        fp32_cache_path = working_dir + 'fp32/'
+        quant_cache_path = working_dir + 'quant/'
+        cache_intermediate_datasets(cached_dataset, cache_on_cpu, model, breakpoint_module_name,
+                                    forward_fn, fp32_cache_path)
+        cache_intermediate_datasets(cached_dataset, cache_on_cpu, sim.model, breakpoint_module_name,
+                                    forward_fn, quant_cache_path)
+        cached_fp_dataset = CachedDataset(None, num_batches, fp32_cache_path)
+        cached_quant_dataset = CachedDataset(None, num_batches, quant_cache_path)
+    return cached_fp_dataset, cached_quant_dataset
+
+
+def get_block_outputs(fp_block: torch.nn.ModuleList, quant_block: torch.nn.ModuleList, include_static_inputs: str,
+                      cached_fp_dataset: List, cached_quant_dataset: List,
+                      cache_on_cpu: bool, forward_fn: Callable, device: torch.device, working_dir: str):
+    """
+    Get outputs from block/module from FP32 and QuantizationSimModel models and assign for next block/module.
+
+    NOTE: "static_inputs" (like attention_mask, position_ids) remain the same across different blocks.
+    So, if "include_static_inputs" is set to True, then such inputs are reused.
+
+    :param fp_block: ModuleList for fp32 modules
+    :param quant_block: ModuleList for quant modules
+    :param include_static_inputs: Flag to include "static_inputs" or not
+    :param cached_fp_dataset: Cached dataset for fp32 model
+    :param cached_quant_dataset: Cached dataset for quant model
+    :param cache_on_cpu: Whether to cache intermediate data on CPU or store to disk
+    :param forward_fn: Optional adapter function that performs forward pass given a model and inputs
+     yielded from the data loader. The function expects model as first argument and inputs to model as second argument.
+    :param device: torch device
+    :param working_dir: Working directory to save block inputs data to disk
+    """
+    # pylint: disable=too-many-locals, too-many-arguments
+    fp_block.to(device)
+    quant_block.to(device)
+
+    fp_iterator = iter(cached_fp_dataset)
+    quant_iterator = iter(cached_quant_dataset)
+    for idx in range(len(cached_fp_dataset)):  # pylint: disable=consider-using-enumerate
+        fp_inputs = change_tensor_device_placement(next(fp_iterator), device)
+        quant_inputs = change_tensor_device_placement(next(quant_iterator), device)
+
+        with in_eval_mode(fp_block), in_eval_mode(quant_block), torch.no_grad():
+            fp_outputs = forward_fn(fp_block, fp_inputs)
+            fp_outputs = fp_outputs[0].cpu() if isinstance(fp_outputs, (tuple, list)) else fp_outputs.cpu()
+            quant_outputs = forward_fn(quant_block, quant_inputs)
+            quant_outputs = quant_outputs[0].cpu() if isinstance(quant_outputs, (tuple, list)) else quant_outputs.cpu()
+
+        # Check if the next ModuleList needs static inputs or not and assign
+        # the outputs (fp32/quant) from current block to be the input (fp32/quant) of next block
+        if include_static_inputs == "True":
+            fp_inputs[0], quant_inputs[0] = fp_outputs, quant_outputs
+        else:
+            fp_inputs, quant_inputs = [fp_outputs], [quant_outputs]
+
+        # Cache the outputs on CPU or disk
+        if cache_on_cpu:
+            cached_fp_dataset[idx] = fp_inputs
+            cached_quant_dataset[idx] = quant_inputs
+        else:
+            fp32_cache_path = working_dir + 'fp32/'
+            quant_cache_path = working_dir + 'quant/'
+            save_to_cache(fp_inputs, fp32_cache_path, idx)
+            save_to_cache(quant_inputs, quant_cache_path, idx)
+
+    fp_block.cpu()
+    quant_block.cpu()
+
+
 class ActivationSampler:
     """
     For a module in the original model and the corresponding module in the weight quantized QuantSim model,
@@ -73,7 +193,8 @@ def __init__(self, orig_module: torch.nn.Module, quant_module: QcQuantizeWrapper
         self._orig_module_collector = ModuleData(orig_model, orig_module, forward_fn)
         self._quant_module_collector = ModuleData(quant_model, quant_module, forward_fn)
-    def sample_and_place_all_acts_on_cpu(self, cached_dataset: Dataset, cached_quant_dataset: Dataset = None) -> Tuple[torch.Tensor, torch.Tensor]:
+    def sample_and_place_all_acts_on_cpu(self, cached_dataset: Dataset,
+                                         cached_quant_dataset: Dataset = None) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         From the original module, collect output activations and input activations to corresponding quantized module.
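For context, a minimal sketch of how the new block-wise helpers above are meant to be chained together (mirroring their use in adaround_weight.py and seq_mse.py later in this diff). The checkpoints config keys are the ones the callers assert on; the module names in the commented JSON and the surrounding variables (model, sim, cached_dataset, forward_fn, device, num_batches, working_dir) are illustrative placeholders, not part of this change:

# Hypothetical checkpoints config contents (module names are placeholders):
# {
#     "grouped_modules": {"block_0": ["layer1", "layer2"], "block_1": ["layer3", "layer4"]},
#     "include_static_inputs": ["False", "False"],
#     "cache_on_cpu": "True"
# }
breakpoint_module_name = grouped_modules[list(grouped_modules.keys())[0]][0]
cached_fp_dataset, cached_quant_dataset = get_block_inputs(
    model, sim, breakpoint_module_name, cached_dataset, cache_on_cpu,
    forward_fn, num_batches, working_dir)

fp_blocks, quant_blocks = create_modulelist_for_group_modules(model, sim, grouped_modules)
for i, (fp_block, quant_block, static_input) in enumerate(zip(fp_blocks, quant_blocks, include_static_inputs)):
    # ... optimize the i-th block using cached_fp_dataset / cached_quant_dataset ...
    if i < len(fp_blocks) - 1:
        # Outputs of the current block become the inputs of the next block
        get_block_outputs(fp_block, quant_block, static_input,
                          cached_fp_dataset, cached_quant_dataset, cache_on_cpu,
                          forward_fn, device, working_dir)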
diff --git a/TrainingExtensions/torch/src/python/aimet_torch/adaround/adaround_weight.py b/TrainingExtensions/torch/src/python/aimet_torch/adaround/adaround_weight.py index 60fa0ed2640..e9ae5e80536 100644 --- a/TrainingExtensions/torch/src/python/aimet_torch/adaround/adaround_weight.py +++ b/TrainingExtensions/torch/src/python/aimet_torch/adaround/adaround_weight.py @@ -43,7 +43,7 @@ import itertools import json import shutil -from typing import Tuple, Union, Dict, List, Callable, Any +from typing import Tuple, Union, Dict, List, Callable, Any, Optional import torch from torch.utils.data import DataLoader from tqdm import tqdm @@ -61,6 +61,8 @@ from aimet_torch.adaround.adaround_tensor_quantizer import AdaroundTensorQuantizer from aimet_torch.adaround.adaround_optimizer import AdaroundOptimizer from aimet_torch.adaround.adaround_loss import AdaroundHyperParameters +from aimet_torch.adaround.activation_sampler import create_modulelist_for_group_modules, get_block_inputs,\ + get_block_outputs logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.Quant) @@ -195,17 +197,22 @@ def _apply_adaround(cls, quant_sim: QuantizationSimModel, model: torch.nn.Module @classmethod def _adaround_model(cls, model: torch.nn.Module, quant_sim: QuantizationSimModel, module_act_func_pair: Dict, - params: AdaroundParameters, dummy_input: Union[torch.Tensor, Tuple], checkpoints_config: str = None): + params: AdaroundParameters, dummy_input: Union[torch.Tensor, Tuple], + checkpoints_config: str = None): """ Optimize weight rounding of every module (AdaroundSupportedModules) of model in sequential manner based on occurrence + + NOTE: When checkpoints_config file is provided, assumption is that the outputs from previous group modules (block) + should feed directly into next group modules (block) + :param model: Original fp32 model from which quant_sim was created. :param quant_sim: QuantizationSimModel object to optimize weight rounding. The activation quantizers are expected to have been disabled. 
:param module_act_func_pair: Dictionary of module to immediate following activation function :param params: Adaround parameters :param dummy_input: Dummy input to the model - :param checkpoints_config: Config files to split fp32/quant model by checkpoints + :param checkpoints_config: Config files to split fp32/quant model by checkpoints to speedup activations sampling """ # pylint: disable=too-many-locals, protected-access, too-many-branches, too-many-statements @@ -222,7 +229,6 @@ def _adaround_model(cls, model: torch.nn.Module, quant_sim: QuantizationSimModel num_iterations = 15000 else: num_iterations = 10000 - try: # Cache model input data to WORKING_DIR cached_dataset = utils.CachedDataset(params.data_loader, params.num_batches, WORKING_DIR) @@ -233,8 +239,28 @@ def _adaround_model(cls, model: torch.nn.Module, quant_sim: QuantizationSimModel # AdaRound must be applied to modules in the order of occurrence if checkpoints_config: + # Load the predefined json file for checkpoints info + ckpts_file = json.load(open(checkpoints_config)) + assert 'grouped_modules' in ckpts_file.keys(),\ + "Please provide a dictionary of grouped_modules in the file to define checkpoints" + assert 'include_static_inputs' in ckpts_file.keys(),\ + "Please provide a dictionary of include_static_inputs in the file to define checkpoints" + assert 'cache_on_cpu' in ckpts_file.keys(),\ + "Please define cache_on_cpu to determine whether to cache intermediate tensors on CPU" + + grouped_modules = ckpts_file['grouped_modules'] + breakpoint_module_name = ckpts_file['grouped_modules'][list(grouped_modules.keys())[0]][0] + include_static_inputs = ckpts_file['include_static_inputs'] + cache_on_cpu = ckpts_file['cache_on_cpu'] + cached_fp_dataset, cached_quant_dataset = get_block_inputs(model, quant_sim, + breakpoint_module_name, + cached_dataset, cache_on_cpu, + params.forward_fn, params.num_batches, + WORKING_DIR) # Get the device of model to latter be used to place input tensor on the same device device = utils.get_device(model) + model.cpu() + quant_sim.model.cpu() # Forward function for the ModuleList object def fwd_mod_ls(mod_ls, x): @@ -242,96 +268,25 @@ def fwd_mod_ls(mod_ls, x): x = params.forward_fn(mod, x) return x - fp32_cache_path = WORKING_DIR+'fp32/' - quant_cache_path = WORKING_DIR+'quant/' + sub_fp_models, sub_sim_models = create_modulelist_for_group_modules(model, quant_sim, grouped_modules) + for i, (fp_block, quant_sim_block, static_input) in enumerate(zip(sub_fp_models, + sub_sim_models, + include_static_inputs)): + modules = utils.get_ordered_list_of_modules(fp_block, cached_fp_dataset[0], fwd_mod_ls) + cls._run_adaround_model(modules, fp_block, quant_sim_block, + module_act_func_pair, opt_params, + fwd_mod_ls, + cached_fp_dataset, cached_quant_dataset) + + # Get the outputs from the current block and assign to be the inputs for next block + # except for the last block + if i < len(sub_fp_models) - 1: + get_block_outputs(fp_block, quant_sim_block, static_input, + cached_fp_dataset, cached_quant_dataset, cache_on_cpu, + fwd_mod_ls, device, WORKING_DIR) - # Load the predefined json file for checkpoints info - ckpts_file = json.load(open(checkpoints_config)) - assert 'grouped_modules' in ckpts_file.keys(), "Please provide a dictionary of grouped_modules in the file to define checkpoints" - assert 'include_static_inputs' in ckpts_file.keys(), "Please provide a dictionary of include_static_inputs in the file to define checkpoints" - assert 'cache_on_cpu' in ckpts_file.keys(), "Please define cache_on_cpu to 
determine whether to cache intermediate tensors on CPU" - - grouped_modules_dict = ckpts_file['grouped_modules'] - break_point = ckpts_file['grouped_modules'][list(grouped_modules_dict.keys())[0]][0] - include_static_inputs = ckpts_file['include_static_inputs'] - cache_on_cpu = ckpts_file['cache_on_cpu'] - - # Cache input data for both fp and quant model - if cache_on_cpu: - cached_fp_dataset = utils.cache_intermediate_datasets(cached_dataset, cache_on_cpu, model, - break_point, params.forward_fn) - cached_quant_dataset = utils.cache_intermediate_datasets(cached_dataset, cache_on_cpu, - quant_sim.model, break_point, - params.forward_fn) - else: - utils.cache_intermediate_datasets(cached_dataset, cache_on_cpu, model, break_point, - params.forward_fn, fp32_cache_path) - utils.cache_intermediate_datasets(cached_dataset, cache_on_cpu, quant_sim.model, break_point, - params.forward_fn, quant_cache_path) - cached_fp_dataset = utils.CachedDataset(None, params.num_batches, fp32_cache_path) - cached_quant_dataset = utils.CachedDataset(None, params.num_batches, quant_cache_path) - - # Place fp32/quant model to cpu to save the memory usage of GPU - model.cpu() - quant_sim.model.cpu() - - # Use torch.nn.ModuleList to group modules - sub_fp_models = [] - sub_sim_models = [] - for _, modules in grouped_modules_dict.items(): - fp_mod_ls = torch.nn.ModuleList() - quant_mod_ls = torch.nn.ModuleList() - for name in modules: - fp_mod_ls.append(utils.get_named_module(model, name)) - quant_mod_ls.append(utils.get_named_module(quant_sim.model, name)) - sub_fp_models.append(fp_mod_ls) - sub_sim_models.append(quant_mod_ls) - - for n, (fp_model, sim_model, include_static_input) in enumerate(zip(sub_fp_models, sub_sim_models, include_static_inputs)): - # Place sub fp32/quant model to the device - fp_model.to(device) - sim_model.to(device) - - modules = utils.get_ordered_list_of_modules(fp_model, cached_fp_dataset[0], fwd_mod_ls) - cls._run_adaround_model(modules, fp_model, sim_model, module_act_func_pair, opt_params, fwd_mod_ls, cached_fp_dataset, cached_quant_dataset) - - if n < len(sub_fp_models) - 1: - # Cache the outputs of current sub fp32/quant model to be the input of next sub fp32/quant model - fp_iterator = iter(cached_fp_dataset) - quant_iterator = iter(cached_quant_dataset) - # pylint: disable=consider-using-enumerate - for idx in range(len(cached_fp_dataset)): - # Place the input tensors on the same device as sub fp32/quant model - fp_data = utils.change_tensor_device_placement(next(fp_iterator), device) - quant_data = utils.change_tensor_device_placement(next(quant_iterator), device) - with utils.in_eval_mode(fp_model), utils.in_eval_mode(sim_model), torch.no_grad(): - fp_output = fwd_mod_ls(fp_model, fp_data) - fp_output = fp_output[0].cpu() if isinstance(fp_output, (tuple, list)) else fp_output.cpu() - quant_output = fwd_mod_ls(sim_model, quant_data) - quant_output = quant_output[0].cpu() if isinstance(quant_output, (tuple, list)) else quant_output.cpu() - - # Check if the next ModuleList needs static inputs or not - if include_static_input == "True": - fp_data[0] = fp_output - quant_data[0] = quant_output - else: - fp_data = [fp_output] - quant_data = [quant_output] - - # Cache the outputs on CPU or disk - if cache_on_cpu: - cached_fp_dataset[idx] = fp_data - cached_quant_dataset[idx] = quant_data - else: - utils.save_to_cache(fp_data, fp32_cache_path, idx) - utils.save_to_cache(quant_data, quant_cache_path, idx) - - # Place sub fp32/quant model to cpu - fp_model.cpu() - sim_model.cpu() # After 
finishing Adaround, placing the quant model back to its original device quant_sim.model.to(device) - else: modules = utils.get_ordered_list_of_modules(model, dummy_input) cls._run_adaround_model(modules, model, quant_sim.model, module_act_func_pair, opt_params, @@ -342,10 +297,14 @@ def fwd_mod_ls(mod_ls, x): shutil.rmtree(WORKING_DIR) @classmethod - def _run_adaround_model(cls, modules, model, quant_sim_model, module_act_func_pair, opt_params, forward_fn, - cached_dataset, cached_quant_dataset=None): + def _run_adaround_model(cls, modules: List, model: torch.nn.Module, quant_sim_model: torch.nn.Module, + module_act_func_pair: Dict, opt_params: AdaroundHyperParameters, forward_fn: Callable, + cached_dataset: utils.CachedDataset, + cached_quant_dataset: Optional[utils.CachedDataset] = None): """ - Iterate through all modules to find out Adaround supported modules and apply Adaround optimization to those modules + Iterate through all modules to find out Adaround supported modules and + apply Adaround optimization to those modules + :param modules: Candidate modules :param model: Original fp32 model :param quant_sim_model: QuantSim model diff --git a/TrainingExtensions/torch/src/python/aimet_torch/seq_mse.py b/TrainingExtensions/torch/src/python/aimet_torch/seq_mse.py new file mode 100644 index 00000000000..7483ef799e5 --- /dev/null +++ b/TrainingExtensions/torch/src/python/aimet_torch/seq_mse.py @@ -0,0 +1,504 @@ +# /usr/bin/env python +# -*- mode: python -*- +# ============================================================================= +# @@-COPYRIGHT-START-@@ +# +# Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# @@-COPYRIGHT-END-@@
+# =============================================================================
+
+""" Sequential MSE implementation """
+
+import json
+import os
+import tempfile
+from dataclasses import dataclass
+from typing import Optional, Union, Tuple, List, Callable
+import torch
+import torch.nn.functional as functional
+from torch.utils.data import DataLoader
+
+from aimet_common.defs import QuantScheme
+import aimet_common.libpymo as libpymo
+from aimet_torch.utils import CachedDataset, get_ordered_list_of_modules, in_eval_mode, StopForwardException,\
+    change_tensor_device_placement, get_device
+from aimet_torch.adaround.activation_sampler import create_modulelist_for_group_modules,\
+    get_block_inputs, get_block_outputs
+from aimet_torch.qc_quantize_op import QcQuantizeWrapper, QcQuantizeOpMode
+from aimet_torch.tensor_quantizer import TensorQuantizer, StaticGridPerTensorQuantizer, StaticGridPerChannelQuantizer
+from aimet_torch.quantsim import QuantizationSimModel
+
+# The following modules with weights are supported
+SUPPORTED_MODULES = (torch.nn.Linear, )
+
+
+def default_forward_fn(model, inputs):
+    """
+    Default forward function.
+    :param model: pytorch model
+    :param inputs: model inputs
+    """
+    if isinstance(inputs, torch.Tensor):
+        inputs = [inputs]
+    return model(*inputs)
+
+
+@dataclass
+class SeqMseParams:
+    """
+    Sequential MSE parameters
+
+    :param num_batches: Number of batches.
+    :param num_candidates: Number of candidates to perform grid search. Default 20.
+    :param inp_symmetry: Input symmetry. Default 'symqt'.
+    :param loss_fn: Loss function. Default 'mse'.
+    :param forward_fn: Optional adapter function that performs forward pass given a model and inputs
+     yielded from the data loader. The function expects model as first argument and inputs to model as second argument.
+    """
+    num_batches: int
+    num_candidates: int = 20
+    inp_symmetry: str = 'symqt'
+    loss_fn: str = 'mse'
+    forward_fn: Callable = default_forward_fn
+
+
+def apply_seq_mse(model: torch.nn.Module,
+                  sim: QuantizationSimModel,
+                  data_loader: DataLoader,
+                  params: SeqMseParams,
+                  modules_to_exclude: Optional[List[torch.nn.Module]] = None,
+                  module_classes_to_exclude: Optional[List[torch.nn.Module]] = None,
+                  checkpoints_config: Optional[str] = None):
+    """
+    Apply sequential MSE - find and freeze optimal parameter encodings candidate
+    1 Disable all input/output quantizers, param quantizers from exclusion list
+    2 Find and freeze optimal parameter encodings candidate for remaining supported modules
+    3 Re-enable disabled quantizers from step 1
+
+    NOTE: module reference(s) passed to the modules_to_exclude list should be from sim.model.
+
+    :param model: Original fp32 model
+    :param sim: Corresponding QuantizationSimModel object
+    :param data_loader: Data loader
+    :param params: Sequential MSE parameters
+    :param modules_to_exclude: List of supported modules to exclude when applying Sequential MSE
+    :param module_classes_to_exclude: List of supported module classes to exclude when applying Sequential MSE
+    :param checkpoints_config: Config files to split fp32/quant model by checkpoints to speedup activations sampling
+    """
+    # pylint: disable=protected-access
+    assert sim._quant_scheme == QuantScheme.post_training_tf, "Use TF quant-scheme with sequential MSE."
+
+    # disable all input/output activation quantizers and
+    # parameter quantizers corresponding to modules from exclusion list
+    quantizers = get_quantizers_to_be_disabled(sim, modules_to_exclude, module_classes_to_exclude)
+    enable_disable_quantizers(quantizers, enabled=False)
+
+    # Initialize all remaining parameters' encodings
+    compute_all_param_encodings(sim)
+
+    # Find and freeze optimal parameter encodings candidate
+    with tempfile.TemporaryDirectory() as tempdir:
+        cached_dataset = CachedDataset(data_loader, params.num_batches, os.path.join(tempdir, 'cached_dataset'))
+        if checkpoints_config:
+            apply_seq_mse_using_opt_sampling(checkpoints_config, model, sim, cached_dataset, params, tempdir)
+        else:
+            dummy_input = change_tensor_device_placement(next(iter(data_loader)), get_device(model))
+            fp32_modules = get_ordered_list_of_modules(model, dummy_input)
+            fp32_modules = [(name, module) for name, module in fp32_modules if isinstance(module, SUPPORTED_MODULES)]
+            run_seq_mse(fp32_modules, model, sim.model, params, params.forward_fn,
+                        cached_dataset, None)
+
+    # re-enable disabled quantizers
+    enable_disable_quantizers(quantizers, enabled=True)
+
+
+def apply_seq_mse_using_opt_sampling(checkpoints_config: str,
+                                     model: torch.nn.Module,
+                                     sim: QuantizationSimModel,
+                                     cached_dataset: CachedDataset,
+                                     params: SeqMseParams,
+                                     tempdir: str):
+    """
+    Apply sequential MSE using optimized sampling of intermediate data. When checkpoints_config file is provided,
+    intermediate activations from the breakpoint are treated as model inputs for the next blocks.
+
+    NOTE: Assumption is that the outputs from the current block are fed directly to the following block
+    and there are no functional operations in-between.
+
+    :param checkpoints_config: Config files to split fp32/quant model by checkpoints to speedup activations sampling
+    :param model: Original fp32 model
+    :param sim: Corresponding QuantizationSimModel object
+    :param cached_dataset: Cached dataset
+    :param params: Sequential MSE parameters
+    :param tempdir: temporary working directory
+    """
+    # pylint: disable=too-many-locals
+    ckpts_file = json.load(open(checkpoints_config))
+    assert 'grouped_modules' in ckpts_file.keys(), \
+        "Please provide a dictionary of grouped_modules in the file to define checkpoints"
+    assert 'include_static_inputs' in ckpts_file.keys(), \
+        "Please provide a dictionary of include_static_inputs in the file to define checkpoints"
+    assert 'cache_on_cpu' in ckpts_file.keys(), \
+        "Please define cache_on_cpu to determine whether to cache intermediate tensors on CPU"
+
+    grouped_modules = ckpts_file['grouped_modules']
+    breakpoint_module_name = ckpts_file['grouped_modules'][list(grouped_modules.keys())[0]][0]
+    include_static_inputs = ckpts_file['include_static_inputs']
+    cache_on_cpu = ckpts_file['cache_on_cpu']
+    cached_fp_dataset, cached_quant_dataset = get_block_inputs(model, sim,
+                                                               breakpoint_module_name,
+                                                               cached_dataset, cache_on_cpu,
+                                                               params.forward_fn, params.num_batches,
+                                                               tempdir)
+    # Get the device of model to later be used to place input tensor on the same device
+    device = get_device(model)
+    model.cpu()
+    sim.model.cpu()
+
+    # Forward function for the ModuleList object
+    def fwd_fn_modulelist(modulelists, x):
+        for mod in modulelists:
+            x = mod(*x) if isinstance(x, (tuple, list)) else mod(x)
+        return x
+
+    sub_fp_models, sub_sim_models = create_modulelist_for_group_modules(model, sim, grouped_modules)
+    for i, (fp_block, quant_sim_block, static_input) in enumerate(zip(sub_fp_models,
+                                                                      sub_sim_models,
+                                                                      include_static_inputs)):
+        fp32_modules = get_ordered_list_of_modules(fp_block, cached_fp_dataset[0], fwd_fn_modulelist)
+        fp32_modules = [(name, module) for name, module in fp32_modules if isinstance(module, SUPPORTED_MODULES)]
+        run_seq_mse(fp32_modules, fp_block, quant_sim_block, params, fwd_fn_modulelist,
+                    cached_fp_dataset, cached_quant_dataset)
+
+        # Get the outputs from the current block and assign to be the inputs for next block
+        # except for the last block
+        if i < len(sub_fp_models) - 1:
+            get_block_outputs(fp_block, quant_sim_block, static_input,
+                              cached_fp_dataset, cached_quant_dataset, cache_on_cpu,
+                              fwd_fn_modulelist, device, tempdir)
+    sim.model.to(device)
+
+def run_seq_mse(fp32_modules: List[Tuple[str, torch.nn.Module]],
+                model: torch.nn.Module,
+                quant_model: torch.nn.Module,
+                params: SeqMseParams,
+                forward_fn: Callable,
+                cached_fp_dataset: CachedDataset,
+                cached_quant_dataset: Optional[CachedDataset] = None,
+                ):
+    """
+    Run Sequential MSE
+
+    :param fp32_modules: List of FP32 candidate modules in order of occurrence
+    :param model: FP32 model
+    :param quant_model: QuantizationSimModel object
+    :param params: Sequential MSE parameters
+    :param forward_fn: Optional adapter function that performs forward pass given a model and inputs
+     yielded from the data loader. The function expects model as first argument and inputs to model as second argument.
+    :param cached_fp_dataset: Cached dataset object
+    :param cached_quant_dataset: Cached dataset object
+    """
+    name_to_quant_module = {}
+    for name, quant_module in quant_model.named_modules():
+        name_to_quant_module[name] = quant_module
+
+    if not cached_quant_dataset:
+        cached_quant_dataset = cached_fp_dataset
+
+    for module_qualified_name, fp32_module in fp32_modules:
+        try:
+            quant_module = name_to_quant_module[module_qualified_name]
+        except KeyError:
+            continue
+
+        print("Finding optimal parameter encodings candidate: ", module_qualified_name)
+        if params.inp_symmetry == "asym":
+            fp32_inp_acts = get_module_inp_acts(fp32_module, model, params, forward_fn, cached_fp_dataset)
+            quant_inp_acts = get_module_inp_acts(quant_module, quant_model, params, forward_fn, cached_quant_dataset)
+            optimize_module(quant_module, fp32_inp_acts, quant_inp_acts, params)
+        elif params.inp_symmetry == "symfp":
+            fp32_inp_acts = get_module_inp_acts(fp32_module, model, params, forward_fn, cached_fp_dataset)
+            optimize_module(quant_module, fp32_inp_acts, fp32_inp_acts, params)
+        elif params.inp_symmetry == "symqt":
+            quant_inp_acts = get_module_inp_acts(quant_module, quant_model, params, forward_fn, cached_quant_dataset)
+            optimize_module(quant_module, quant_inp_acts, quant_inp_acts, params)
+        else:
+            raise ValueError(f"Invalid inp_symmetry: {params.inp_symmetry}")
+
+
+def get_module_inp_acts(module: torch.nn.Module,
+                        model: torch.nn.Module,
+                        params: SeqMseParams,
+                        forward_fn: Callable,
+                        cached_dataset: CachedDataset,
+                        ) -> torch.Tensor:
+    """
+    For given module, get inputs to the module.
+
+    :param module: FP32/quant module
+    :param model: FP32/quant model
+    :param params: Sequential MSE parameters
+    :param forward_fn: Optional adapter function that performs forward pass given a model and inputs
+     yielded from the data loader. The function expects model as first argument and inputs to model as second argument.
+ :param cached_dataset: Cached dataset + :return: Concatenated inputs + """ + inp_acts = [] + def hook_fn(_, inp, __): + if isinstance(inp, tuple): + inp_acts.append(inp[0]) + raise StopForwardException + handle = module.register_forward_hook(hook_fn) + + iterator = iter(cached_dataset) + for _ in range(params.num_batches): + batch = change_tensor_device_placement(next(iterator), get_device(model)) + try: + with in_eval_mode(model), torch.no_grad(): + forward_fn(model, batch) + except StopForwardException: + pass + handle.remove() + + inp_acts = torch.stack(inp_acts) + return inp_acts + + +def get_quantizers_to_be_disabled(sim: QuantizationSimModel, + modules_to_exclude: Optional[List[torch.nn.Module]], + module_classes_to_exclude: Optional[List[torch.nn.Module]])\ + -> List[TensorQuantizer]: + """ + For given quantsim model, get all quantizers to be disabled before applying sequential MSE. + + :param sim: QuantizationSimModel object + :param modules_to_exclude: List of supported modules to exclude when applying Sequential MSE + :param module_classes_to_exclude: List of supported module classes to exclude when applying Sequential MSE + :return: List of quantizers to be disabled. + """ + # pylint: disable=protected-access + # pylint: disable=unidiomatic-typecheck + quantizers_to_be_disabled = [] + for _, quant_wrapper in sim.quant_wrappers(): + for quantizer in quant_wrapper.input_quantizers: + if quantizer.enabled: + quantizers_to_be_disabled.append(quantizer) + for quantizer in quant_wrapper.output_quantizers: + if quantizer.enabled: + quantizers_to_be_disabled.append(quantizer) + + for _, quant_wrapper in sim.quant_wrappers(): + if modules_to_exclude and quant_wrapper in modules_to_exclude: + for quantizer in quant_wrapper.param_quantizers.values(): + if quantizer.enabled: + quantizers_to_be_disabled.append(quantizer) + if module_classes_to_exclude and type(quant_wrapper._module_to_wrap) in module_classes_to_exclude: + for quantizer in quant_wrapper.param_quantizers.values(): + if quantizer.enabled: + quantizers_to_be_disabled.append(quantizer) + return quantizers_to_be_disabled + + +def enable_disable_quantizers(quantizers: List[TensorQuantizer], enabled: bool): + """ + For given list of quantizers, set (enable/disable) quantizer's 'enabled' attribute. + + :param quantizers: List of quantizers. + :param enabled: Enabled flag. + """ + for quantizer in quantizers: + quantizer.enabled = enabled + + +def compute_all_param_encodings(sim: QuantizationSimModel): + """ + Compute encodings for all parameters, needed for initializing Sequential MSE + + :param sim: Quant sim + """ + for _, quant_wrapper in sim.quant_wrappers(): + for name, quantizer in quant_wrapper.param_quantizers.items(): + quantizer.reset_encoding_stats() + quantizer.update_encoding_stats(getattr(quant_wrapper, name).data) + quantizer.compute_encoding() + + # Wrapper mode must be set to ACTIVE because the wrapper's quantize_dequantize_params() will only call + # into the param tensor quantizer's quantize_dequantize() if the mode isn't PASSTHROUGH. + quant_wrapper.set_mode(QcQuantizeOpMode.ACTIVE) + + +def get_candidates(num_candidates: int, + per_channel_max: torch.Tensor, + per_channel_min: Optional[torch.Tensor]) -> List[Tuple[torch.Tensor, torch.Tensor]]: + """ + Perform grid search. 
+
+    :param num_candidates: Number of candidates
+    :param per_channel_max: Per channel max values
+    :param per_channel_min: Per channel min values
+    :return: candidates
+    """
+    candidates = []
+    if per_channel_min is not None:
+        for cand in range(num_candidates):
+            cand_max = torch.tensor(per_channel_max / num_candidates * (cand + 1))
+            cand_min = torch.tensor(per_channel_min / num_candidates * (cand + 1))
+            candidates.append((cand_max, cand_min))
+    else:
+        for cand in range(num_candidates):
+            cand_max = torch.tensor(per_channel_max / num_candidates * (cand + 1))
+            cand_min = -cand_max
+            candidates.append((cand_max, cand_min))
+    return candidates
+
+
+def optimize_module(quant_module: QcQuantizeWrapper,
+                    x: torch.Tensor,
+                    xq: torch.Tensor,
+                    params: SeqMseParams):
+    """
+    Find and freeze optimal parameter encodings candidate for given module.
+
+    :param quant_module: Quant module to be optimized
+    :param x: Inputs to module from FP32 model
+    :param xq: Inputs to module from QuantSim model
+    :param params: Sequential MSE parameters
+    """
+    # pylint: disable=too-many-locals
+    if quant_module.param_quantizers["weight"].use_symmetric_encodings:
+        per_channel_max = torch.max(quant_module.weight.abs(), dim=1)[0].detach()
+        per_channel_min = None
+    else:
+        per_channel_max = torch.max(quant_module.weight, dim=1)[0].detach()
+        per_channel_min = torch.min(quant_module.weight, dim=1)[0].detach()
+    candidates = get_candidates(params.num_candidates, per_channel_max, per_channel_min)
+
+    total_loss = []
+    for cand_max, cand_min in candidates:
+        compute_param_encodings(quant_module.param_quantizers['weight'], cand_min, cand_max)
+        w = quant_module.weight
+        wq = quant_module.param_quantizers['weight'].quantize_dequantize(w, libpymo.RoundingMode.ROUND_NEAREST)
+        loss = torch.zeros(len(cand_max), device=w.device)
+        with torch.no_grad():
+            for batch_idx in range(params.num_batches):
+                xqwq, xw = compute_outputs(quant_module, x[batch_idx], xq[batch_idx], w, wq)
+                loss += compute_recon_loss(xqwq, xw, params)
+        total_loss.append(loss)
+
+    best_indices = torch.stack(total_loss).min(0, keepdim=True)[1]
+    print(best_indices.squeeze(0)[:params.num_candidates])
+    best_max = torch.stack([cand_max for cand_max, _ in candidates]).gather(0, best_indices)[0]
+    best_min = torch.stack([cand_min for _, cand_min in candidates]).gather(0, best_indices)[0]
+
+    # Compute and freeze parameter encodings using best candidate
+    compute_param_encodings(quant_module.param_quantizers['weight'], best_min, best_max)
+    quant_module.param_quantizers['weight'].freeze_encoding()
+
+
+def compute_param_encodings(quantizer: Union[StaticGridPerTensorQuantizer, StaticGridPerChannelQuantizer],
+                            x_min: torch.Tensor,
+                            x_max: torch.Tensor):
+    """
+    Compute encodings for parameter quantizer using given x_min and x_max values.
+
+    :param quantizer: Tensor quantizer
+    :param x_min: min values
+    :param x_max: max values
+    """
+    tensor = torch.stack([x_min, x_max], dim=-1)
+    quantizer.reset_encoding_stats()
+    quantizer.update_encoding_stats(tensor)
+    quantizer.compute_encoding()
+
+
+def compute_outputs(quant_module: QcQuantizeWrapper,
+                    x: torch.Tensor,
+                    xq: torch.Tensor,
+                    w: torch.Tensor,
+                    wq: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    """
+    Compute X^W^ and XW output activations.
+
+    :param quant_module: Wrapper module to be optimized
+    :param x: Inputs from FP32 model
+    :param xq: Inputs from QuantSim model
+    :param w: FP32 weights
+    :param wq: Quantized-dequantized weights
+    :return: xqwq, xw
+    """
+    # pylint: disable=protected-access
+    module = quant_module._module_to_wrap
+
+    if isinstance(module, torch.nn.Linear):
+        xqwq = functional.linear(xq, wq, module.bias)
+        xw = functional.linear(x, w, module.bias)
+    else:
+        raise ValueError('Unsupported module: ', module)
+    return xqwq, xw
+
+
+def compute_recon_loss(xqwq: torch.Tensor, xw: torch.Tensor, params: SeqMseParams):
+    """
+    Compute reconstruction loss
+
+    :param xqwq: X^Q^ quantized-dequantized values
+    :param xw: XW FP32 values
+    :param params: Sequential MSE parameters
+    :return: loss
+    """
+    if params.loss_fn == "mse":
+        loss_fn = functional.mse_loss
+    elif params.loss_fn == "l1":
+        loss_fn = functional.l1_loss
+    else:
+        loss_fn = neg_sqnr
+    loss = loss_fn(xqwq, xw, reduction="none").sum((0, 1))
+    return loss
+
+
+def neg_sqnr(pred: torch.Tensor, target: torch.Tensor, eps=1e-10, reduction="none"):
+    """
+    Loss function to minimize negative SQNR which is equivalent to maximizing SQNR.
+
+    :param pred: X^Q^ quantized-dequantized values
+    :param target: XW FP32 values
+    :param eps: epsilon
+    :param reduction: unused arg
+    :return: Negative SQNR
+    """
+    # pylint: disable=unused-argument
+    quant_error = target - pred
+    exp_noise = torch.mean(quant_error ** 2, (0, 1), keepdim=True) + eps
+    exp_signal = torch.mean(target ** 2, (0, 1), keepdim=True)
+    sqnr = exp_signal / exp_noise
+    sqnr_db = 10 * torch.log10(sqnr)
+    return -sqnr_db
diff --git a/TrainingExtensions/torch/test/python/test_seq_mse.py b/TrainingExtensions/torch/test/python/test_seq_mse.py
new file mode 100644
index 00000000000..af3791195e2
--- /dev/null
+++ b/TrainingExtensions/torch/test/python/test_seq_mse.py
@@ -0,0 +1,223 @@
+# /usr/bin/env python
+# -*- mode: python -*-
+# =============================================================================
+# @@-COPYRIGHT-START-@@
+#
+# Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# SPDX-License-Identifier: BSD-3-Clause +# +# @@-COPYRIGHT-END-@@ +# ============================================================================= + +import json +import pytest +import numpy +import torch +from torch.utils.data import Dataset, DataLoader + +from aimet_torch.utils import create_fake_data_loader +from aimet_torch.quantsim import QuantizationSimModel +from aimet_torch.qc_quantize_op import StaticGridQuantWrapper, QuantScheme +from aimet_torch.seq_mse import apply_seq_mse, get_candidates, optimize_module, SeqMseParams +from models.mnist_torch_model import Net + +@pytest.fixture(scope="session") +def dummy_input(): + return torch.randn((1, 1, 28, 28)) + + +@pytest.fixture(scope="session") +def unlabeled_data_loader(dummy_input): + class MyDataset(Dataset): + def __init__(self, data): + self.data = data + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return len(self.data) + + dataset = MyDataset([dummy_input[0, :] for _ in range(32)]) + return DataLoader(dataset) + + +def save_config_file_for_checkpoints(): + checkpoints_config = { + "grouped_modules": { + "0": ["conv1", "bn1", "relu1", "maxpool"], + "1": ["conv2", "bn2", "relu2"], + "2": ["conv3", "relu3", "avgpool"], + "3": ["conv4", "flatten", "fc"], + }, + "include_static_inputs": [ + "False", + "False", + "False", + "False" + ], + "cache_on_cpu": "False" + } + + with open('./test_checkpoints.json', 'w') as f: + json.dump(checkpoints_config, f) + + +class SplittableModel(torch.nn.Module): + """ Use this model for unit testing purposes. 
Expect input shape (1, 3, 32, 32) """ + def __init__(self): + super(SplittableModel, self).__init__() + self.conv1 = torch.nn.Conv2d(3, 32, kernel_size=2, stride=2, padding=2, bias=False) + self.bn1 = torch.nn.BatchNorm2d(32) + self.relu1 = torch.nn.ReLU(inplace=True) + self.maxpool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1) + self.conv2 = torch.nn.Conv2d(32, 16, kernel_size=2, stride=2, padding=2, bias=False) + self.bn2 = torch.nn.BatchNorm2d(16) + self.relu2 = torch.nn.ReLU(inplace=True) + self.conv3 = torch.nn.Conv2d(16, 8, kernel_size=2, stride=2, padding=2, bias=False) + self.relu3 = torch.nn.ReLU(inplace=True) + self.avgpool = torch.nn.AvgPool2d(3, stride=1) + self.conv4 = torch.nn.Conv2d(8, 4, kernel_size=2, stride=2, padding=2, bias=True) + self.flatten = torch.nn.Flatten() + self.fc = torch.nn.Linear(36, 12) + + def forward(self, *inputs): + x = self.conv1(inputs[0]) + x = self.bn1(x) + x = self.relu1(x) + x = self.maxpool(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu2(x) + x = self.conv3(x) + x = self.relu3(x) + x = self.avgpool(x) + x = self.conv4(x) + x = self.flatten(x) + x = self.fc(x) + return x + + +class TestSeqMse: + + def test_seq_mse(self): + """ test get_candidates() """ + torch.manual_seed(0) + linear = torch.nn.Linear(2, 4) + x_max = torch.max(linear.weight.abs(), dim=1)[0] + x_min = None + candidates = get_candidates(20, x_max, x_min) + for cand_max, cand_min in candidates: + assert list(cand_max.size())[0] == linear.out_features + assert list(cand_min.size())[0] == linear.out_features + + @pytest.mark.parametrize("enable_pcq", [True, False]) + @pytest.mark.parametrize("param_bw", [2, 31]) + def test_optimize_module_linear(self, enable_pcq, param_bw): + """ test optimize module for linear """ + torch.manual_seed(0) + linear = torch.nn.Linear(64, 128) + wrapper = StaticGridQuantWrapper(linear, param_bw, 16, 'nearest', QuantScheme.post_training_tf) + wrapper.input_quantizers[0].enabled = False + wrapper.output_quantizers[0].enabled = False + if enable_pcq: + wrapper.enable_per_channel_quantization() + + xq = torch.randn(32, 4, 32, 64) + wrapper.param_quantizers['weight'].reset_encoding_stats() + wrapper.param_quantizers['weight'].update_encoding_stats(wrapper.weight.data) + wrapper.param_quantizers['weight'].compute_encoding() + before = wrapper.param_quantizers['weight'].encoding + params = SeqMseParams(num_batches=32) + optimize_module(wrapper, xq, xq, params) + after = wrapper.param_quantizers['weight'].encoding + + # If we use higher param_bw (for example 16, 31), then it should always choose larger candidates so + # before and after param encodings should be almost same. 
+ if param_bw == 31: + if enable_pcq: + assert numpy.isclose(before[0].min, after[0].min) + assert numpy.isclose(before[0].max, after[0].max) + else: + assert numpy.isclose(before.min, after.min) + assert numpy.isclose(before.max, after.max) + else: + if enable_pcq: + assert not numpy.isclose(before[0].min, after[0].min) + assert not numpy.isclose(before[0].max, after[0].max) + else: + assert not numpy.isclose(before.min, after.min) + assert not numpy.isclose(before.max, after.max) + + @pytest.mark.cuda() + @pytest.mark.parametrize("inp_symmetry", ['asym', 'symfp', 'symqt']) + @pytest.mark.parametrize("loss_fn", ['mse', 'l1', 'aa']) + def test_apply_seq_mse(self, unlabeled_data_loader, inp_symmetry, loss_fn): + """ test apply_seq_mse end-to-end """ + torch.manual_seed(0) + model = Net().eval().cuda() + dummy_input = torch.randn(1, 1, 28, 28).cuda() + sim = QuantizationSimModel(model, dummy_input, default_param_bw=4, quant_scheme=QuantScheme.post_training_tf) + params = SeqMseParams(num_batches=2, inp_symmetry=inp_symmetry, loss_fn=loss_fn) + apply_seq_mse(model, sim, unlabeled_data_loader, params, modules_to_exclude=[sim.model.conv1]) + assert sim.model.fc1.param_quantizers['weight'].is_encoding_frozen + assert sim.model.fc2.param_quantizers['weight'].is_encoding_frozen + assert not sim.model.conv1.param_quantizers['weight'].encoding + assert sim.model.conv2.param_quantizers['weight'].encoding + + @pytest.mark.parametrize("inp_symmetry", ['asym', 'symfp', 'symqt']) + @pytest.mark.parametrize("loss_fn", ['mse', 'l1', 'aa']) + def test_seq_mse_with_and_without_checkpoints_config(self, inp_symmetry, loss_fn): + """ test apply_seq_mse end-to-end with and without checkpoints configs """ + torch.manual_seed(0) + + data_loader = create_fake_data_loader(dataset_size=2, batch_size=1, image_size=(3, 32, 32)) + model = SplittableModel().eval() + save_config_file_for_checkpoints() + dummy_input = torch.randn(1, 3, 32, 32) + sim_without = QuantizationSimModel(model, dummy_input, default_param_bw=4, + quant_scheme=QuantScheme.post_training_tf) + sim_with = QuantizationSimModel(model, dummy_input, default_param_bw=4, + quant_scheme=QuantScheme.post_training_tf) + params = SeqMseParams(num_batches=2, inp_symmetry=inp_symmetry, loss_fn=loss_fn) + + # Apply Sequential MSE without checkpoints config + apply_seq_mse(model, sim_without, data_loader, params) + without_checkpoints_enc = sim_without.model.fc.param_quantizers['weight'].encoding + + # Apply Sequential MSE with checkpoints config + apply_seq_mse(model, sim_with, data_loader, params, checkpoints_config="./test_checkpoints.json") + with_checkpoints_enc = sim_with.model.fc.param_quantizers['weight'].encoding + + # encodings should be bit-exact + assert without_checkpoints_enc.min == with_checkpoints_enc.min + assert without_checkpoints_enc.max == with_checkpoints_enc.max + assert without_checkpoints_enc.delta == with_checkpoints_enc.delta + assert without_checkpoints_enc.offset == with_checkpoints_enc.offset
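For reference, a minimal usage sketch of the new Sequential MSE API exercised by the tests above. The model, data loader, and bitwidth are illustrative placeholders taken from the test setup (SplittableModel is the helper model defined in this test file); checkpoints_config is optional and defaults to None:

import torch
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel
from aimet_torch.utils import create_fake_data_loader
from aimet_torch.seq_mse import SeqMseParams, apply_seq_mse

# Placeholder FP32 model and calibration data (mirrors the test setup above)
model = SplittableModel().eval()
dummy_input = torch.randn(1, 3, 32, 32)
data_loader = create_fake_data_loader(dataset_size=2, batch_size=1, image_size=(3, 32, 32))

# Sequential MSE requires the TF quant scheme
sim = QuantizationSimModel(model, dummy_input, default_param_bw=4,
                           quant_scheme=QuantScheme.post_training_tf)
params = SeqMseParams(num_batches=2, inp_symmetry='symqt', loss_fn='mse')

# Optionally pass checkpoints_config="<path to checkpoints json>" to speed up activation sampling
apply_seq_mse(model, sim, data_loader, params)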