From 6634e8a45b22d157bfba21d329b6bb38b5b806ca Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Tue, 28 Nov 2023 17:21:07 +0200 Subject: [PATCH 1/8] Extract quantization preparation steps from the main runner to an external "quantization prep runner": graph analyzing, statistic collection, network editor, qparam calculation, snc and stat correction. In addition, minor documentation fixes to the graph preparation runner and removed duplicated functions. --- .../core/graph_prep_runner.py | 33 +++-- .../core/quantization_prep_runner.py | 134 +++++++++++++++++ model_compression_toolkit/core/runner.py | 139 +----------------- 3 files changed, 161 insertions(+), 145 deletions(-) create mode 100644 model_compression_toolkit/core/quantization_prep_runner.py diff --git a/model_compression_toolkit/core/graph_prep_runner.py b/model_compression_toolkit/core/graph_prep_runner.py index 79782d024..4d405b727 100644 --- a/model_compression_toolkit/core/graph_prep_runner.py +++ b/model_compression_toolkit/core/graph_prep_runner.py @@ -1,4 +1,4 @@ -# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved. +# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -41,23 +41,24 @@ def graph_preparation_runner(in_model: Any, tb_w: TensorboardWriter = None, mixed_precision_enable: bool = False) -> Graph: """ - Quantize a trained model using post-training quantization. - First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding - layers). - Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output - (and input, depends on the quantization configuration) using a given representative dataset. - Next, quantization parameters are calculated using the collected statistics - (both coefficients and activations by default). + Runs all required preparations in order to build a quantization graph from the given model, + quantization configuration and target platform specifications. + This runner include the following steps: + - Reading and building a graph from the given model. + - Setting quantization config to each relevant node in the graph. + - Apply all necessary substitutions to finalize the graph for quantization. + Args: in_model: Model to quantize. representative_data_gen: Dataset used for calibration. - core_config: CoreConfig containing parameters of how the model should be quantized + quantization_config: QuantizationConfig containing parameters of how the model should be quantized. fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, - groups of layers by how they should be quantized, etc.). + groups of layers by how they should be quantized, etc.). fw_impl: FrameworkImplementation object with a specific framework methods implementation. tpc: TargetPlatformCapabilities object that models the inference target platform and - the attached framework operator's information. + the attached framework operator's information. tb_w: TensorboardWriter object for logging + Returns: An internal graph representation of the input model. """ @@ -92,16 +93,18 @@ def get_finalized_graph(initial_graph: Graph, """ Applies all edit operation (edit, substitutions, etc.) on the model's graph, to prepare it for the quantization process. All future graph substitutions and operations that change the graph should be added to this method. + Args: initial_graph (Graph): Graph to apply the changes to. tpc (TargetPlatformCapabilities): TargetPlatformCapabilities object that describes the desired inference target platform (includes fusing patterns MCT should handle). quant_config (QuantizationConfig): QuantizationConfig containing parameters of how the model should be - quantized. + quantized. fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., - kernel channels indices, groups of layers by how they should be quantized, etc.) + kernel channels indices, groups of layers by how they should be quantized, etc.) tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc. fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation. - mixed_precision_enable: is mixed precision enabled. + mixed_precision_enable: is mixed precision enabled. + Returns: Graph object that represents the model, after applying all required modifications to it. """ @@ -173,6 +176,7 @@ def read_model_to_graph(in_model: Any, """ Read a model into a graph object. + Args: in_model: Model to optimize and prepare for quantization. representative_data_gen: Dataset used for calibration. @@ -181,6 +185,7 @@ def read_model_to_graph(in_model: Any, fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.) fw_impl: FrameworkImplementation object with a specific framework methods implementation. + Returns: Graph object that represents the model. """ diff --git a/model_compression_toolkit/core/quantization_prep_runner.py b/model_compression_toolkit/core/quantization_prep_runner.py new file mode 100644 index 000000000..e34ae4698 --- /dev/null +++ b/model_compression_toolkit/core/quantization_prep_runner.py @@ -0,0 +1,134 @@ +# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +from typing import Callable + +from tqdm import tqdm + +from model_compression_toolkit.core.common import FrameworkInfo +from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation +from model_compression_toolkit.core.common.graph.base_graph import Graph +from model_compression_toolkit.core.common.model_collector import ModelCollector +from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph +from model_compression_toolkit.core.common.quantization.core_config import CoreConfig +from model_compression_toolkit.core.common.quantization.quantization_analyzer import analyzer_graph +from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \ + calculate_quantization_params +from model_compression_toolkit.core.common.statistics_correction.statistics_correction import \ + statistics_correction_runner +from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute + +from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter + + +def quantization_preparation_runner(graph: Graph, + representative_data_gen: Callable, + core_config: CoreConfig, + fw_info: FrameworkInfo, + fw_impl: FrameworkImplementation, + tb_w: TensorboardWriter = None): + """ + Prepares a trained model for post-training quantization. + First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers). + Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output + (and input, depends on the quantization configuration) using a given representative dataset. + Next, quantization parameters are calculated using the collected statistics. + Finally, more transformations (based on the statistics) are applied to increase the model's performance. + + Args: + graph: A graph representation of the model to be quantized. + representative_data_gen: Dataset used for calibration. + core_config: CoreConfig containing parameters of how the model should be quantized + fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices, + groups of layers by how they should be quantized, etc.). + fw_impl: FrameworkImplementation object with a specific framework methods implementation. + tb_w: TensorboardWriter object for logging + + Returns: + Graph object that represents the model, contains thresholds, and ready for quantization. + """ + + ###################################### + # Graph analyzing (attaching statistics collectors) + ###################################### + analyzer_graph(fw_impl.attach_sc_to_node, + graph, + fw_info, + core_config.quantization_config) # Mark points for statistics collection + + if tb_w is not None: + tb_w.add_graph(graph, 'after_analyzer_graph') + + ###################################### + # Statistic collection + ###################################### + mi = ModelCollector(graph, + fw_impl, + fw_info) + + for _data in tqdm(representative_data_gen()): + mi.infer(_data) + + ###################################### + # Edit network according to user + # specific settings + ###################################### + # Notice that not all actions affect at this stage (for example, actions that edit the final configuration as + # there are no final configurations at this stage of the optimization). For this reason we edit the graph + # again at the end of the optimization process. + edit_network_graph(graph, fw_info, core_config.debug_config.network_editor) + + ###################################### + # Calculate quantization params + ###################################### + calculate_quantization_params(graph, + fw_info, + fw_impl=fw_impl) + + if tb_w is not None: + tb_w.add_graph(graph, 'thresholds_selection') + tb_w.add_all_statistics(graph, 'thresholds_selection') + + ###################################### + # Graph substitution (post statistics collection) + ###################################### + transformed_graph = substitute(graph, + fw_impl.get_substitutions_post_statistics_collection(core_config.quantization_config)) + + ###################################### + # Shift Negative Activations + ###################################### + if core_config.quantization_config.shift_negative_activation_correction: + transformed_graph = fw_impl.shift_negative_correction(transformed_graph, + core_config, + fw_info) + if tb_w is not None: + tb_w.add_graph(transformed_graph, 'after_shift_negative_correction') + tb_w.add_all_statistics(transformed_graph, 'after_shift_negative_correction') + + if tb_w is not None: + tb_w.add_graph(transformed_graph, 'post_statistics_collection_substitutions') + tb_w.add_all_statistics(transformed_graph, 'post_statistics_collection_substitutions') + + ###################################### + # Statistics Correction + ###################################### + tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_info, fw_impl, tb_w) + + for n in tg_with_bias.nodes: + assert n.final_weights_quantization_cfg is None + + return tg_with_bias \ No newline at end of file diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index db707954d..be7305b20 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -23,6 +23,7 @@ from model_compression_toolkit.core.common import FrameworkInfo from model_compression_toolkit.core.common.hessian.hessian_info_service import HessianInfoService from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner +from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner from model_compression_toolkit.logger import Logger from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.graph.base_graph import Graph @@ -47,6 +48,7 @@ ActivationFinalBitwidthConfigVisualizer from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter + def core_runner(in_model: Any, representative_data_gen: Callable, core_config: CoreConfig, @@ -94,12 +96,12 @@ def core_runner(in_model: Any, representative_dataset=representative_data_gen, fw_impl=fw_impl) - tg = _prepare_model_for_quantization(graph, - representative_data_gen, - core_config, - fw_info, - tb_w, - fw_impl) + tg = quantization_preparation_runner(graph=graph, + representative_data_gen=representative_data_gen, + core_config=core_config, + fw_info=fw_info, + fw_impl=fw_impl, + tb_w=tb_w) ###################################### # Finalize bit widths @@ -179,131 +181,6 @@ def _init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter: return tb_w -def read_model_to_graph(in_model: Any, - representative_data_gen: Callable, - tpc: TargetPlatformCapabilities, - fw_info: FrameworkInfo = None, - fw_impl: FrameworkImplementation = None) -> Graph: - - """ - Read a model into a graph object. - Args: - in_model: Model to optimize and prepare for quantization. - representative_data_gen: Dataset used for calibration. - tpc: TargetPlatformCapabilities object that models the inference target platform and - the attached framework operator's information. - fw_info: Information needed for quantization about the specific framework (e.g., - kernel channels indices, groups of layers by how they should be quantized, etc.) - fw_impl: FrameworkImplementation object with a specific framework methods implementation. - Returns: - Graph object that represents the model. - """ - graph = fw_impl.model_reader(in_model, - representative_data_gen) - graph.set_fw_info(fw_info) - graph.set_tpc(tpc) - return graph - - -def _prepare_model_for_quantization(transformed_graph: Graph, - representative_data_gen: Callable, - core_config: CoreConfig = CoreConfig(), - fw_info: FrameworkInfo = None, - tb_w: TensorboardWriter = None, - fw_impl: FrameworkImplementation = None) -> Graph: - """ - Prepare a trained model for post-training quantization. - First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers). - Second, statistics (e.g. min/max, histogram, etc.) are collected for each layer's output - (and input, depends on the quantization configuration) using a given representative dataset. - Next, quantization parameters are calculated using the collected statistics. - Finally, more transformations (based on the statistics) are applied to increase the model's performance. - - Args: - representative_data_gen (Callable): Dataset used for calibration. - core_config (CoreConfig): CoreConfig containing parameters of how the model should be quantized. - fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., - kernel channels indices, groups of layers by how they should be quantized, etc.) - tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc. - fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation. - - Returns: - Graph object that represents the model, contains thresholds, and ready for quantization. - """ - - ###################################### - # Graph analyzing (attaching statistics collectors) - ###################################### - analyzer_graph(fw_impl.attach_sc_to_node, - transformed_graph, - fw_info, - core_config.quantization_config) # Mark points for statistics collection - - if tb_w is not None: - tb_w.add_graph(transformed_graph, 'after_analyzer_graph') - - ###################################### - # Statistic collection - ###################################### - mi = ModelCollector(transformed_graph, - fw_impl, - fw_info) - - for _data in tqdm(representative_data_gen()): - mi.infer(_data) - - ###################################### - # Edit network according to user - # specific settings - ###################################### - # Notice that not all actions affect at this stage (for example, actions that edit the final configuration as - # there are no final configurations at this stage of the optimization). For this reason we edit the graph - # again at the end of the optimization process. - edit_network_graph(transformed_graph, fw_info, core_config.debug_config.network_editor) - - ###################################### - # Calculate quantization params - ###################################### - calculate_quantization_params(transformed_graph, - fw_info, - fw_impl=fw_impl) - - if tb_w is not None: - tb_w.add_graph(transformed_graph, 'thresholds_selection') - tb_w.add_all_statistics(transformed_graph, 'thresholds_selection') - - ###################################### - # Graph substitution (post statistics collection) - ###################################### - transformed_graph = substitute(transformed_graph, - fw_impl.get_substitutions_post_statistics_collection(core_config.quantization_config)) - - ###################################### - # Shift Negative Activations - ###################################### - if core_config.quantization_config.shift_negative_activation_correction: - transformed_graph = fw_impl.shift_negative_correction(transformed_graph, - core_config, - fw_info) - if tb_w is not None: - tb_w.add_graph(transformed_graph, 'after_shift_negative_correction') - tb_w.add_all_statistics(transformed_graph, 'after_shift_negative_correction') - - if tb_w is not None: - tb_w.add_graph(transformed_graph, 'post_statistics_collection_substitutions') - tb_w.add_all_statistics(transformed_graph, 'post_statistics_collection_substitutions') - - ###################################### - # Statistics Correction - ###################################### - tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_info, fw_impl, tb_w) - - for n in tg_with_bias.nodes: - assert n.final_weights_quantization_cfg is None - - return tg_with_bias - - def _set_final_kpi(graph: Graph, final_bit_widths_config: List[int], kpi_functions_dict: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]], From b3e1ee94b78af6e90ca1906f152787b7258ac6f9 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Wed, 29 Nov 2023 15:45:25 +0200 Subject: [PATCH 2/8] Fix type hints --- model_compression_toolkit/core/quantization_prep_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model_compression_toolkit/core/quantization_prep_runner.py b/model_compression_toolkit/core/quantization_prep_runner.py index e34ae4698..1f7729227 100644 --- a/model_compression_toolkit/core/quantization_prep_runner.py +++ b/model_compression_toolkit/core/quantization_prep_runner.py @@ -39,7 +39,7 @@ def quantization_preparation_runner(graph: Graph, core_config: CoreConfig, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, - tb_w: TensorboardWriter = None): + tb_w: TensorboardWriter = None) -> Graph: """ Prepares a trained model for post-training quantization. First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers). From bdfe3727367dfa288d8947addfef2a88af68ba01 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Wed, 29 Nov 2023 16:04:23 +0200 Subject: [PATCH 3/8] Move tensor board functions to tensorboard_writer.py --- .../visualization/tensorboard_writer.py | 46 +++++++++++++++++++ model_compression_toolkit/core/runner.py | 31 ++----------- .../gptq/keras/quantization_facade.py | 5 +- .../gptq/pytorch/quantization_facade.py | 5 +- .../legacy/keras_quantization_facade.py | 7 +-- .../legacy/pytorch_quantization_facade.py | 7 +-- .../ptq/keras/quantization_facade.py | 5 +- .../ptq/pytorch/quantization_facade.py | 5 +- .../qat/keras/quantization_facade.py | 5 +- .../qat/pytorch/quantization_facade.py | 5 +- .../helpers/prep_graph_for_func_test.py | 12 ++--- .../second_moment_correction_test.py | 5 +- .../second_moment_correction_test.py | 5 +- 13 files changed, 87 insertions(+), 56 deletions(-) diff --git a/model_compression_toolkit/core/common/visualization/tensorboard_writer.py b/model_compression_toolkit/core/common/visualization/tensorboard_writer.py index 126682c5b..447503dd6 100644 --- a/model_compression_toolkit/core/common/visualization/tensorboard_writer.py +++ b/model_compression_toolkit/core/common/visualization/tensorboard_writer.py @@ -16,6 +16,7 @@ from copy import deepcopy import io +import os import numpy as np from PIL import Image from matplotlib.figure import Figure @@ -34,6 +35,9 @@ from model_compression_toolkit.core import FrameworkInfo from model_compression_toolkit.core.common import Graph, BaseNode from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector +from model_compression_toolkit.logger import Logger +from model_compression_toolkit.core.common.visualization.final_config_visualizer import \ + WeightsFinalBitwidthConfigVisualizer, ActivationFinalBitwidthConfigVisualizer DEVICE_STEP_STATS = "/device:CPU:0" @@ -486,3 +490,45 @@ def add_figure(self, er = self.__get_event_writer_by_tag_name(main_tag_name) er.add_event(event) er.flush() + + +def init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter: + """ + Create a TensorBoardWriter object initialized with the logger dir path if it was set, + or None otherwise. + + Args: + fw_info: FrameworkInfo object. + + Returns: + A TensorBoardWriter object. + """ + tb_w = None + if Logger.LOG_PATH is not None: + tb_log_dir = os.path.join(os.getcwd(), Logger.LOG_PATH, 'tensorboard_logs') + Logger.info(f'To use Tensorboard, please run: tensorboard --logdir {tb_log_dir}') + tb_w = TensorboardWriter(tb_log_dir, fw_info) + return tb_w + + +def finalize_bitwidth_in_tb(tb_w: TensorboardWriter, + weights_conf_nodes_bitwidth: List, + activation_conf_nodes_bitwidth: List): + """ + Set the final bit-width configuration of the quantized model in the provided TensorBoard object. + + Args: + tb_w: A TensorBoard object. + weights_conf_nodes_bitwidth: Final weights bit-width configuration. + activation_conf_nodes_bitwidth: Final activation bit-width configuration. + + """ + + if len(weights_conf_nodes_bitwidth) > 0: + visual = WeightsFinalBitwidthConfigVisualizer(weights_conf_nodes_bitwidth) + figure = visual.plot_config_bitwidth() + tb_w.add_figure(figure, f'Weights final bit-width config') + if len(activation_conf_nodes_bitwidth) > 0: + visual = ActivationFinalBitwidthConfigVisualizer(activation_conf_nodes_bitwidth) + figure = visual.plot_config_bitwidth() + tb_w.add_figure(figure, f'Activation final bit-width config') diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index be7305b20..66eb25e0b 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -46,7 +46,8 @@ from model_compression_toolkit.core.common.visualization.final_config_visualizer import \ WeightsFinalBitwidthConfigVisualizer, \ ActivationFinalBitwidthConfigVisualizer -from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter +from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \ + finalize_bitwidth_in_tb def core_runner(in_model: Any, @@ -150,37 +151,11 @@ def core_runner(in_model: Any, f'Final activation bit-width configuration: {[node_b[1] for node_b in activation_conf_nodes_bitwidth]}') if tb_w is not None: - if len(weights_conf_nodes_bitwidth) > 0: - visual = WeightsFinalBitwidthConfigVisualizer(weights_conf_nodes_bitwidth) - figure = visual.plot_config_bitwidth() - tb_w.add_figure(figure, f'Weights final bit-width config') - if len(activation_conf_nodes_bitwidth) > 0: - visual = ActivationFinalBitwidthConfigVisualizer(activation_conf_nodes_bitwidth) - figure = visual.plot_config_bitwidth() - tb_w.add_figure(figure, f'Activation final bit-width config') + finalize_bitwidth_in_tb(tb_w, weights_conf_nodes_bitwidth, activation_conf_nodes_bitwidth) return tg, bit_widths_config, hessian_info_service -def _init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter: - """ - Create a TensorBoardWriter object initialized with the logger dir path if it was set, - or None otherwise. - - Args: - fw_info: FrameworkInfo object. - - Returns: - A TensorBoardWriter object. - """ - tb_w = None - if Logger.LOG_PATH is not None: - tb_log_dir = os.path.join(os.getcwd(), Logger.LOG_PATH, 'tensorboard_logs') - Logger.info(f'To use Tensorboard, please run: tensorboard --logdir {tb_log_dir}') - tb_w = TensorboardWriter(tb_log_dir, fw_info) - return tb_w - - def _set_final_kpi(graph: Graph, final_bit_widths_config: List[int], kpi_functions_dict: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]], diff --git a/model_compression_toolkit/gptq/keras/quantization_facade.py b/model_compression_toolkit/gptq/keras/quantization_facade.py index e9c98f3db..1e5f72413 100644 --- a/model_compression_toolkit/gptq/keras/quantization_facade.py +++ b/model_compression_toolkit/gptq/keras/quantization_facade.py @@ -16,6 +16,7 @@ from typing import Callable, Tuple from packaging import version +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF from model_compression_toolkit.core.common.user_info import UserInformation @@ -24,7 +25,7 @@ from model_compression_toolkit.core.common.framework_info import FrameworkInfo from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfigV2 from model_compression_toolkit.core import CoreConfig -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.runner import gptq_runner from model_compression_toolkit.core.exporter import export_model from model_compression_toolkit.core.analyzer import analyzer_model_quantization @@ -202,7 +203,7 @@ def keras_gradient_post_training_quantization_experimental(in_model: Model, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = GPTQKerasImplemantation() diff --git a/model_compression_toolkit/gptq/pytorch/quantization_facade.py b/model_compression_toolkit/gptq/pytorch/quantization_facade.py index a20c3842c..57f06d1af 100644 --- a/model_compression_toolkit/gptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/gptq/pytorch/quantization_facade.py @@ -15,12 +15,13 @@ from typing import Callable from model_compression_toolkit.core import common from model_compression_toolkit.constants import FOUND_TORCH +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import PYTORCH from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.keras.quantization_facade import GPTQ_MOMENTUM from model_compression_toolkit.gptq.runner import gptq_runner from model_compression_toolkit.core.exporter import export_model @@ -161,7 +162,7 @@ def pytorch_gradient_post_training_quantization_experimental(model: Module, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(DEFAULT_PYTORCH_INFO) + tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO) fw_impl = GPTQPytorchImplemantation() diff --git a/model_compression_toolkit/legacy/keras_quantization_facade.py b/model_compression_toolkit/legacy/keras_quantization_facade.py index 52505409d..57a8c1532 100644 --- a/model_compression_toolkit/legacy/keras_quantization_facade.py +++ b/model_compression_toolkit/legacy/keras_quantization_facade.py @@ -15,6 +15,7 @@ from typing import Callable, List, Tuple +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import TENSORFLOW from model_compression_toolkit.core.common.user_info import UserInformation @@ -28,7 +29,7 @@ from model_compression_toolkit.core.common.quantization.core_config import CoreConfig from model_compression_toolkit.core.common.quantization.debug_config import DebugConfig from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.runner import gptq_runner from model_compression_toolkit.ptq.runner import ptq_runner from model_compression_toolkit.core.exporter import export_model @@ -114,7 +115,7 @@ def keras_post_training_quantization(in_model: Model, network_editor=network_editor) ) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = KerasImplementation() @@ -249,7 +250,7 @@ def keras_post_training_quantization_mixed_precision(in_model: Model, network_editor=network_editor) ) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = KerasImplementation() diff --git a/model_compression_toolkit/legacy/pytorch_quantization_facade.py b/model_compression_toolkit/legacy/pytorch_quantization_facade.py index 630aa93c0..a28b9d7d7 100644 --- a/model_compression_toolkit/legacy/pytorch_quantization_facade.py +++ b/model_compression_toolkit/legacy/pytorch_quantization_facade.py @@ -14,6 +14,7 @@ # ============================================================================== from typing import Callable, List, Tuple +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import PYTORCH from model_compression_toolkit.core.common.user_info import UserInformation @@ -28,7 +29,7 @@ MixedPrecisionQuantizationConfig, DEFAULT_MIXEDPRECISION_CONFIG from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.gptq.runner import gptq_runner from model_compression_toolkit.ptq.runner import ptq_runner from model_compression_toolkit.core.exporter import export_model @@ -106,7 +107,7 @@ def pytorch_post_training_quantization(in_module: Module, debug_config=DebugConfig(analyze_similarity=analyze_similarity, network_editor=network_editor)) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = PytorchImplementation() @@ -235,7 +236,7 @@ def pytorch_post_training_quantization_mixed_precision(in_model: Module, debug_config=DebugConfig(analyze_similarity=analyze_similarity, network_editor=network_editor)) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = PytorchImplementation() diff --git a/model_compression_toolkit/ptq/keras/quantization_facade.py b/model_compression_toolkit/ptq/keras/quantization_facade.py index 829628e64..b0bdce3ad 100644 --- a/model_compression_toolkit/ptq/keras/quantization_facade.py +++ b/model_compression_toolkit/ptq/keras/quantization_facade.py @@ -17,6 +17,7 @@ from model_compression_toolkit.core import CoreConfig from model_compression_toolkit.core.analyzer import analyzer_model_quantization +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI @@ -24,7 +25,7 @@ MixedPrecisionQuantizationConfigV2 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities from model_compression_toolkit.core.exporter import export_model -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner if FOUND_TF: @@ -130,7 +131,7 @@ def keras_post_training_quantization_experimental(in_model: Model, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = KerasImplementation() diff --git a/model_compression_toolkit/ptq/pytorch/quantization_facade.py b/model_compression_toolkit/ptq/pytorch/quantization_facade.py index 9a946d471..a53051c86 100644 --- a/model_compression_toolkit/ptq/pytorch/quantization_facade.py +++ b/model_compression_toolkit/ptq/pytorch/quantization_facade.py @@ -15,6 +15,7 @@ from typing import Callable from model_compression_toolkit.core import common +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import PYTORCH, FOUND_TORCH from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities @@ -22,7 +23,7 @@ from model_compression_toolkit.core import CoreConfig from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \ MixedPrecisionQuantizationConfigV2 -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner from model_compression_toolkit.core.exporter import export_model from model_compression_toolkit.core.analyzer import analyzer_model_quantization @@ -102,7 +103,7 @@ def pytorch_post_training_quantization_experimental(in_module: Module, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(DEFAULT_PYTORCH_INFO) + tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO) fw_impl = PytorchImplementation() diff --git a/model_compression_toolkit/qat/keras/quantization_facade.py b/model_compression_toolkit/qat/keras/quantization_facade.py index 457b83e1f..bc43e91c1 100644 --- a/model_compression_toolkit/qat/keras/quantization_facade.py +++ b/model_compression_toolkit/qat/keras/quantization_facade.py @@ -17,6 +17,7 @@ from functools import partial from model_compression_toolkit.core import CoreConfig +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.constants import FOUND_TF from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI @@ -25,7 +26,7 @@ from mct_quantizers import KerasActivationQuantizationHolder from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner if FOUND_TF: @@ -177,7 +178,7 @@ def keras_quantization_aware_training_init(in_model: Model, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = KerasImplementation() diff --git a/model_compression_toolkit/qat/pytorch/quantization_facade.py b/model_compression_toolkit/qat/pytorch/quantization_facade.py index 181716b91..28df342e5 100644 --- a/model_compression_toolkit/qat/pytorch/quantization_facade.py +++ b/model_compression_toolkit/qat/pytorch/quantization_facade.py @@ -20,6 +20,7 @@ from model_compression_toolkit.core import CoreConfig from model_compression_toolkit.core import common +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.logger import Logger from model_compression_toolkit.core.common.framework_info import FrameworkInfo from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI @@ -27,7 +28,7 @@ MixedPrecisionQuantizationConfigV2 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \ TargetPlatformCapabilities -from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.ptq.runner import ptq_runner if FOUND_TORCH: @@ -145,7 +146,7 @@ def pytorch_quantization_aware_training_init(in_model: Module, Logger.info("Using experimental mixed-precision quantization. " "If you encounter an issue please file a bug.") - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = PytorchImplementation() diff --git a/tests/common_tests/helpers/prep_graph_for_func_test.py b/tests/common_tests/helpers/prep_graph_for_func_test.py index e41aea691..3c59beb2a 100644 --- a/tests/common_tests/helpers/prep_graph_for_func_test.py +++ b/tests/common_tests/helpers/prep_graph_for_func_test.py @@ -21,9 +21,9 @@ from model_compression_toolkit.core.common.quantization.quantization_analyzer import analyzer_graph from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \ calculate_quantization_params +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner -from model_compression_toolkit.core.runner import _init_tensorboard_writer, \ - _prepare_model_for_quantization +from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import generate_tp_model, \ get_op_quantization_configs @@ -114,7 +114,7 @@ def prepare_graph_set_bit_widths(in_model, debug_config=DebugConfig(analyze_similarity=analyze_similarity, network_editor=network_editor)) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) # convert old representative dataset generation to a generator def _representative_data_gen(): @@ -129,12 +129,12 @@ def _representative_data_gen(): tpc=tpc, mixed_precision_enable=core_config.mixed_precision_enable) - tg = _prepare_model_for_quantization(graph, + tg = quantization_preparation_runner(graph, _representative_data_gen, core_config, fw_info, - tb_w, - fw_impl) + fw_impl, + tb_w) ###################################### # Finalize bit widths diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/second_moment_correction_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/second_moment_correction_test.py index 74cce220b..03f5ed40d 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/second_moment_correction_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/second_moment_correction_test.py @@ -27,13 +27,14 @@ from model_compression_toolkit.core.common.network_editors import EditRule from model_compression_toolkit.core.common.statistics_correction.apply_second_moment_correction_to_graph import \ quantized_model_builder_for_second_moment_correction +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.core.keras.constants import EPSILON_VAL, GAMMA, BETA, MOVING_MEAN, MOVING_VARIANCE from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation from model_compression_toolkit.core.keras.statistics_correction.apply_second_moment_correction import \ keras_apply_second_moment_correction -from model_compression_toolkit.core.runner import _init_tensorboard_writer, core_runner +from model_compression_toolkit.core.runner import core_runner from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities @@ -277,7 +278,7 @@ def prepare_graph(self, network_editor=network_editor) ) - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = KerasImplementation() diff --git a/tests/pytorch_tests/model_tests/feature_models/second_moment_correction_test.py b/tests/pytorch_tests/model_tests/feature_models/second_moment_correction_test.py index 58b62512e..2d3a61b95 100644 --- a/tests/pytorch_tests/model_tests/feature_models/second_moment_correction_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/second_moment_correction_test.py @@ -24,6 +24,7 @@ from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.statistics_correction.apply_second_moment_correction_to_graph import \ quantized_model_builder_for_second_moment_correction +from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities from model_compression_toolkit.core.pytorch.constants import EPSILON_VAL, GAMMA, BETA, MOVING_MEAN, MOVING_VARIANCE @@ -32,7 +33,7 @@ from model_compression_toolkit.core.pytorch.statistics_correction.apply_second_moment_correction import \ pytorch_apply_second_moment_correction from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, set_model -from model_compression_toolkit.core.runner import _init_tensorboard_writer, core_runner +from model_compression_toolkit.core.runner import core_runner from tests.common_tests.helpers.generate_test_tp_model import generate_test_tp_model from tests.pytorch_tests.model_tests.base_pytorch_test import BasePytorchTest from tests.pytorch_tests.tpc_pytorch import get_pytorch_test_tpc_dict @@ -346,7 +347,7 @@ def prepare_graph(self, target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_INFO) -> \ Tuple[Graph, Graph]: - tb_w = _init_tensorboard_writer(fw_info) + tb_w = init_tensorboard_writer(fw_info) fw_impl = PytorchImplementation() From 25a954607112b65146df0c7e53de8737df82926d Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Thu, 30 Nov 2023 16:57:43 +0200 Subject: [PATCH 4/8] imports fixes --- tests/common_tests/helpers/prep_graph_for_func_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/common_tests/helpers/prep_graph_for_func_test.py b/tests/common_tests/helpers/prep_graph_for_func_test.py index 72e596dbe..78455ca8e 100644 --- a/tests/common_tests/helpers/prep_graph_for_func_test.py +++ b/tests/common_tests/helpers/prep_graph_for_func_test.py @@ -23,7 +23,6 @@ calculate_quantization_params from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner -from model_compression_toolkit.core.runner import _init_tensorboard_writer from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import generate_tp_model, \ get_op_quantization_configs From d415aacf02825f7f87f818c379537f05eaca6f76 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Thu, 30 Nov 2023 17:03:59 +0200 Subject: [PATCH 5/8] remove the old tb writer init function --- model_compression_toolkit/core/runner.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 387b3d271..3238ec48c 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -154,25 +154,6 @@ def core_runner(in_model: Any, return tg, bit_widths_config, hessian_info_service -def _init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter: - """ - Create a TensorBoardWriter object initialized with the logger dir path if it was set, - or None otherwise. - - Args: - fw_info: FrameworkInfo object. - - Returns: - A TensorBoardWriter object. - """ - tb_w = None - if Logger.LOG_PATH is not None: - tb_log_dir = os.path.join(os.getcwd(), Logger.LOG_PATH, 'tensorboard_logs') - Logger.info(f'To use Tensorboard, please run: tensorboard --logdir {tb_log_dir}') - tb_w = TensorboardWriter(tb_log_dir, fw_info) - return tb_w - - def _set_final_kpi(graph: Graph, final_bit_widths_config: List[int], kpi_functions_dict: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]], From 384cf9dcc97c6c0101690f7dc59b39bd5e172902 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Thu, 30 Nov 2023 17:04:35 +0200 Subject: [PATCH 6/8] remove import --- model_compression_toolkit/core/runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index 3238ec48c..ccf76a68a 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -14,7 +14,6 @@ # ============================================================================== -import os from typing import Callable, Tuple, Any, List, Dict import numpy as np From 31528434a9138c880a3980ab8abed04661ce77e9 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Thu, 30 Nov 2023 17:12:26 +0200 Subject: [PATCH 7/8] fix import --- tests/common_tests/helpers/prep_graph_for_func_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/common_tests/helpers/prep_graph_for_func_test.py b/tests/common_tests/helpers/prep_graph_for_func_test.py index 78455ca8e..3c59beb2a 100644 --- a/tests/common_tests/helpers/prep_graph_for_func_test.py +++ b/tests/common_tests/helpers/prep_graph_for_func_test.py @@ -23,6 +23,7 @@ calculate_quantization_params from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner +from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import generate_tp_model, \ get_op_quantization_configs From 0f085be306d6a4cd2bc14d296a669239f8b8fca0 Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Sun, 3 Dec 2023 08:58:18 +0200 Subject: [PATCH 8/8] Add missing call --- model_compression_toolkit/core/runner.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/model_compression_toolkit/core/runner.py b/model_compression_toolkit/core/runner.py index ccf76a68a..6fcd77e53 100644 --- a/model_compression_toolkit/core/runner.py +++ b/model_compression_toolkit/core/runner.py @@ -37,7 +37,8 @@ from model_compression_toolkit.core.common.visualization.final_config_visualizer import \ WeightsFinalBitwidthConfigVisualizer, \ ActivationFinalBitwidthConfigVisualizer -from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter +from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \ + finalize_bitwidth_in_tb def core_runner(in_model: Any, @@ -141,14 +142,7 @@ def core_runner(in_model: Any, f'Final activation bit-width configuration: {[node_b[1] for node_b in activation_conf_nodes_bitwidth]}') if tb_w is not None: - if len(weights_conf_nodes_bitwidth) > 0: - visual = WeightsFinalBitwidthConfigVisualizer(weights_conf_nodes_bitwidth) - figure = visual.plot_config_bitwidth() - tb_w.add_figure(figure, f'Weights final bit-width config') - if len(activation_conf_nodes_bitwidth) > 0: - visual = ActivationFinalBitwidthConfigVisualizer(activation_conf_nodes_bitwidth) - figure = visual.plot_config_bitwidth() - tb_w.add_figure(figure, f'Activation final bit-width config') + finalize_bitwidth_in_tb(tb_w, weights_conf_nodes_bitwidth, activation_conf_nodes_bitwidth) return tg, bit_widths_config, hessian_info_service