Upgrade ONNX unit-tests to be compatible with PyTorch 1.13 #2585

Merged (9 commits) on Dec 1, 2023
@@ -105,6 +105,7 @@ def adaround_module(cls, module: ModuleInfo, quantized_input_name: str,
         # After optimization, set the optimized layer's rounding mode to "Hard rounding"
         param_to_adaround_tensor_quantizer[module.params['weight'].name].use_soft_rounding = False
 
+    # pylint: disable=too-many-statements
     @classmethod
     def _optimize_rounding(cls, module: ModuleInfo, quantized_input_name,
                            orig_model: ModelProto, quant_model: QuantizationSimModel,
@@ -150,9 +151,10 @@ def _optimize_rounding(cls, module: ModuleInfo, quantized_input_name,
                                                               out_data_torch.shape)
 
         attributes = read_attributes_for_op(module)
-        if len(attributes['pads']) > 2:
-            logger.info("Skipping the Convolution layer because padding size of 4 is not supported for optimization")
-            return
+        if 'pads' in attributes:
+            if len(attributes['pads']) > 2:
+                logger.info("Skipping the Convolution layer because padding size greater than 2 is not supported for optimization")
+                return
 
         if use_cache_acts_data and AdaroundOptimizer.enable_caching_acts_data():
             logger.debug("Caching intermediate activations data for optimization.")
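
For context on the hunk above: an ONNX Conv node's 'pads' attribute holds begin/end padding values per spatial axis, so anything beyond two values means a 2-D (or higher) padding configuration, which this optimizer skips; the added presence check also tolerates Conv nodes that omit 'pads' entirely. Below is a minimal, self-contained sketch of the same guard using plain onnx; `read_attributes_for_op` is AIMET-internal, so the sketch substitutes `onnx.helper.get_attribute_value` and a hypothetical `conv_is_supported` helper.

```python
# Minimal sketch: the "skip when 'pads' has more than two values" guard, rebuilt with
# plain onnx instead of AIMET's read_attributes_for_op. conv_is_supported is hypothetical.
import onnx
from onnx import helper


def conv_is_supported(node: onnx.NodeProto) -> bool:
    """Return False for Conv nodes whose 'pads' attribute carries more than two values."""
    attributes = {attr.name: helper.get_attribute_value(attr) for attr in node.attribute}
    if 'pads' in attributes and len(attributes['pads']) > 2:
        return False
    return True


# A 2-D Conv: 'pads' holds four values (begin/end for two spatial axes), so it is skipped.
conv_2d = helper.make_node('Conv', inputs=['x', 'w'], outputs=['y'],
                           kernel_shape=[3, 3], pads=[1, 1, 1, 1])
# A Conv without a 'pads' attribute: the presence check added in this PR keeps it eligible.
conv_no_pads = helper.make_node('Conv', inputs=['x', 'w'], outputs=['y'], kernel_shape=[3, 3])

print(conv_is_supported(conv_2d))       # False
print(conv_is_supported(conv_no_pads))  # True
```
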
@@ -148,15 +148,19 @@ def check_if_node_has_predecessor(node):
             for node_output in node.output:
                 output_names[node_output] = node
 
+        # Capture constant tensors associated with a node that has only constant tensor inputs, where the constants are not in the form of a Constant node.
         for node in self.model.graph.node:
-            for input_name in node.input:
-                if node.op_type not in OPS_WITH_PARAMS and not check_if_node_has_predecessor(node) and input_name not in output_names:
-                    input_tensors_names.append(input_name)
+            if node.op_type != 'Identity' and node.op_type not in OPS_WITH_PARAMS and not check_if_node_has_predecessor(node):
+                for input_name in node.input:
+                    if input_name not in output_names:
+                        input_tensors_names.append(input_name)
 
+        # Capture model input tensors.
         for tensor in self.model.graph.input:
            if tensor.name not in input_tensors_names and tensor.name in self._input_to_node:
                input_tensors_names.append(tensor.name)
 
+        # Capture nodes whose inputs are all constant tensors and those constants come from a Constant node.
         input_ops = []
         for node in self.model.graph.node:
             flag = True
@@ -168,13 +172,14 @@
                 else:
                     flag = False
                     break
-            if flag:
+            if flag and node not in input_ops:
                 input_ops.append(node)
 
         for input_tensor_name in input_tensors_names:
             if input_tensor_name in self._input_to_node:
                 for node in self._input_to_node[input_tensor_name]:
-                    input_ops.append(node)
+                    if node not in input_ops:
+                        input_ops.append(node)
 
         return input_ops
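
The two hunks above refine how graph input ops are collected: Identity and parameter-carrying ops are excluded when gathering stand-alone constant input tensors, and the new membership checks keep a node from being appended twice when it is reachable both through a constant input and through a model input tensor. The sketch below mirrors that deduplication on a plain ONNX graph; `find_input_nodes` and the two-pass split are illustrative simplifications, not AIMET's ConnectedGraph code.

```python
# Simplified sketch of collecting a graph's "input ops" without duplicates. The two
# passes and find_input_nodes are illustrative; AIMET's ConnectedGraph does more.
import onnx
from onnx import TensorProto, helper


def find_input_nodes(model: onnx.ModelProto):
    produced = {out for node in model.graph.node for out in node.output}
    graph_inputs = {inp.name for inp in model.graph.input}
    input_nodes = []

    # Pass 1: nodes reading tensors no other node produces (initializers / constants).
    for node in model.graph.node:
        if any(name not in produced and name not in graph_inputs for name in node.input):
            if node not in input_nodes:
                input_nodes.append(node)

    # Pass 2: nodes reading the model's declared inputs. The membership guard mirrors
    # the checks added in this PR so a node found by both passes is listed only once.
    for node in model.graph.node:
        if any(name in graph_inputs for name in node.input):
            if node not in input_nodes:
                input_nodes.append(node)

    return input_nodes


# A single Conv reads both the graph input 'x' and the initializer 'w', so it would be
# picked up by both passes; the guard keeps it from appearing twice.
x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 1, 4, 4])
y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 1, 4, 4])
w = helper.make_tensor('w', TensorProto.FLOAT, [1, 1, 1, 1], [1.0])
conv = helper.make_node('Conv', ['x', 'w'], ['y'], kernel_shape=[1, 1])
model = helper.make_model(helper.make_graph([conv], 'toy', [x], [y], initializer=[w]))

print([n.op_type for n in find_input_nodes(model)])  # ['Conv']
```
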

@@ -560,18 +565,22 @@ def create_batchnorm_params(my_op: Op):
             op = my_op.get_module()
 
             gamma_tensor = ParamUtils.get_param(self.model, op, WEIGHT_INDEX)
-            create_and_connect_product(gamma_tensor.name, gamma_tensor.dims, my_op, gamma_tensor, 'weight')
+            if gamma_tensor:
+                create_and_connect_product(gamma_tensor.name, gamma_tensor.dims, my_op, gamma_tensor, 'weight')
 
             beta_tensor = ParamUtils.get_param(self.model, op, BIAS_INDEX)
-            create_and_connect_product(beta_tensor.name, beta_tensor.dims, my_op, beta_tensor, 'bias')
+            if beta_tensor:
+                create_and_connect_product(beta_tensor.name, beta_tensor.dims, my_op, beta_tensor, 'bias')
 
             moving_mean_tensor = ParamUtils.get_param(self.model, op, RUNNING_MEAN_INDEX)
-            create_and_connect_product(moving_mean_tensor.name, moving_mean_tensor.dims, my_op,
-                                       moving_mean_tensor, None)
+            if moving_mean_tensor:
+                create_and_connect_product(moving_mean_tensor.name, moving_mean_tensor.dims, my_op,
+                                           moving_mean_tensor, None)
 
             moving_variance_tensor = ParamUtils.get_param(self.model, op, RUNNING_VAR_INDEX)
-            create_and_connect_product(moving_variance_tensor.name, moving_variance_tensor.dims, my_op,
-                                       moving_variance_tensor, None)
+            if moving_variance_tensor:
+                create_and_connect_product(moving_variance_tensor.name, moving_variance_tensor.dims, my_op,
+                                           moving_variance_tensor, None)
 
         def handle_default(my_op: Op):
             """ Handler for other modules """
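
A BatchNormalization parameter lookup can come back empty, for example when scale, bias, or the running statistics are produced by upstream nodes instead of being stored as initializers, which is why each `create_and_connect_product` call above is now guarded by a None check. The sketch below shows one way such a lookup returns None; `get_bn_param` is an illustrative stand-in for AIMET's `ParamUtils.get_param`, and the index values follow the ONNX BatchNormalization input order (X, scale, B, input_mean, input_var).

```python
# Sketch: a parameter lookup that returns None when a BatchNormalization input has no
# initializer (e.g. it is computed upstream). get_bn_param stands in for
# ParamUtils.get_param; the indices follow ONNX BatchNormalization input order.
import onnx
from onnx import TensorProto, helper

WEIGHT_INDEX, BIAS_INDEX, RUNNING_MEAN_INDEX, RUNNING_VAR_INDEX = 1, 2, 3, 4


def get_bn_param(graph: onnx.GraphProto, node: onnx.NodeProto, index: int):
    """Return the initializer feeding the given input of the node, or None."""
    name = node.input[index]
    return next((init for init in graph.initializer if init.name == name), None)


scale = helper.make_tensor('scale', TensorProto.FLOAT, [2], [1.0, 1.0])
bias = helper.make_tensor('bias', TensorProto.FLOAT, [2], [0.0, 0.0])
# 'mean' and 'var' arrive as dynamic tensors here, so they have no initializer entry.
bn = helper.make_node('BatchNormalization', ['x', 'scale', 'bias', 'mean', 'var'], ['y'])
graph = helper.make_graph([bn], 'toy', [], [], initializer=[scale, bias])

for idx in (WEIGHT_INDEX, BIAS_INDEX, RUNNING_MEAN_INDEX, RUNNING_VAR_INDEX):
    param = get_bn_param(graph, bn, idx)
    if param:  # the same guard the PR adds before each create_and_connect_product call
        print(f"would connect {param.name}")
    else:
        print(f"skipping {bn.input[idx]}: no initializer found")
```
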
2 changes: 1 addition & 1 deletion TrainingExtensions/onnx/src/python/aimet_onnx/utils.py
@@ -98,7 +98,7 @@ def remove_node(node: ModelProto, onnx_graph: onnx.GraphProto):
                     other_node.input[idx] = node.input[0]
     # Check if removed node output is an output of the graph
     for outputs in onnx_graph.output:
-        if outputs.name in node.output[0] and other_node.output[0] == node.input[0]:
+        if outputs.name == node.output[0] and other_node.output[0] == node.input[0]:
             other_node.output[0] = outputs.name
     inits_to_remove = []
     # Remove the node's initializers
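
The one-line fix above in `remove_node` swaps a substring test for exact equality: with Python strings, `outputs.name in node.output[0]` is containment, so a graph output named, say, 'out' would also match a node output named 'conv_out' and trigger the rewiring for the wrong node. A tiny illustration with made-up tensor names:

```python
# Why `in` -> `==` matters here: between two Python strings, `in` is a substring test.
# Tensor names below are made up for illustration.
graph_output_name = "out"
node_output_name = "conv_out"

print(graph_output_name in node_output_name)   # True  -> old check would match the wrong node
print(graph_output_name == node_output_name)   # False -> new check requires the exact name
print(graph_output_name == "out")              # True  -> the real graph output still matches
```
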
@@ -55,6 +55,7 @@ class TestAdaroundOptimizer:
     Test functions in utils
     """
 
+    @pytest.mark.skipif(not torch.cuda.is_available(), reason="This unit-test is meant to be run on GPU")
     @pytest.mark.parametrize("warm_start", [1.0, 0.2])
     def test_optimize_rounding(self, warm_start):
         if version.parse(torch.__version__) >= version.parse("1.13"):
2 changes: 2 additions & 0 deletions TrainingExtensions/onnx/test/python/test_adaround_weight.py
@@ -41,6 +41,7 @@
 import numpy as np
 import torch
 from onnxruntime import SessionOptions, GraphOptimizationLevel, InferenceSession
+import pytest
 
 from aimet_onnx.adaround.adaround_weight import Adaround, AdaroundParameters
 import models.models_for_tests as test_models
@@ -50,6 +51,7 @@ class TestAdaround:
     AdaRound Weights Unit Test Cases
     """
 
+    @pytest.mark.skipif(not torch.cuda.is_available(), reason="This unit-test is meant to be run on GPU")
     def test_apply_adaround(self):
         if version.parse(torch.__version__) >= version.parse("1.13"):
             np.random.seed(0)
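
Both test files gate the AdaRound GPU tests behind the same marker, so a CPU-only PyTorch 1.13 environment collects and skips them rather than failing on CUDA calls, while the existing version check keeps the 1.13-specific path. A minimal sketch of the combined pattern with a placeholder assertion (assuming `packaging` is available, as the tests' `version.parse` calls imply):

```python
# Sketch of the GPU gating used above: the test is collected everywhere but only runs
# where a CUDA device is visible; the PyTorch version gate can sit inside as before.
import pytest
import torch
from packaging import version


class TestGpuOnly:
    @pytest.mark.skipif(not torch.cuda.is_available(), reason="This unit-test is meant to be run on GPU")
    def test_needs_cuda(self):
        if version.parse(torch.__version__) >= version.parse("1.13"):
            assert torch.zeros(1).cuda().device.type == "cuda"
```
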