From 29108c9560619f6749adc0c83765e1a32b823a3a Mon Sep 17 00:00:00 2001
From: reuvenp
Date: Tue, 26 Nov 2024 15:23:46 +0200
Subject: [PATCH] Remove commented-out code and debug leftovers

---
 .../core/keras/data_util.py                     |  1 +
 ...ctivation_hessian_scores_calculator_keras.py | 13 +++----------
 ...ivation_hessian_scores_calculator_pytorch.py |  2 +-
 .../gptq/keras/quantization_facade.py           | 17 -----------------
 .../feature_networks/gptq/gptq_test.py          | 14 --------------
 5 files changed, 5 insertions(+), 42 deletions(-)

diff --git a/model_compression_toolkit/core/keras/data_util.py b/model_compression_toolkit/core/keras/data_util.py
index a7adeebb3..f1fba0ef3 100644
--- a/model_compression_toolkit/core/keras/data_util.py
+++ b/model_compression_toolkit/core/keras/data_util.py
@@ -74,6 +74,7 @@ def __init__(self, data_gen_fn: Callable[[], Generator]):
         self.orig_batch_size = inputs[0].shape[0]
         self._size = None
 
+        # TFDatasetFromGenerator flattens the dataset, thus we ignore the batch dimension
         output_signature = get_tensor_spec(inputs, ignore_batch_dim=True)
         self.dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen_fn), output_signature=output_signature)
 
diff --git a/model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py b/model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py
index 9a86a252f..e18a96b24 100644
--- a/model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py
+++ b/model_compression_toolkit/core/keras/hessian/activation_hessian_scores_calculator_keras.py
@@ -60,8 +60,6 @@ def compute(self) -> List[np.ndarray]:
         Returns:
             List[np.ndarray]: Scores based on the Hessian-approximation for the requested nodes.
         """
-        # self.hessian_request.granularity = HessianScoresGranularity.PER_TENSOR
-
         model_output_nodes = [ot.node for ot in self.graph.get_outputs()]
 
         if len([n for n in self.hessian_request.target_nodes if n in model_output_nodes]) > 0:
@@ -141,7 +139,6 @@ def compute(self) -> List[np.ndarray]:
                     # we stop the calculation.
                     if j > MIN_HESSIAN_ITER:
                         if prev_mean_results is not None:
-                            # new_mean_res = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
                             if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
                                 new_mean_res = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
                                 relative_delta_per_node = (tf.abs(new_mean_res - prev_mean_results) /
@@ -150,7 +147,7 @@ def compute(self) -> List[np.ndarray]:
                             elif self.hessian_request.granularity == HessianScoresGranularity.PER_ELEMENT:
                                 # Reshape to maintain spatial dimensions and average only across batch
-                                new_mean_res = [tf.reduce_mean(h, axis=0) for h in ipts_hessian_approximations]  # List of tensors with shapes [H1,W1,C], [H2,W2,C], etc.
+                                new_mean_res = [tf.reduce_mean(h, axis=0) for h in ipts_hessian_approximations]
                                 relative_deltas = [
                                     tf.abs(new - prev) / (tf.abs(new) + 1e-6)
                                     for new, prev in zip(new_mean_res, prev_mean_results)
                                 ]
@@ -158,17 +155,13 @@ def compute(self) -> List[np.ndarray]:
                                 max_delta = tf.reduce_max([tf.reduce_max(delta) for delta in relative_deltas])
 
                             if max_delta < HESSIAN_COMP_TOLERANCE:
-                                print(f"breaking - got to converge in iteration {j}")
                                 break
+
                     if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
                         prev_mean_results = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
                     elif self.hessian_request.granularity == HessianScoresGranularity.PER_ELEMENT:
                         # Reshape to maintain spatial dimensions and average only across batch
-                        prev_mean_results = [tf.reduce_mean(h, axis=0) for h in ipts_hessian_approximations]  # List of tensors with shapes [H1,W1,C], [H2,W2,C], etc.
-
-                        # stacked = tf.stack(ipts_hessian_approximations)  # Shape: [3, 32, H, W, C]
-                        # prev_mean_results = tf.reduce_mean(stacked, axis=1)  # Shape: [3, H, W, C]
-                        # prev_mean_results = tf.reduce_mean(tf.stack(ipts_hessian_approximations), axis=1)
+                        prev_mean_results = [tf.reduce_mean(h, axis=0) for h in ipts_hessian_approximations]
 
             # Convert results to list of numpy arrays
             hessian_results = [h.numpy() for h in ipts_hessian_approximations]
diff --git a/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py b/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
index a086193a1..9151f21b7 100644
--- a/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
+++ b/model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py
@@ -157,7 +157,7 @@ def _compute_per_channel(self, output, target_activation_tensors):
         assert self.hessian_request.granularity == HessianScoresGranularity.PER_OUTPUT_CHANNEL
         ipts_hessian_approx_scores = [torch.tensor(0.0, requires_grad=True, device=output.device)
                                       for _ in range(len(target_activation_tensors))]
-        # TODO: why no convergence test?
+
         for j in tqdm(range(self.num_iterations_for_approximation), "Hessian random iterations"):  # Approximation iterations
             v = self._generate_random_vectors_batch(output.shape, output.device)
             f_v = torch.sum(v * output)
diff --git a/model_compression_toolkit/gptq/keras/quantization_facade.py b/model_compression_toolkit/gptq/keras/quantization_facade.py
index e36b26566..7a44ff78f 100644
--- a/model_compression_toolkit/gptq/keras/quantization_facade.py
+++ b/model_compression_toolkit/gptq/keras/quantization_facade.py
@@ -127,23 +127,6 @@ def get_keras_gptq_config(n_epochs: int,
                                                           hessians_num_samples=GPTQ_HESSIAN_NUM_SAMPLES,
                                                           hessian_batch_size=hessian_batch_size)
 
-    # if use_hessian_sample_attention:
-    #     if not use_hessian_based_weights:  # pragma: no cover
-    #         raise ValueError(
-    #             'use_hessian_based_weights must be set to True in order to use Sample Layer Attention.')
-    #     hessian_weights_config = GPTQHessianScoresConfig(
-    #         hessians_num_samples=None,
-    #         norm_scores=False,
-    #         log_norm=False,
-    #         scale_log_norm=False,
-    #         hessian_batch_size=hessian_batch_size,
-    #         per_sample=True
-    #     )
-    #     loss = loss or sample_layer_attention_loss
-    # else:
-    #     hessian_weights_config = GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size)
-    #     loss = loss or GPTQMultipleTensorsLoss()
-
     if isinstance(gradual_activation_quantization, bool):
         gradual_quant_config = GradualActivationQuantizationConfig() if gradual_activation_quantization else None
     elif isinstance(gradual_activation_quantization, GradualActivationQuantizationConfig):
diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py
index ead217751..487297ced 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/gptq/gptq_test.py
@@ -77,12 +77,6 @@ def __init__(self,
                  reg_factor=1,
                  grad_act_quant_cfg=None,
                  per_sample=False):
 
-        # def __init__(self, unit_test, quant_method=QuantizationMethod.SYMMETRIC, rounding_type=RoundingType.STE,
-        #              per_channel=True, input_shape=(1, 16, 16, 3),
-        #              hessian_weights=True, log_norm_weights=True, scaled_log_norm=False, norm_scores=False,
-        #              quantization_parameter_learning=True, num_calibration_iter=GPTQ_HESSIAN_NUM_SAMPLES,
-        #              hessian_num_samples=GPTQ_HESSIAN_NUM_SAMPLES, per_sample=False,
-        #              reg_factor=1, grad_act_quant_cfg=None):
         super().__init__(unit_test,
                          input_shape=input_shape,
@@ -140,14 +134,6 @@ def get_gptq_config(self):
                                  gradual_activation_quantization_config=self.grad_act_quant_cfg,
                                  regularization_factor=self.reg_factor
                                  )
-        # return GradientPTQConfig(5,
-        #                          optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
-        #                          optimizer_rest=tf.keras.optimizers.Adam(learning_rate=0.0001),
-        #                          loss=multiple_tensors_mse_loss, train_bias=True, rounding_type=self.rounding_type,
-        #                          hessian_weights_config=self.hessian_weights_config,
-        #                          gradual_activation_quantization_config=self.grad_act_quant_cfg,
-        #                          regularization_factor=self.reg_factor,
-        #                          gptq_quantizer_params_override=self.override_params)
 
     def create_networks(self):
         in_shape = self.get_input_shapes()[0][1:]
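
Note for reviewers (not part of the patch): the Keras hunks above keep the early-stopping convergence test while removing only the commented-out variants and the debug print. Below is a minimal standalone sketch of that per-tensor test in plain NumPy rather than MCT's TF code; estimate_fn is a hypothetical stand-in for the random-vector (Hutchinson-style) update, and the constant values are illustrative, not MCT's actual MIN_HESSIAN_ITER / HESSIAN_COMP_TOLERANCE.

    import numpy as np

    MIN_HESSIAN_ITER = 10          # illustrative value only
    HESSIAN_COMP_TOLERANCE = 1e-4  # illustrative value only

    def approximate_with_early_stop(estimate_fn, num_iterations=100):
        """estimate_fn(j) -> running Hessian-trace estimates after random-vector
        iteration j, shaped [num_nodes, batch] (one row per target node)."""
        prev_mean = None
        estimates = None
        for j in range(num_iterations):
            estimates = estimate_fn(j)
            new_mean = estimates.mean(axis=1)  # per-tensor: average over the batch
            if j > MIN_HESSIAN_ITER and prev_mean is not None:
                rel_delta = np.abs(new_mean - prev_mean) / (np.abs(new_mean) + 1e-6)
                if rel_delta.max() < HESSIAN_COMP_TOLERANCE:
                    break  # running means stabilized; stop early, as in the hunk above
            prev_mean = new_mean
        return estimates

The PER_ELEMENT branch differs only in the reduction: each node's estimate is averaged over the batch axis alone, preserving spatial dimensions, before the same relative-delta check.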