From e76110af20c3a38ae338ca648157661879f800fd Mon Sep 17 00:00:00 2001 From: Ofir Gordon Date: Tue, 28 Nov 2023 14:46:37 +0200 Subject: [PATCH] Remove constant multiplication factor from activation hessian computation (#870) Co-authored-by: Ofir Gordon --- .../hessian/activation_trace_hessian_calculator_keras.py | 2 +- .../hessian/activation_trace_hessian_calculator_pytorch.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py b/model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py index 4f5080fa1..916812826 100644 --- a/model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +++ b/model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py @@ -152,7 +152,7 @@ def compute(self) -> List[float]: # Compute the final approximation for each output index num_node_outputs = len(interest_point_scores[0]) for output_idx in range(num_node_outputs): - final_approx_per_output.append(2 * tf.reduce_mean([x[output_idx] for x in interest_point_scores]) / output.shape[-1]) + final_approx_per_output.append(tf.reduce_mean([x[output_idx] for x in interest_point_scores])) # final_approx_per_output is a list of all approximations (one per output), thus we average them to # get the final score of a node. diff --git a/model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py b/model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py index 0ef2d50b1..baf612b16 100644 --- a/model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +++ b/model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py @@ -131,8 +131,8 @@ def compute(self) -> List[float]: break trace_hv.append(hessian_trace_approx) - ipts_hessian_trace_approx.append(2 * torch.mean(torch.stack(trace_hv)) / output.shape[ - -1]) # Get averaged Hessian trace approximation + + ipts_hessian_trace_approx.append(torch.mean(torch.stack(trace_hv))) # Get averaged Hessian trace approximation # If a node has multiple outputs, it means that multiple approximations were computed # (one per output since granularity is per-tensor). In this case we average the approximations.