use maxcut for total ru

sony · Jan 13, 2025 · e718734 · e718734
1 parent f60dcef
commit e718734
Show file tree

Hide file tree

Showing 8 changed files with 238 additions and 289 deletions.
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -26,7 +26,7 @@
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget, ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
-    ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
+    TargetInclusionCriterion, BitwidthMode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
     MixedPrecisionRUHelper
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
@@ -67,13 +67,19 @@ def __init__(self,
         self.compute_metric_fn = self.get_sensitivity_metric()
         self._cuts = None
 
-        self.ru_metrics = target_resource_utilization.get_restricted_metrics()
+        # To define RU Total constraints we need to compute weights and activations even if they have no constraints
+        # TODO currently this logic is duplicated in linear_programming.py
+        targets = target_resource_utilization.get_restricted_metrics()
+        if RUTarget.TOTAL in targets:
+            targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
+        self.ru_targets_to_compute = targets
+
         self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
-        self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
-        self.non_conf_ru_dict = self._non_configurable_nodes_ru()
+        self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
+        self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
 
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
@@ -111,18 +117,14 @@ def get_sensitivity_metric(self) -> Callable:
     def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
         """
         Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
-        The matrix is constructed as follows (for a given target):
-        - Each row represents the set of resource utilization values for a specific resource utilization 
-            measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
-        - Each entry in a specific column represents the resource utilization value of a given configuration 
-            (single layer is configured with specific candidate, all other layer are at the minimal resource 
-            utilization configuration) for the resource utilization measure of the respective row.
+        Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
 
         Args:
             target: The resource target for which the resource utilization is calculated (a RUTarget value).
 
-        Returns: A resource utilization matrix.
-
+        Returns:
+            A resource utilization matrix of shape (num memory elements, num configurations). Num memory elements
+            depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
         """
         assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
 
@@ -132,21 +134,14 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
         for c, c_n in enumerate(configurable_sorted_nodes):
             for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
                 if candidate_idx == self.min_ru_config[c]:
-                    # skip ru computation for min configuration. Since we compute the difference from min_ru it'll
-                    # always be 0 for all entries in the results vector.
-                    candidate_rus = np.zeros(shape=self.min_ru[target].shape)
+                    candidate_rus = self.min_ru[target]
                 else:
-                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]
+                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
 
                 ru_matrix.append(np.asarray(candidate_rus))
 
-        # We need to transpose the calculated ru matrix to allow later multiplication with
-        # the indicators' diagonal matrix.
-        # We only move the first axis (num of configurations) to be last,
-        # the remaining axes include the metric specific nodes (rows dimension of the new tensor)
-        # and the ru metric values (if they are non-scalars)
-        np_ru_matrix = np.array(ru_matrix)
-        return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
+        np_ru_matrix = np.array(ru_matrix) - self.min_ru[target]    # num configurations X num elements
+        return np_ru_matrix.T
 
     def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
         """
@@ -162,7 +157,6 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int,
 
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        # TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
         return self.ru_helper.compute_utilization({target}, cfg)[target]
 
     @staticmethod
@@ -183,18 +177,6 @@ def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int
         updated_cfg[idx] = value
         return updated_cfg
 
-    def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
-        """
-        Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the 
-        resource utilization targets.
-
-        Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
-        """
-        ru_metrics = self.ru_metrics - {RUTarget.BOPS}
-        ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
-        ru[RUTarget.BOPS] = None
-        return ru
-
     def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
         """
         Computes the resource utilization values for a given mixed-precision configuration.
@@ -206,7 +188,7 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
         with the given config.
 
         """
-        act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
+        act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
         ru = self.ru_helper.ru_calculator.compute_resource_utilization(
             target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
             w_qcs=w_qcs)

diff --git a/...core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/...core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
@@ -152,20 +152,18 @@ def compute_resource_utilization(self,
         elif w_qcs is not None:    # pragma: no cover
             raise ValueError('Weight configuration passed but no relevant metric requested.')
 
-        if act_qcs and not {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):    # pragma: no cover
-            raise ValueError('Activation configuration passed but no relevant metric requested.')
-        if RUTarget.ACTIVATION in ru_targets:
+        if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
             a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
+        elif act_qcs is not None:    # pragma: no cover
+            raise ValueError('Activation configuration passed but no relevant metric requested.')
 
         ru = ResourceUtilization()
         if RUTarget.WEIGHTS in ru_targets:
             ru.weights_memory = w_total
         if RUTarget.ACTIVATION in ru_targets:
             ru.activation_memory = a_total
         if RUTarget.TOTAL in ru_targets:
-            # TODO use maxcut
-            act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
-            ru.total_memory = w_total + act_tensors_total
+            ru.total_memory = w_total + a_total
         if RUTarget.BOPS in ru_targets:
             ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
                                            bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)

diff --git a/..._compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py b/..._compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Set, Dict, Optional, Tuple
+from typing import List, Set, Dict, Optional, Tuple, Any
 
 import numpy as np
 
 from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget
@@ -44,9 +43,8 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple
     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
         """
         Compute utilization of requested targets for a specific configuration in the format expected by LP problem
-        formulation, namely an array of ru values corresponding to graph's configurable nodes in the topological order.
-        For activation target, the array contains values for activation cuts in unspecified order (as long as it is
-        consistent between configurations).
+        formulation, namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
+        (between calls).
 
         Args:
             ru_targets: resource utilization targets to compute.
@@ -57,33 +55,26 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
         """
 
         ru = {}
-
-        act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
-        w_util = None
+        act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
         if RUTarget.WEIGHTS in ru_targets:
-            w_util = self._weights_utilization(w_qcs)
-            ru[RUTarget.WEIGHTS] = np.array(list(w_util.values()))
+            wu = self._weights_utilization(w_qcs)
+            ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
 
-        # TODO make mp agnostic to activation method
         if RUTarget.ACTIVATION in ru_targets:
-            act_util = self._activation_maxcut_utilization(act_qcs)
-            ru[RUTarget.ACTIVATION] = np.array(list(act_util.values()))
-
-        # TODO use maxcut
-        if RUTarget.TOTAL in ru_targets:
-            act_tensors_util = self._activation_tensor_utilization(act_qcs)
-            w_util = w_util or self._weights_utilization(w_qcs)
-            total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
-                     # for n in self.graph.nodes if n in act_tensors_util or n in w_util}
-                     for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
-            ru[RUTarget.TOTAL] = np.array(list(total.values()))
+            au = self._activation_utilization(act_qcs)
+            ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
 
         if RUTarget.BOPS in ru_targets:
             ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
 
+        if RUTarget.TOTAL in ru_targets:
+            raise ValueError('Total target should be computed based on weights and activations targets.')
+
+        assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
+                                            f'Requested {ru_targets}')
         return ru
 
-    def get_configurable_qcs(self, mp_cfg) \
+    def get_quantization_candidates(self, mp_cfg) \
             -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
         """
         Retrieve quantization candidates objects for weights and activations from the configuration list.
@@ -92,15 +83,13 @@ def get_configurable_qcs(self, mp_cfg) \
             mp_cfg: a list of candidates indices for configurable layers.
 
         Returns:
-            Mapping between nodes to weights quantization config, and a mapping between nodes and activation
+            A mapping between nodes and activation quantization config, and a mapping between nodes and weights
             quantization config.
         """
         mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
         node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
-        act_qcs = {n: node_qcs[n].activation_quantization_cfg
-                   for n in self.graph.get_activation_configurable_nodes()}
-        w_qcs = {n: node_qcs[n].weights_quantization_cfg
-                 for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
+        act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
+        w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
         return act_qcs, w_qcs
 
     def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
@@ -127,8 +116,8 @@ def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantiz
         nodes_util = {n: u.bytes for n, u in nodes_util.items()}
         return nodes_util
 
-    def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
-            -> Optional[Dict[Cut, float]]:
+    def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
+            -> Optional[Dict[Any, float]]:
         """
         Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
 
@@ -138,57 +127,34 @@ def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeAc
         Returns:
             Activation utilization per cut, or empty dict if no configuration was passed.
         """
-        if act_qcs:
-            _, cuts_util, _ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
-                                                                                    bitwidth_mode=BitwidthMode.QCustom,
-                                                                                    act_qcs=act_qcs)
-            cuts_util = {c: u.bytes for c, u in cuts_util.items()}
-            return cuts_util
-
-        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
-        # configurable nodes.
-        return {}
-
-    def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
-        """
-        Compute activation tensors utilization fo configurable nodes if configuration is passed or
-        for non-configurable nodes otherwise.
-
-        Args:
-            act_qcs: activation quantization configuration or None.
-
-        Returns:
-            Activation utilization per node.
-        """
-        if act_qcs:
-            target_criterion = TargetInclusionCriterion.QConfigurable
-            bitwidth_mode = BitwidthMode.QCustom
-        else:
-            target_criterion = TargetInclusionCriterion.QNonConfigurable
-            bitwidth_mode = BitwidthMode.QDefaultSP
-
-        _, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
-                                                                                  bitwidth_mode=bitwidth_mode,
-                                                                                  act_qcs=act_qcs)
-        return {n: u.bytes for n, u in nodes_util.items()}
-
-    def _bops_utilization(self, mp_cfg: List[int]):
+        # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
+        # covered by the computation of configurable activations.
+        if not act_qcs:
+            return {}
+
+        _, cuts_util, *_ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
+                                                                                 bitwidth_mode=BitwidthMode.QCustom,
+                                                                                 act_qcs=act_qcs)
+        cuts_util = {c: u.bytes for c, u in cuts_util.items()}
+        return cuts_util
+
+    def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
         """
         Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
         according to the given mixed-precision configuration of a virtual graph with composed nodes.
 
         Args:
             mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
 
-        Returns: A vector of node's BOPS count.
-        Note that the vector is not necessarily of the same length as the given config.
-
+        Returns:
+            A vector of node's BOPS count.
         """
-        # TODO keeping old implementation for now
-
-        # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
-        # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
+        # bops is computed for all nodes, so non-configurable memory is already covered by the computation of
+        # configurable nodes
+        if not mp_cfg:
+            return np.array([])
 
+        # TODO keeping old implementation for now
         virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
 
         mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)