Skip to content

Commit

Permalink
use maxcut for total ru
Browse files Browse the repository at this point in the history
  • Loading branch information
irenaby committed Jan 13, 2025
1 parent f60dcef commit e718734
Show file tree
Hide file tree
Showing 8 changed files with 238 additions and 289 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget, ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
TargetInclusionCriterion, BitwidthMode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
MixedPrecisionRUHelper
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
Expand Down Expand Up @@ -67,13 +67,19 @@ def __init__(self,
self.compute_metric_fn = self.get_sensitivity_metric()
self._cuts = None

self.ru_metrics = target_resource_utilization.get_restricted_metrics()
# To define RU Total constraints we need to compute weights and activations even if they have no constraints
# TODO currently this logic is duplicated in linear_programming.py
targets = target_resource_utilization.get_restricted_metrics()
if RUTarget.TOTAL in targets:
targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
self.ru_targets_to_compute = targets

self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
self.target_resource_utilization = target_resource_utilization
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
self.non_conf_ru_dict = self._non_configurable_nodes_ru()
self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)

self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
original_graph=self.original_graph)
Expand Down Expand Up @@ -111,18 +117,14 @@ def get_sensitivity_metric(self) -> Callable:
def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
"""
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
The matrix is constructed as follows (for a given target):
- Each row represents the set of resource utilization values for a specific resource utilization
measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
- Each entry in a specific column represents the resource utilization value of a given configuration
(single layer is configured with specific candidate, all other layer are at the minimal resource
utilization configuration) for the resource utilization measure of the respective row.
Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
Args:
target: The resource target for which the resource utilization is calculated (a RUTarget value).
Returns: A resource utilization matrix.
Returns:
A resource utilization matrix of shape (num memory elements, num configurations). Num memory elements
depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
"""
assert isinstance(target, RUTarget), f"{target} is not a valid resource target"

Expand All @@ -132,21 +134,14 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
for c, c_n in enumerate(configurable_sorted_nodes):
for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
if candidate_idx == self.min_ru_config[c]:
# skip ru computation for min configuration. Since we compute the difference from min_ru it'll
# always be 0 for all entries in the results vector.
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
candidate_rus = self.min_ru[target]
else:
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)

ru_matrix.append(np.asarray(candidate_rus))

# We need to transpose the calculated ru matrix to allow later multiplication with
# the indicators' diagonal matrix.
# We only move the first axis (num of configurations) to be last,
# the remaining axes include the metric specific nodes (rows dimension of the new tensor)
# and the ru metric values (if they are non-scalars)
np_ru_matrix = np.array(ru_matrix)
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements
return np_ru_matrix.T

def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
"""
Expand All @@ -162,7 +157,6 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int,
"""
cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
# TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
return self.ru_helper.compute_utilization({target}, cfg)[target]

@staticmethod
Expand All @@ -183,18 +177,6 @@ def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int
updated_cfg[idx] = value
return updated_cfg

def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
"""
Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
resource utilization targets.
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
"""
ru_metrics = self.ru_metrics - {RUTarget.BOPS}
ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
ru[RUTarget.BOPS] = None
return ru

def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
"""
Computes the resource utilization values for a given mixed-precision configuration.
Expand All @@ -206,7 +188,7 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
with the given config.
"""
act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
ru = self.ru_helper.ru_calculator.compute_resource_utilization(
target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
w_qcs=w_qcs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,20 +152,18 @@ def compute_resource_utilization(self,
elif w_qcs is not None: # pragma: no cover
raise ValueError('Weight configuration passed but no relevant metric requested.')

if act_qcs and not {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets): # pragma: no cover
raise ValueError('Activation configuration passed but no relevant metric requested.')
if RUTarget.ACTIVATION in ru_targets:
if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
elif act_qcs is not None: # pragma: no cover
raise ValueError('Activation configuration passed but no relevant metric requested.')

ru = ResourceUtilization()
if RUTarget.WEIGHTS in ru_targets:
ru.weights_memory = w_total
if RUTarget.ACTIVATION in ru_targets:
ru.activation_memory = a_total
if RUTarget.TOTAL in ru_targets:
# TODO use maxcut
act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
ru.total_memory = w_total + act_tensors_total
ru.total_memory = w_total + a_total
if RUTarget.BOPS in ru_targets:
ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from typing import List, Set, Dict, Optional, Tuple
from typing import List, Set, Dict, Optional, Tuple, Any

import numpy as np

from model_compression_toolkit.core import FrameworkInfo
from model_compression_toolkit.core.common import Graph, BaseNode
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget
Expand All @@ -44,9 +43,8 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple
def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
"""
Compute utilization of requested targets for a specific configuration in the format expected by LP problem
formulation, namely an array of ru values corresponding to graph's configurable nodes in the topological order.
For activation target, the array contains values for activation cuts in unspecified order (as long as it is
consistent between configurations).
formulation, namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
(between calls).
Args:
ru_targets: resource utilization targets to compute.
Expand All @@ -57,33 +55,26 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
"""

ru = {}

act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
w_util = None
act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
if RUTarget.WEIGHTS in ru_targets:
w_util = self._weights_utilization(w_qcs)
ru[RUTarget.WEIGHTS] = np.array(list(w_util.values()))
wu = self._weights_utilization(w_qcs)
ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))

# TODO make mp agnostic to activation method
if RUTarget.ACTIVATION in ru_targets:
act_util = self._activation_maxcut_utilization(act_qcs)
ru[RUTarget.ACTIVATION] = np.array(list(act_util.values()))

# TODO use maxcut
if RUTarget.TOTAL in ru_targets:
act_tensors_util = self._activation_tensor_utilization(act_qcs)
w_util = w_util or self._weights_utilization(w_qcs)
total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
# for n in self.graph.nodes if n in act_tensors_util or n in w_util}
for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
ru[RUTarget.TOTAL] = np.array(list(total.values()))
au = self._activation_utilization(act_qcs)
ru[RUTarget.ACTIVATION] = np.array(list(au.values()))

if RUTarget.BOPS in ru_targets:
ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)

if RUTarget.TOTAL in ru_targets:
raise ValueError('Total target should be computed based on weights and activations targets.')

assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
f'Requested {ru_targets}')
return ru

def get_configurable_qcs(self, mp_cfg) \
def get_quantization_candidates(self, mp_cfg) \
-> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
"""
Retrieve quantization candidates objects for weights and activations from the configuration list.
Expand All @@ -92,15 +83,13 @@ def get_configurable_qcs(self, mp_cfg) \
mp_cfg: a list of candidate indices for configurable layers.
Returns:
Mapping between nodes to weights quantization config, and a mapping between nodes and activation
A mapping between nodes and activation quantization config, and a mapping between nodes and weights
quantization config.
"""
mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
act_qcs = {n: node_qcs[n].activation_quantization_cfg
for n in self.graph.get_activation_configurable_nodes()}
w_qcs = {n: node_qcs[n].weights_quantization_cfg
for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
return act_qcs, w_qcs

def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
Expand All @@ -127,8 +116,8 @@ def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantiz
nodes_util = {n: u.bytes for n, u in nodes_util.items()}
return nodes_util

def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
-> Optional[Dict[Cut, float]]:
def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
-> Optional[Dict[Any, float]]:
"""
Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
Expand All @@ -138,57 +127,34 @@ def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeAc
Returns:
Activation utilization per cut, or empty dict if no configuration was passed.
"""
if act_qcs:
_, cuts_util, _ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
bitwidth_mode=BitwidthMode.QCustom,
act_qcs=act_qcs)
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
return cuts_util

# Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
# configurable nodes.
return {}

def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
"""
Compute activation tensors utilization fo configurable nodes if configuration is passed or
for non-configurable nodes otherwise.
Args:
act_qcs: activation quantization configuration or None.
Returns:
Activation utilization per node.
"""
if act_qcs:
target_criterion = TargetInclusionCriterion.QConfigurable
bitwidth_mode = BitwidthMode.QCustom
else:
target_criterion = TargetInclusionCriterion.QNonConfigurable
bitwidth_mode = BitwidthMode.QDefaultSP

_, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
bitwidth_mode=bitwidth_mode,
act_qcs=act_qcs)
return {n: u.bytes for n, u in nodes_util.items()}

def _bops_utilization(self, mp_cfg: List[int]):
# Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
# covered by the computation of configurable activations.
if not act_qcs:
return {}

_, cuts_util, *_ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
bitwidth_mode=BitwidthMode.QCustom,
act_qcs=act_qcs)
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
return cuts_util

def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
"""
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
according to the given mixed-precision configuration of a virtual graph with composed nodes.
Args:
mp_cfg: A mixed-precision configuration (a list of candidate indices, one for each configurable node)
Returns: A vector of node's BOPS count.
Note that the vector is not necessarily of the same length as the given config.
Returns:
A vector of per-node BOPS counts.
"""
# TODO keeping old implementation for now

# BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
# for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
# bops is computed for all nodes, so non-configurable memory is already covered by the computation of
# configurable nodes
if not mp_cfg:
return np.array([])

# TODO keeping old implementation for now
virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]

mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)
Expand Down
Loading

0 comments on commit e718734

Please sign in to comment.