#########################################################
# Copyright (C) 2022 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Ljubomir Papuga
#########################################################
import copy
import itertools
import numpy as np
from termcolor import colored
from typing import List, Dict
from sima_utils.data.data_generator import DataGenerator
from afe.core.configs import (
ModelConfigs, OptimizationConfigs,
QuantizationAwarePartitioningConfigs
)
from afe.core.calibrate_networks import calibrate_network
from afe.core.quantize_networks import quantize_network
from afe.core.evaluate_networks import GraphEvaluator
from afe.ir.debug import improve_quantized_net_performance
from afe.ir.defines import NodeName
from afe.ir.execute import execute_node, execute_node_quant
from afe.ir.net import AwesomeNet
def select_quantization_aware_partition(fp32_net: AwesomeNet,
                                        model_config: ModelConfigs,
                                        opt_config: OptimizationConfigs,
                                        qap_configs: QuantizationAwarePartitioningConfigs,
                                        calibration_generator: DataGenerator,
                                        graph_evaluator: GraphEvaluator,
                                        ) -> List[str]:
    """
    Runs the quantization-aware partitioning of the input floating-point AwesomeNet.
    Selects the nodes that should be run in higher precision.
    Steps in QAP are as follows:
    - Analyze the performance of floating-point AwesomeNet for reference.
    - Calibrate the network.
    - Execute loop which quantizes the network, analyzes its performance and,
      if the performance is not sufficient, finds the node with the highest
      quantization error and fixes it to floating-point.
    :param fp32_net: AwesomeNet.
    :param model_config: ModelConfigs. Configuration parameters for model.
    :param opt_config: OptimizationConfigs. Configuration parameters used in
        AwesomeNet calibration and quantization.
    :param qap_configs: QuantizationAwarePartitioningConfigs. Configuration parameters
        for quantization-aware partitioning algorithm.
    :param calibration_generator: DataGenerator. Used to generate data used in calibration.
    :param graph_evaluator: GraphEvaluator. Holds objects used in graph evaluation.
    :return: List[str]. The list of nodes that should be set to floating point
        and executed on EV.
    """
    def _run_func(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        # Execute the floating-point reference network on one input sample.
        return fp32_net.run(input_dict, node_callable=execute_node)

    def _run_func_quant(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        # Execute the quantized network; `quantized_net` is a closure over the
        # variable rebound on every iteration of the partitioning loop below.
        return quantized_net.run(input_dict, node_callable=execute_node_quant)

    # Floating-point performance serves as the reference from which the
    # target threshold is derived.
    fp32_performance = graph_evaluator.evaluate(_run_func)
    assert fp32_performance > 0.0
    target_performance = qap_configs.performance_threshold.set_threshold(fp32_performance)
    summary = initialize_quantization_aware_partitioning_summary(model_config.name,
                                                                 fp32_performance,
                                                                 target_performance)

    # Calibrate a deep copy so the original floating-point network is untouched.
    calibrated_net = copy.deepcopy(fp32_net)
    calibrate_network(calibrated_net, opt_config, calibration_generator)

    quantized_net_performance = 0.0
    for _ in range(qap_configs.max_iterations):
        # Re-quantize from the calibrated network each iteration; nodes fixed to
        # floating point in previous iterations are preserved in calibrated_net.
        quantized_net = copy.deepcopy(calibrated_net)
        quantize_network(quantized_net, model_config, opt_config)
        quantized_net_performance = graph_evaluator.evaluate(_run_func_quant)
        if quantized_net_performance >= target_performance:
            break
        summary = add_performance_value_to_summary(quantized_net_performance, target_performance, summary)
        # Analyze only a limited number of samples to locate the node with the
        # highest quantization error, then fix it to floating point.
        graph_analyzer_samples = \
            itertools.islice(graph_evaluator.input_generator, qap_configs.graph_analyzer_number_of_samples)
        fixed_node = improve_quantized_net_performance(calibrated_net, quantized_net, fp32_net,
                                                       graph_analyzer_samples,
                                                       qap_configs.graph_analyzer_mode,
                                                       qap_configs.graph_analyzer_metric)
        summary = add_fixed_node_info_to_summary(fixed_node, summary)

    summary = add_performance_value_to_summary(quantized_net_performance, target_performance, summary)
    finalize_and_print_summary(quantized_net_performance, target_performance, qap_configs.max_iterations, summary)
    return calibrated_net.float_node_list
def initialize_quantization_aware_partitioning_summary(model_name: str,
                                                       fp32_performance: float,
                                                       target_performance: float) -> str:
    """
    Build the opening section of the QAP summary report.

    :param model_name: str. Name of the model being partitioned.
    :param fp32_performance: float. Measured performance of the floating-point model.
    :param target_performance: float. Performance the quantized model must reach.
    :return: str. Colored header text ending with a label that awaits the first
        quantized-model performance value.
    """
    star_rule = "**************************************************\n"
    header_parts = [
        colored("\n" + star_rule, "yellow"),
        colored(f"QAP summary for model {model_name}\n", "yellow"),
        colored(star_rule, "yellow"),
        colored("Floating-point model performance:\t", "yellow"),
        colored(f"{fp32_performance: >10.3f}\n", "cyan"),
        colored("Target performance:\t", "yellow"),
        colored(f"{target_performance: >26.3f}\n\n", "blue"),
        colored("--------------------------------------------------\n", "yellow"),
        colored("Quantized model performance:\t", "yellow"),
    ]
    return "".join(header_parts)
def add_fixed_node_info_to_summary(fixed_node: str, summary: str) -> str:
    """
    Append a line reporting which node was fixed to floating point, followed
    by the label for the next quantized-model performance value.

    :param fixed_node: str. Name of the node that was fixed to floating point.
    :param summary: str. Summary text accumulated so far.
    :return: str. Updated summary text.
    """
    fixed_node_line = (colored("\nFixed node ", "yellow")
                       + colored(f"{fixed_node}", "cyan")
                       + colored(" to floating point\n", "yellow"))
    performance_label = colored("Quantized model performance:\t", "yellow")
    return summary + fixed_node_line + performance_label
def finalize_and_print_summary(achieved_performance: float, target_performance: float,
                               max_iterations: int, summary: str) -> None:
    """
    Append the final QAP verdict (success or failure) to the summary and print it.

    :param achieved_performance: float. Performance of the final quantized model.
    :param target_performance: float. Performance threshold required for success.
    :param max_iterations: int. Maximum number of QAP iterations that were allowed.
    :param summary: str. Summary text accumulated so far.
    """
    if achieved_performance >= target_performance:
        # The success condition is ">=", so the message reports ">=" as well
        # (previously it printed ">", which was wrong when performance exactly
        # met the threshold).
        summary += colored(f"\nQAP SUCCESS!\nAchieved performance of {achieved_performance: .3f} >= "
                           f"{target_performance: .3f}\n", "green")
    else:
        summary += colored(f"\nQAP FAILURE.\nCould not achieve {target_performance: .3f} in "
                           f"{max_iterations} iterations.\n"
                           f"Final achieved performance is {achieved_performance: .3f}\n", "red")
    summary += colored("**************************************************\n", "yellow")
    print(summary)