# Source code for afe.core.quantization_aware_partitioning

#########################################################
# Copyright (C) 2022 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Ljubomir Papuga
#########################################################
import copy
import itertools

import numpy as np

from termcolor import colored
from typing import List, Dict

from sima_utils.data.data_generator import DataGenerator
from afe.core.configs import (
    ModelConfigs, OptimizationConfigs,
    QuantizationAwarePartitioningConfigs
)
from afe.core.calibrate_networks import calibrate_network
from afe.core.quantize_networks import quantize_network
from afe.core.evaluate_networks import GraphEvaluator
from afe.ir.debug import improve_quantized_net_performance
from afe.ir.defines import NodeName
from afe.ir.execute import execute_node, execute_node_quant
from afe.ir.net import AwesomeNet


def select_quantization_aware_partition(fp32_net: AwesomeNet,
                                        model_config: ModelConfigs,
                                        opt_config: OptimizationConfigs,
                                        qap_configs: QuantizationAwarePartitioningConfigs,
                                        calibration_generator: DataGenerator,
                                        graph_evaluator: GraphEvaluator,
                                        ) -> List[str]:
    """
    Runs the quantization-aware partitioning of the input floating-point AwesomeNet.
    Selects the nodes that should be run in higher precision.

    Steps in QAP are as follows:
        - Analyze the performance of floating-point AwesomeNet for reference.
        - Calibrate the network.
        - Execute loop which quantizes the network, analyzes its performance and,
          if the performance is not sufficient, finds the node with the highest
          quantization error and fixes it to floating-point.

    :param fp32_net: AwesomeNet.
    :param model_config: ModelConfigs. Configuration parameters for model.
    :param opt_config: OptimizationConfigs. Configuration parameters used in
        AwesomeNet calibration and quantization.
    :param qap_configs: QuantizationAwarePartitioningConfigs. Configuration
        parameters for quantization-aware partitioning algorithm.
    :param calibration_generator: DataGenerator. Used to generate data used in
        calibration.
    :param graph_evaluator: GraphEvaluator. Holds objects used in graph evaluation.
    :return: List[str]. The list of nodes that should be set to floating point
        and executed on EV.
    """
    # Fix: the inner helpers previously annotated input_dict as
    # [Dict[NodeName, np.ndarray]] (a list literal wrapping the type), which is
    # not a valid type annotation; the correct annotation is the mapping itself.
    def _run_func(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        return fp32_net.run(input_dict, node_callable=execute_node)

    def _run_func_quant(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        # Late-binding closure: `quantized_net` is rebound on every loop
        # iteration below, so each evaluation runs the newest quantized net.
        return quantized_net.run(input_dict, node_callable=execute_node_quant)

    # Reference performance of the floating-point network; the threshold is
    # derived from it (e.g. a relative tolerance).
    fp32_performance = graph_evaluator.evaluate(_run_func)
    assert fp32_performance > 0.0
    target_performance = qap_configs.performance_threshold.set_threshold(fp32_performance)
    summary = initialize_quantization_aware_partitioning_summary(model_config.name,
                                                                 fp32_performance,
                                                                 target_performance)

    # Calibrate once on a copy so the pristine fp32 net stays available for the
    # graph analyzer comparison below.
    calibrated_net = copy.deepcopy(fp32_net)
    calibrate_network(calibrated_net, opt_config, calibration_generator)

    quantized_net_performance = 0.0
    for _ in range(qap_configs.max_iterations):
        # Re-quantize from the calibrated net each iteration; nodes fixed to
        # floating point in previous iterations are carried by calibrated_net.
        quantized_net = copy.deepcopy(calibrated_net)
        quantize_network(quantized_net, model_config, opt_config)
        quantized_net_performance = graph_evaluator.evaluate(_run_func_quant)
        if quantized_net_performance >= target_performance:
            break
        summary = add_performance_value_to_summary(quantized_net_performance,
                                                   target_performance, summary)
        graph_analyzer_samples = \
            itertools.islice(graph_evaluator.input_generator,
                             qap_configs.graph_analyzer_number_of_samples)
        # Find the node with the highest quantization error and fix it to
        # floating point (mutates calibrated_net for the next iteration).
        fixed_node = improve_quantized_net_performance(calibrated_net, quantized_net,
                                                       fp32_net, graph_analyzer_samples,
                                                       qap_configs.graph_analyzer_mode,
                                                       qap_configs.graph_analyzer_metric)
        summary = add_fixed_node_info_to_summary(fixed_node, summary)

    # Record the final (break or last-iteration) performance exactly once.
    summary = add_performance_value_to_summary(quantized_net_performance,
                                               target_performance, summary)
    finalize_and_print_summary(quantized_net_performance, target_performance,
                               qap_configs.max_iterations, summary)
    return calibrated_net.float_node_list
def initialize_quantization_aware_partitioning_summary(model_name: str,
                                                       fp32_performance: float,
                                                       target_performance: float) -> str:
    """
    Build the colored header of the QAP summary report: a banner with the model
    name, the floating-point reference performance, the target performance, and
    the label for the first quantized-performance value.

    :param model_name: str. Name of the model being partitioned.
    :param fp32_performance: float. Performance of the floating-point network.
    :param target_performance: float. Performance the quantized network must reach.
    :return: str. The initialized, colorized summary text.
    """
    banner = "**************************************************\n"
    # (text, color) pairs in display order; joined into one colorized string.
    segments = [
        ("\n" + banner, "yellow"),
        (f"QAP summary for model {model_name}\n", "yellow"),
        (banner, "yellow"),
        ("Floating-point model performance:\t", "yellow"),
        (f"{fp32_performance: >10.3f}\n", "cyan"),
        ("Target performance:\t", "yellow"),
        (f"{target_performance: >26.3f}\n\n", "blue"),
        ("--------------------------------------------------\n", "yellow"),
        ("Quantized model performance:\t", "yellow"),
    ]
    return "".join(colored(text, color) for text, color in segments)
def add_performance_value_to_summary(performance: float, target_performance: float,
                                     summary: str) -> str:
    """
    Append a quantized-performance value to the summary, colored green when the
    target is met and red otherwise.

    :param performance: float. Measured performance of the quantized network.
    :param target_performance: float. Performance the quantized network must reach.
    :param summary: str. Summary text accumulated so far.
    :return: str. Summary with the colorized performance value appended.
    """
    # Green when the value meets or exceeds the target, red when it falls short.
    if performance < target_performance:
        value_color = "red"
    else:
        value_color = "green"
    return summary + colored(f"{performance: >18.3f}\n", value_color)
def add_fixed_node_info_to_summary(fixed_node: str, summary: str) -> str:
    """
    Append a note naming the node that was fixed to floating point, followed by
    the label for the next quantized-performance value.

    :param fixed_node: str. Name of the node fixed to floating point.
    :param summary: str. Summary text accumulated so far.
    :return: str. Summary with the fixed-node note appended.
    """
    fragments = (
        colored("\nFixed node ", "yellow"),
        colored(f"{fixed_node}", "cyan"),
        colored(" to floating point\n", "yellow"),
        colored("Quantized model performance:\t", "yellow"),
    )
    return summary + "".join(fragments)
def finalize_and_print_summary(achieved_performance: float, target_performance: float,
                               max_iterations: int, summary: str):
    """
    Append the final QAP verdict (success or failure) to the summary, close the
    banner, and print the whole report to stdout.

    :param achieved_performance: float. Final performance of the quantized network.
    :param target_performance: float. Performance the quantized network had to reach.
    :param max_iterations: int. Maximum number of QAP iterations that were allowed.
    :param summary: str. Summary text accumulated so far.
    """
    if achieved_performance >= target_performance:
        # Fix: the success condition is >=, but the message previously claimed
        # ">" — wrong when achieved == target. Report ">=" to match the check.
        summary += colored(f"\nQAP SUCCESS!\nAchieved performance of {achieved_performance: .3f} >= "
                           f"{target_performance: .3f}\n", "green")
    else:
        summary += colored(f"\nQAP FAILURE.\nCould not achieve {target_performance: .3f} in "
                           f"{max_iterations} iterations.\n"
                           f"Final achieved performance is {achieved_performance: .3f}\n", "red")
    summary += colored("**************************************************\n", "yellow")
    print(summary)