# Source code for afe.core.quantization_aware_partitioning

#########################################################
# Copyright (C) 2022 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Ljubomir Papuga
#########################################################
import copy
import itertools

import numpy as np

from termcolor import colored
from typing import List, Dict

from sima_utils.data.data_generator import DataGenerator
from afe.core.configs import (
    ModelConfigs, OptimizationConfigs,
    QuantizationAwarePartitioningConfigs
)
from afe.core.calibrate_networks import calibrate_network
from afe.core.quantize_networks import quantize_network
from afe.core.evaluate_networks import GraphEvaluator
from afe.ir.debug import improve_quantized_net_performance
from afe.ir.defines import NodeName
from afe.ir.execute import execute_node, execute_node_quant
from afe.ir.net import AwesomeNet


def select_quantization_aware_partition(fp32_net: AwesomeNet,
                                        model_config: ModelConfigs,
                                        opt_config: OptimizationConfigs,
                                        qap_configs: QuantizationAwarePartitioningConfigs,
                                        calibration_generator: DataGenerator,
                                        graph_evaluator: GraphEvaluator,
                                        ) -> List[str]:
    """
    Runs the quantization-aware partitioning of the input floating-point AwesomeNet.
    Selects the nodes that should be run in higher precision.

    Steps in QAP are as follows:
        - Analyze the performance of floating-point AwesomeNet for reference.
        - Calibrate the network.
        - Execute loop which quantizes the network, analyzes its performance and,
          if the performance is not sufficient, finds the node with the highest
          quantization error and fixes it to floating-point.

    :param fp32_net: AwesomeNet.
    :param model_config: ModelConfigs. Configuration parameters for model.
    :param opt_config: OptimizationConfigs. Configuration parameters used in
        AwesomeNet calibration and quantization.
    :param qap_configs: QuantizationAwarePartitioningConfigs. Configuration
        parameters for quantization-aware partitioning algorithm.
    :param calibration_generator: DataGenerator. Used to generate data used in
        calibration.
    :param graph_evaluator: GraphEvaluator. Holds objects used in graph evaluation.
    :return: List[str]. The list of nodes that should be set to floating point
        and executed on EV.
    """
    # Fix: the inner helpers previously annotated input_dict as
    # [Dict[NodeName, np.ndarray]] (a list literal wrapping the type), which is
    # not a valid type annotation; the correct annotation is the mapping itself.
    def _run_func(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        return fp32_net.run(input_dict, node_callable=execute_node)

    def _run_func_quant(input_dict: Dict[NodeName, np.ndarray]) -> List[np.ndarray]:
        # Late-binding closure: `quantized_net` is rebound on every loop
        # iteration below, so each evaluation runs the newest quantized net.
        return quantized_net.run(input_dict, node_callable=execute_node_quant)

    # Reference performance of the floating-point network; the threshold is
    # derived from it (e.g. a relative tolerance).
    fp32_performance = graph_evaluator.evaluate(_run_func)
    assert fp32_performance > 0.0
    target_performance = qap_configs.performance_threshold.set_threshold(fp32_performance)
    summary = initialize_quantization_aware_partitioning_summary(model_config.name,
                                                                 fp32_performance,
                                                                 target_performance)

    # Calibrate once on a copy so the pristine fp32 net stays available for the
    # graph analyzer comparison below.
    calibrated_net = copy.deepcopy(fp32_net)
    calibrate_network(calibrated_net, opt_config, calibration_generator)

    quantized_net_performance = 0.0
    for _ in range(qap_configs.max_iterations):
        # Re-quantize from the calibrated net each iteration; nodes fixed to
        # floating point in previous iterations are carried by calibrated_net.
        quantized_net = copy.deepcopy(calibrated_net)
        quantize_network(quantized_net, model_config, opt_config)
        quantized_net_performance = graph_evaluator.evaluate(_run_func_quant)
        if quantized_net_performance >= target_performance:
            break
        summary = add_performance_value_to_summary(quantized_net_performance,
                                                   target_performance, summary)
        graph_analyzer_samples = \
            itertools.islice(graph_evaluator.input_generator,
                             qap_configs.graph_analyzer_number_of_samples)
        # Find the node with the highest quantization error and fix it to
        # floating point (mutates calibrated_net for the next iteration).
        fixed_node = improve_quantized_net_performance(calibrated_net, quantized_net,
                                                       fp32_net, graph_analyzer_samples,
                                                       qap_configs.graph_analyzer_mode,
                                                       qap_configs.graph_analyzer_metric)
        summary = add_fixed_node_info_to_summary(fixed_node, summary)

    # Record the final (break or last-iteration) performance exactly once.
    summary = add_performance_value_to_summary(quantized_net_performance,
                                               target_performance, summary)
    finalize_and_print_summary(quantized_net_performance, target_performance,
                               qap_configs.max_iterations, summary)
    return calibrated_net.float_node_list
def initialize_quantization_aware_partitioning_summary(model_name: str,
                                                       fp32_performance: float,
                                                       target_performance: float) -> str:
    """
    Build the colored header of the QAP summary report: a banner with the model
    name, the floating-point reference performance, the target performance, and
    the label for the first quantized-performance value.

    :param model_name: str. Name of the model being partitioned.
    :param fp32_performance: float. Performance of the floating-point network.
    :param target_performance: float. Performance the quantized network must reach.
    :return: str. The initialized, colorized summary text.
    """
    banner = "**************************************************\n"
    # (text, color) pairs in display order; joined into one colorized string.
    segments = [
        ("\n" + banner, "yellow"),
        (f"QAP summary for model {model_name}\n", "yellow"),
        (banner, "yellow"),
        ("Floating-point model performance:\t", "yellow"),
        (f"{fp32_performance: >10.3f}\n", "cyan"),
        ("Target performance:\t", "yellow"),
        (f"{target_performance: >26.3f}\n\n", "blue"),
        ("--------------------------------------------------\n", "yellow"),
        ("Quantized model performance:\t", "yellow"),
    ]
    return "".join(colored(text, color) for text, color in segments)
def add_performance_value_to_summary(performance: float, target_performance: float,
                                     summary: str) -> str:
    """
    Append a quantized-performance value to the summary, colored green when the
    target is met and red otherwise.

    :param performance: float. Measured performance of the quantized network.
    :param target_performance: float. Performance the quantized network must reach.
    :param summary: str. Summary text accumulated so far.
    :return: str. Summary with the colorized performance value appended.
    """
    # Green when the value meets or exceeds the target, red when it falls short.
    if performance < target_performance:
        value_color = "red"
    else:
        value_color = "green"
    return summary + colored(f"{performance: >18.3f}\n", value_color)
def add_fixed_node_info_to_summary(fixed_node: str, summary: str) -> str:
    """
    Append a note naming the node that was fixed to floating point, followed by
    the label for the next quantized-performance value.

    :param fixed_node: str. Name of the node fixed to floating point.
    :param summary: str. Summary text accumulated so far.
    :return: str. Summary with the fixed-node note appended.
    """
    fragments = (
        colored("\nFixed node ", "yellow"),
        colored(f"{fixed_node}", "cyan"),
        colored(" to floating point\n", "yellow"),
        colored("Quantized model performance:\t", "yellow"),
    )
    return summary + "".join(fragments)
def finalize_and_print_summary(achieved_performance: float, target_performance: float,
                               max_iterations: int, summary: str):
    """
    Append the final QAP verdict (success or failure) to the summary, close the
    banner, and print the whole report to stdout.

    :param achieved_performance: float. Final performance of the quantized network.
    :param target_performance: float. Performance the quantized network had to reach.
    :param max_iterations: int. Maximum number of QAP iterations that were allowed.
    :param summary: str. Summary text accumulated so far.
    """
    if achieved_performance >= target_performance:
        # Fix: the success condition is >=, but the message previously claimed
        # ">" — wrong when achieved == target. Report ">=" to match the check.
        summary += colored(f"\nQAP SUCCESS!\nAchieved performance of {achieved_performance: .3f} >= "
                           f"{target_performance: .3f}\n", "green")
    else:
        summary += colored(f"\nQAP FAILURE.\nCould not achieve {target_performance: .3f} in "
                           f"{max_iterations} iterations.\n"
                           f"Final achieved performance is {achieved_performance: .3f}\n", "red")
    summary += colored("**************************************************\n", "yellow")
    print(summary)