Source code for afe.core.graph_manager

#########################################################
# Copyright (C) 2021 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Ljubomir Papuga
#########################################################
import copy
import json
import os
import tvm
from typing import Dict, Optional, List, Union, Tuple

from sima_utils.data.data_generator import DataGenerator
import afe
from afe.backends import Backend
from afe.backends.apu import APUChecker
from afe.backends.ev import EVChecker
from afe.backends.mla import MLAChecker
from afe.backends.cpu import CPUChecker
from afe.common_utils import get_index_from_node_name
from afe.core.configs import ModelConfigs, AfeProcessingConfigs
from afe.core.calibrate_networks import calibrate_network
from afe.core.quantize_networks import quantize_network
from afe.core.evaluate_networks import GraphEvaluator
from afe.core.quantization_aware_partitioning import select_quantization_aware_partition
from afe.core.utils import dump_configs_to_yaml
from afe.ir.net import AwesomeNet, inline_ev_subgraphs
from afe.ir.serializer.api import save_awesomenet, load_awesomenet
from afe.apis._sanitize_errors import sanitize_afe_error, sanitize_tvm_error
import afe._tvm._defines as tvm_def
from afe._tvm._relay_transform import get_default_relay_transforms
from afe._tvm._transformer import GraphTransformer
from afe._tvm._tvm_graph_partition import PartitionIRModule, GreedyPartitioner, map_expressions_to_backend, \
    get_compile_mode_backend_checkers
from afe._tvm._utils import serialize_relay_irmodule_to_json, deserialize_relay_irmodule
from afe.tvm_converter import serialize_irmod
from afe.tvm_converter._converter import get_expression_to_name_map_from_irmodule, \
    translate_expr_to_name_map_to_awesomenet
from afe.tvm_converter.parameters import TVMConverterParams

# Suffixes used in creating file names for dumping the data.
IR_MODEL_FILE_SUFFIX: str = "_ir_model.json"
FP32_AWESOMENET_SUFFIX: str = "_fp32_awesomenet"


def _save_ir_module_to_file(mod: tvm_def.TVMIRModule, model_configs: ModelConfigs):
    """
    Saves the Relay IRModule to a file. The file path and name are derived from the
    model_configs argument.

    :param mod: tvm_def.TVMIRModule. A Relay IRModule to be saved to file.
    :param model_configs: ModelConfigs. Specifies the file path and module name.
    :return: None.
    """
    json_data = serialize_relay_irmodule_to_json(mod)
    os.makedirs(model_configs.output_directory, exist_ok=True)
    filename = os.path.join(model_configs.output_directory,
                            model_configs.name + IR_MODEL_FILE_SUFFIX)
    with open(filename, 'w') as fp:
        json.dump(json_data, fp)


def load_ir_module_from_file(model_configs: ModelConfigs) -> tvm_def.TVMIRModule:
    """
    Loads the Relay IRModule from file. The file path and name are derived from the
    model_configs argument.

    :param model_configs: ModelConfigs. Specifies the file path and the model name.
    :return: tvm_def.TVMIRModule. A Relay IRModule loaded from file.
    """
    filename = os.path.join(model_configs.output_directory,
                            model_configs.name + IR_MODEL_FILE_SUFFIX)
    try:
        with open(filename, 'r') as fp:
            json_data = json.load(fp)
    except Exception as err:
        err_msg = str(err) if len(err.args) < 1 else str(err.args[0])
        raise ImportError(f"Got : {err.__class__.__name__}: \n{err_msg}\n"
                          f"Unable to load IRModule {model_configs.name} "
                          f"from file {filename}.")
    mod = deserialize_relay_irmodule(json_data)
    return mod


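# Hedged usage sketch (illustrative only, not part of the AFE API): shows how the
# save/load helpers above are expected to pair up. `model_configs` is assumed to carry
# a valid `name` and `output_directory`; how a ModelConfigs is constructed is not
# shown here.
def _example_ir_module_round_trip(mod: tvm_def.TVMIRModule,
                                  model_configs: ModelConfigs) -> tvm_def.TVMIRModule:
    # Dump the module to <output_directory>/<name>_ir_model.json ...
    _save_ir_module_to_file(mod, model_configs)
    # ... and read it back into a Relay IRModule.
    return load_ir_module_from_file(model_configs)

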
def load_awesome_net_from_file(model_configs: ModelConfigs) -> AwesomeNet:
    """
    Loads the AwesomeNet from file. The file path and name are derived from the
    model_configs argument.

    :param model_configs: ModelConfigs. Specifies the file path and the model name.
    :return: AwesomeNet. An AwesomeNet loaded from file.
    """
    net = load_awesomenet(model_configs.name + FP32_AWESOMENET_SUFFIX,
                          model_configs.output_directory)
    return net


def load_quantized_awesome_net_from_file(model_configs: ModelConfigs) -> AwesomeNet:
    """
    Loads the quantized AwesomeNet from file. The file path and name are derived from
    the model_configs argument.

    :param model_configs: ModelConfigs. Specifies the file path and the model name.
    :return: AwesomeNet. A quantized AwesomeNet loaded from file.
    """
    net = load_awesomenet(model_configs.name + afe.QUANTIZED_POSTFIX,
                          model_configs.output_directory)
    return net


def transform_ir_module(mod: tvm_def.TVMIRModule,
                        transforms: List[tvm.transform.Pass]
                        ) -> tvm_def.TVMIRModule:
    """
    Transforms the Relay IRModule using the list of TVM Relay transforms.

    Note that the partitioning transformation step is done separately, due to the
    possibility that quantization-aware partitioning might be run beforehand.

    :param mod: tvm_def.TVMIRModule. A Relay IRModule that is being transformed.
    :param transforms: List[tvm.transform.Pass]. Defines the list of transform passes
        that will be applied by the transformer.
    :return: tvm_def.TVMIRModule. Transformed Relay IRModule.
    """
    # Initialize GraphTransformer.
    transformer = GraphTransformer(transforms)
    # Run transforms.
    try:
        return transformer.transform(mod)
    except Exception as e:
        sanitize_tvm_error("Error detected in tvm transformations", e)


def _map_fixed_nodes_to_target_backend(float_node_list: List[str],
                                       target_backend: Backend = Backend.CPU
                                       ) -> Dict[int, Backend]:
    """
    Map the nodes from float_node_list to be executed on the target Backend.

    :param float_node_list: List[str]. List of node names for nodes that should be
        executed on the target Backend.
    :param target_backend: Backend. Default is Backend.CPU. The Backend on which the
        nodes should be executed.
    :return: Dict[int, Backend]. Dictionary containing mapping of node indices to the
        target Backend.
    """
    indices_to_backend_dict: Dict[int, Backend] = {}
    for node_name in float_node_list:
        node_idx = get_index_from_node_name(node_name)
        indices_to_backend_dict.update({node_idx: target_backend})
    return indices_to_backend_dict


def run_quantization_aware_partition_selector(mod: tvm_def.TVMIRModule,
                                              configs: AfeProcessingConfigs,
                                              calibration_generator: DataGenerator,
                                              graph_evaluator: GraphEvaluator
                                              ) -> Dict[int, Backend]:
    """
    Runs the quantization-aware partitioning of the input Relay IRModule. Selects the
    nodes that should be run in higher precision and updates the TransformerConfig's
    indices_to_backend_dict.

    Steps in QAP are as follows:
        1. Map the expressions in the TVM Relay IRModule to the targeted backend.
        2. Translate the input Relay IRModule to an AwesomeNet, taking into account the
           expression-to-backend mapping. Expressions mapped to a non-MLA backend are
           translated to AwesomeNodes consisting of ExternalOps.
        3. Analyze the performance of the floating-point AwesomeNet for reference.
        4. Calibrate the network.
        5. Execute a loop which quantizes the network, analyzes its performance and, if
           the performance is not sufficient, finds the node with the highest
           quantization error and fixes it to floating point.
        6. Update the indices_to_backend_dict with the list of nodes fixed to floating
           point in step 5.

    :param mod: Input IRModule.
    :param configs: AfeProcessingConfigs. Contains processing configuration information.
    :param calibration_generator: DataGenerator. Used to generate data used in calibration.
    :param graph_evaluator: GraphEvaluator. Used to perform graph evaluation.
    :return: Dict[int, Backend]. Dictionary containing mapping of node indices to the
        target Backend.
    """
    assert not configs.model_configs.is_quantized, \
        "Cannot run quantization-aware partitioning on a quantized model"

    # Use the user-tagged indices-to-backend dict as a starting point.
    user_tagged_backend_dict = configs.transformer_configs.indices_to_backend_dict
    expr_to_backend_dict = \
        map_expressions_to_backend(mod,
                                   partitioner=GreedyPartitioner([Backend.MLA, Backend.EV,
                                                                  Backend.APU, Backend.CPU]),
                                   backend_checkers=[MLAChecker, EVChecker, APUChecker, CPUChecker],
                                   index_to_backend_dict=user_tagged_backend_dict)
    converter_params = TVMConverterParams(
        requantization_mode=configs.transformer_configs.requantization_mode,
        target=configs.target)
    fp32_net = translate_irmod_to_sima_ir(mod, converter_params,
                                          expr_to_backend_dict=expr_to_backend_dict)
    float_node_list = select_quantization_aware_partition(fp32_net, configs.model_configs,
                                                          configs.optimization_configs,
                                                          configs.qap_configs,
                                                          calibration_generator,
                                                          graph_evaluator)
    return _map_fixed_nodes_to_target_backend(float_node_list)


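# Hedged sketch (illustrative only): how the QAP result above is typically consumed.
# partition_ir_module() below performs this merge internally; the snippet only makes the
# data flow explicit. `configs`, `calibration_generator` and `graph_evaluator` are assumed
# to be constructed elsewhere in the AFE pipeline.
def _example_apply_qap_result(mod: tvm_def.TVMIRModule,
                              configs: AfeProcessingConfigs,
                              calibration_generator: DataGenerator,
                              graph_evaluator: GraphEvaluator) -> Dict[int, Backend]:
    # Nodes whose quantization error is too high are pinned to a floating-point backend ...
    fixed = run_quantization_aware_partition_selector(mod, configs,
                                                      calibration_generator, graph_evaluator)
    # ... and merged into the user-provided node-index-to-backend mapping that the
    # partitioner honors.
    merged = copy.deepcopy(configs.transformer_configs.indices_to_backend_dict)
    merged.update(fixed)
    return merged

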
def partition_ir_module(mod: tvm_def.TVMIRModule,
                        configs: AfeProcessingConfigs,
                        calibration_generator: Optional[DataGenerator],
                        graph_evaluator: Optional[GraphEvaluator]
                        ) -> tvm_def.TVMIRModule:
    """
    Partition the Relay IRModule according to the given TransformerConfigs.

    The resulting Relay IRModule consists of a series of functions that contain
    subgraphs of operations, each of which is annotated with one of the backends.

    :param mod: tvm_def.TVMIRModule. A Relay IRModule that is being partitioned.
    :param configs: AfeProcessingConfigs. Contains configuration information controlling
        the full end-to-end processing flow.
    :param calibration_generator: Optional[DataGenerator]. A DataGenerator used in the
        calibration step. Needs to be provided if quantization-aware partitioning is enabled.
    :param graph_evaluator: Optional[GraphEvaluator]. An object used in graph evaluation.
        Needs to be provided if quantization-aware partitioning is enabled.
    :return: tvm_def.TVMIRModule. A partitioned Relay IRModule.
    """
    indices_to_backend_dict = copy.deepcopy(configs.transformer_configs.indices_to_backend_dict)

    if configs.transformer_configs.enable_quantization_based_partitioning:
        assert not configs.model_configs.is_quantized
        for arg in (configs.optimization_configs, calibration_generator, graph_evaluator):
            assert arg is not None
        fixed_indices_to_backend_dict = \
            run_quantization_aware_partition_selector(mod, configs, calibration_generator,
                                                      graph_evaluator)
        indices_to_backend_dict.update(fixed_indices_to_backend_dict)

    backend_checkers = get_compile_mode_backend_checkers(configs.transformer_configs.enabled_backends,
                                                         configs.model_configs.is_quantized)
    backends: List[Backend] = [c.get_backend() for c in backend_checkers]
    partitioner = PartitionIRModule(backend_checkers=backend_checkers,
                                    partitioner=GreedyPartitioner(backends),
                                    index_to_backend_dict=indices_to_backend_dict)
    try:
        mod = partitioner(mod)
    except Exception as e:
        sanitize_tvm_error("Error detected in partitioning.", e)
    return mod


def translate_irmod_to_sima_ir(mod: tvm_def.TVMIRModule,
                               params: TVMConverterParams,
                               dump_to_files: bool = False,
                               model_name: str | None = None,
                               output_directory: str | None = None,
                               expr_to_backend_dict: dict[tvm_def.TVMRelayExpr, list[Backend]] | None = None,
                               *,
                               output_labels: list[str] | None = None,
                               model_path: str | None = None
                               ) -> AwesomeNet:
    """
    Converts a TVM IRModule to an AwesomeNet in the following steps:
        1. Create a Relay expression to name map. The map contains graph metadata that
           can be used to translate to a different representation.
        2. Use the Relay expression to name map to create an AwesomeNet.

    Args:
        mod: tvm_def.TVMIRModule. A Relay IRModule being translated.
        params: Parameters that affect how Relay IR is translated to SiMa IR.
        dump_to_files: bool. If set to True, the resulting AwesomeNet will be saved to file.
        model_name: Optional[str]. Default is None. Name of the model used to generate the
            file name. Needs to be provided if dump_to_files is set to True.
        output_directory: Optional[str]. Default is None. The path to the directory where
            files should be stored. Needs to be provided if dump_to_files is set to True.
        expr_to_backend_dict: Optional[Dict[tvm_def.TVMRelayExpr, List[Backend]]]. Default
            is None. If given, represents the mapping of expressions to the Backend on
            which each expression is to be executed.
        output_labels: Names of the network outputs. These names are only used in
            debugging output.
        model_path: Original model path.

    Returns:
        AwesomeNet. A SimaIR representation of a model.
    """
    # Extract the Relay expression to name map into a RelayExprToNameMap.
    expr_to_name_map = get_expression_to_name_map_from_irmodule(mod, visit_global_var=True)

    # Read precision annotations and determine how those annotations apply to each expression.
    from afe.tvm_converter._converter import (
        propagate_annotated_precision, extract_qdq_hint_annotations, propagate_annotated_sensitivity
    )
    annotated_precision = propagate_annotated_precision(mod)
    qdq_annotations = extract_qdq_hint_annotations(mod)
    annotated_sensitivity = propagate_annotated_sensitivity(mod)

    # Translate each Relay expression in the expr_to_name_map to an AwesomeNode and
    # use them to create an AwesomeNet.
    try:
        awesome_net = translate_expr_to_name_map_to_awesomenet(expr_to_name_map, mod, params,
                                                               awesome_net_name=model_name,
                                                               expr_to_backend_dict=expr_to_backend_dict,
                                                               backend=Backend.NONE,
                                                               output_labels=output_labels,
                                                               model_path=model_path,
                                                               annotated_precision=annotated_precision,
                                                               qdq_annotation=qdq_annotations,
                                                               annotated_sensitivity=annotated_sensitivity)
    except Exception as e:
        sanitize_afe_error("Translation from TVM IR to Awesomenet failed.", e)

    inline_ev_subgraphs(awesome_net)

    if dump_to_files:
        assert model_name is not None
        assert output_directory is not None
        os.makedirs(output_directory, exist_ok=True)
        save_awesomenet(awesome_net, model_name + FP32_AWESOMENET_SUFFIX, output_directory)

    return awesome_net


def transform_and_partition_ir_module(mod: tvm_def.TVMIRModule,
                                      configs: AfeProcessingConfigs,
                                      calibration_generator: Optional[DataGenerator] = None,
                                      graph_evaluator: Optional[GraphEvaluator] = None,
                                      dump_to_files: bool = False
                                      ) -> tvm_def.TVMIRModule:
    """
    Transforms and partitions the Relay IRModule according to the given TransformerConfigs.

    :param mod: tvm_def.TVMIRModule. An input Relay IRModule.
    :param configs: AfeProcessingConfigs. Contains processing configuration information.
    :param calibration_generator: Optional[DataGenerator]. Default is None. DataGenerator
        used in the calibration step. Needs to be provided if quantization-aware
        partitioning is enabled.
    :param graph_evaluator: Optional[GraphEvaluator]. Default is None. An object used in
        graph evaluation. Needs to be provided if quantization-aware partitioning is enabled.
    :param dump_to_files: bool. Default is False. If set to True, intermediate and output
        Relay IRModules will be saved to appropriate files.
    :return: tvm_def.TVMIRModule. The transformed and partitioned Relay IRModule.
    """
    transforms = get_default_relay_transforms(configs.model_configs.layout,
                                              configs.transformer_configs.convert_layout_method,
                                              enable_graph_partition=False,
                                              index_to_backend_dict=None,
                                              is_keras=configs.model_configs.framework == "keras",
                                              enabled_backends=configs.transformer_configs.enabled_backends,
                                              is_quantized=configs.model_configs.is_quantized)
    mod = transform_ir_module(mod, transforms)

    if configs.transformer_configs.enable_graph_partition:
        mod = partition_ir_module(mod, configs, calibration_generator, graph_evaluator)

    if dump_to_files:
        _save_ir_module_to_file(mod, configs.model_configs)
        # Save Relay IRModule to a Netron-viewable file.
        serialize_irmod(mod=mod, model_name=configs.model_configs.name,
                        output_directory=configs.model_configs.output_directory,
                        show_partition=True,
                        is_quantized=configs.model_configs.is_quantized)

    return mod


def transform_irmod_to_awesomenet(mod: tvm_def.TVMIRModule,
                                  configs: AfeProcessingConfigs,
                                  calibration_generator: DataGenerator | None = None,
                                  graph_evaluator: GraphEvaluator | None = None,
                                  dump_to_files: bool = False,
                                  return_irmod: bool = False,
                                  *,
                                  output_labels: list[str] | None = None,
                                  model_path: str | None = None
                                  ) -> Union[AwesomeNet, Tuple[AwesomeNet, tvm_def.TVMIRModule]]:
    """
    Transforms and partitions the Relay IRModule according to the given TransformerConfigs
    and translates it into a SimaIR AwesomeNet.

    Args:
        mod: tvm_def.TVMIRModule. An input Relay IRModule.
        configs: AfeProcessingConfigs. Contains processing configuration information.
        calibration_generator: Optional[DataGenerator]. Default is None. DataGenerator used
            in the calibration step. Needs to be provided if quantization-aware partitioning
            is enabled.
        graph_evaluator: Optional[GraphEvaluator]. Default is None. An object used in graph
            evaluation. Needs to be provided if quantization-aware partitioning is enabled.
        dump_to_files: bool. Default is False. If set to True, configuration and intermediate
            model representations (Relay IRModule and AwesomeNets) will be saved to
            appropriate files.
        return_irmod: bool. Default is False. Whether to return the transformed TVM IRModule.
        output_labels: Names of the network outputs. These names are only used in
            debugging output.
        model_path: Original model path.

    Returns:
        A Tuple consisting of the generated AwesomeNet and the transformed TVM IRModule if
        the return_irmod parameter is set to True, otherwise only the generated AwesomeNet.
    """
    mod = transform_and_partition_ir_module(mod, configs, calibration_generator,
                                            graph_evaluator, dump_to_files)
    converter_params = TVMConverterParams(is_quantized=configs.model_configs.is_quantized,
                                          requantization_mode=configs.transformer_configs.requantization_mode,
                                          target=configs.target)
    net = translate_irmod_to_sima_ir(mod, converter_params, dump_to_files,
                                     configs.model_configs.name,
                                     configs.model_configs.output_directory,
                                     output_labels=output_labels,
                                     model_path=model_path)
    return (net, mod) if return_irmod else net


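# Hedged sketch (illustrative only): requesting the transformed IRModule alongside the
# AwesomeNet via return_irmod=True. `configs` is assumed to be built elsewhere and to have
# quantization-aware partitioning disabled, so no calibration data or graph evaluator is
# needed for this call.
def _example_translate_with_irmod(mod: tvm_def.TVMIRModule,
                                  configs: AfeProcessingConfigs
                                  ) -> Tuple[AwesomeNet, tvm_def.TVMIRModule]:
    # With return_irmod=True the result is a (AwesomeNet, IRModule) tuple.
    net, transformed_mod = transform_irmod_to_awesomenet(mod, configs, return_irmod=True)
    return net, transformed_mod

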
def run_end_to_end_processing(mod: tvm_def.TVMIRModule,
                              configs: AfeProcessingConfigs,
                              calibration_generator: DataGenerator,
                              graph_evaluator: Optional[GraphEvaluator],
                              dump_to_files: bool = False
                              ) -> AwesomeNet:
    """
    Runs end-to-end processing of a Relay IRModule, which consists of the following steps:
        1. Transform the Relay IRModule according to information contained in the
           TransformerConfigs that is a part of AfeProcessingConfigs.
        2. Partition the Relay IRModule according to information contained in the
           TransformerConfigs that is a part of AfeProcessingConfigs.
        3. Translate the Relay IRModule to a SimaIR AwesomeNet.
        4. Calibrate the SimaIR AwesomeNet.
        5. Quantize the SimaIR AwesomeNet.

    :param mod: tvm_def.TVMIRModule. An input Relay IRModule.
    :param configs: AfeProcessingConfigs. Contains the configuration information which
        controls the full end-to-end processing flow.
    :param calibration_generator: DataGenerator. Object used to generate data used in
        calibration.
    :param graph_evaluator: Optional[GraphEvaluator]. Object used for graph evaluation.
        Needs to be provided if quantization-aware partitioning is enabled.
    :param dump_to_files: bool. Default is False. If set to True, all configuration and
        intermediate graph representations will be written to appropriate files.
    :return: AwesomeNet. Final result of processing: a partitioned, calibrated and
        quantized AwesomeNet.
    """
    if dump_to_files:
        os.makedirs(configs.model_configs.output_directory, exist_ok=True)
        dump_configs_to_yaml(configs.model_configs, configs.optimization_configs)

    net = transform_irmod_to_awesomenet(mod, configs, calibration_generator, graph_evaluator,
                                        dump_to_files=dump_to_files)
    calibrate_network(net, configs.optimization_configs, calibration_generator)
    quantize_network(net, configs.model_configs, configs.optimization_configs)
    return net
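

# Hedged usage sketch (illustrative only, not part of the AFE API): drives the end-to-end
# flow above for a model whose Relay IRModule was previously dumped with
# _save_ir_module_to_file(). `configs` and `calibration_generator` are assumed to be built
# elsewhere, and quantization-aware partitioning is assumed to be disabled so that
# graph_evaluator may be None. Whether quantize_network() already persists the result is
# not shown in this module, so the sketch saves it explicitly using the same naming
# convention that load_quantized_awesome_net_from_file() reads back.
def _example_process_saved_model(configs: AfeProcessingConfigs,
                                 calibration_generator: DataGenerator) -> AwesomeNet:
    # Reload the previously serialized Relay IRModule.
    mod = load_ir_module_from_file(configs.model_configs)
    # Transform, partition, translate, calibrate and quantize it.
    net = run_end_to_end_processing(mod, configs, calibration_generator,
                                    graph_evaluator=None, dump_to_files=True)
    # Persist the quantized AwesomeNet so load_quantized_awesome_net_from_file() can find it.
    save_awesomenet(net, configs.model_configs.name + afe.QUANTIZED_POSTFIX,
                    configs.model_configs.output_directory)
    return net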