Source code for afe.core.compile_networks

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou
#########################################################
import hashlib
import tarfile
from dataclasses import dataclass

import numpy as np
import os
import subprocess
import shutil
import tempfile
from pathlib import Path

from sima_utils.logging.sima_logger import sima_log_dbg, UserFacingException

from afe._tvm._runtime import apply_batch_dimension
from afe._tvm._utils import deserialize_relay_irmodule
from afe.backends import BackendIR, Backend
from afe.backends.apu.tvm_apu_compiler import LibType, compile_to_arm
from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import (
    L2CachingMode, MLACompilerConfig, TessellateParameters
)

from afe.core.configs import ModelConfigs, OptimizationConfigs
from afe.core.utils import save_files, dump_configs_to_yaml
from afe.ir.attributes import ExternalAttrs
from afe.ir.defines import Status
from afe.ir.net import AwesomeNet
from afe.ir.operations import ExternalOp
from afe.ir.sima_ir import SiMaIR
from mlc.test_util.test_context import CompilerConfig
from sima_utils.common import Platform


def compile_network(net: AwesomeNet, model_config: ModelConfigs,
                    opt_config: OptimizationConfigs,
                    enable_large_tensors: bool = True) -> None:
    """
    Compile the quantized AwesomeNet using run_l1_based_model. Generate MLC files
    for each layer and save them to output_dir. Save the YAML if the
    SIMA_AFE_SAVED_FILES environment variable is set to `1`.

    This function is deprecated. Use translate_sub_awesome_net_to_modelgraph and
    compile_awesomenet to compile the AwesomeNet.

    Args:
        net: A quantized AwesomeNet.
        model_config: A ModelConfigs instance containing model-related information
            and status.
        opt_config: Optimization configuration parameters.
        enable_large_tensors: If True, the MLA will handle large tensors; otherwise
            large tensors will raise an exception.
    """
    from mlc.compiler.model_graph.l1_based import run_l1_based_model
    from afe.backends.mla.afe_to_n2a_compiler.defines import PlaceholderName, ModelGraph
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import create_modelgraph

    mlc_output_dir = model_config.output_directory + "/mlc"

    # Remove the mlc folder if it exists
    shutil.rmtree(mlc_output_dir, ignore_errors=True)
    os.makedirs(mlc_output_dir, exist_ok=True)

    # Convert to an n2a_compiler model graph
    network_vertex = create_modelgraph(net)

    # Create fake input data
    placeholder_values = {}
    for name, shape in model_config.shape_dict_hwc.items():
        fake_data = np.ones(shape)
        placeholder_values[PlaceholderName(name)] = fake_data.astype(np.int8)

    # Compile and generate MLC files
    mgr = ModelGraph([network_vertex])
    config = CompilerConfig(mlc_output_dir)
    run_l1_based_model(config, mgr, placeholder_values,
                       enable_large_tensors=enable_large_tensors)

    # Save the generated MLC files directory
    model_config.mlc_files = mlc_output_dir

    # Dump files
    if save_files():
        dump_configs_to_yaml(model_config, opt_config)

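
# Illustrative usage sketch for the deprecated compile_network path, kept here
# only to show the expected calling convention. The `net`, `model_config`, and
# `opt_config` arguments are assumed to come from earlier AFE loading,
# quantization, and optimization steps; this is not part of the shipped API.
def _example_compile_network(net: AwesomeNet, model_config: ModelConfigs,
                             opt_config: OptimizationConfigs) -> None:
    # `net` must already be quantized. MLC files are written to
    # <model_config.output_directory>/mlc and recorded in model_config.mlc_files.
    compile_network(net, model_config, opt_config, enable_large_tensors=True)
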
def get_zip_file_path(output_dir: str, network_name: str) -> str:
    """
    Function that constructs the name of the tar.gz archive.

    Args:
        output_dir: Path in which the archive should be created.
        network_name: Name of the model.

    Returns:
        String that represents the name of the archive.
    """
    return os.path.join(output_dir, network_name + '_mpk.tar.gz')

def compute_checksum(file_path: str) -> str:
    """
    Compute the SHA-256 checksum of a file.

    Args:
        file_path: Path to the file.

    Returns:
        Hexadecimal checksum string.
    """
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            sha256.update(chunk)
    return sha256.hexdigest()

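
# Illustrative sketch combining the two helpers above: locate the archive that
# compile_net_to_elf produces for a network and verify its integrity by
# comparing SHA-256 checksums. The `expected` argument is a hypothetical input,
# e.g. a checksum recorded at build time.
def _example_verify_archive(output_dir: str, network_name: str, expected: str) -> bool:
    archive = get_zip_file_path(output_dir, network_name)  # <output_dir>/<name>_mpk.tar.gz
    return compute_checksum(archive) == expected
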
def compile_net_to_elf(net: AwesomeNet, output_elf_path: str,
                       desired_batch_size: int = 1,
                       compress: bool = True,
                       tessellate_parameters: TessellateParameters | None = None,
                       compute_dcmp_ratio: bool = False,
                       enable_large_tensors: bool = True,
                       l2_caching_mode: L2CachingMode = L2CachingMode.NONE,
                       mlc_files_path: str | None = None,
                       do_pack: bool = True,
                       use_power_limits: bool = False,
                       max_power: float | None = None,
                       layer_norm_use_fp32_intermediates: bool = False,
                       rms_norm_use_fp32_intermediates: bool = False) -> tuple[int, float]:
    """
    Compile parts of a network to object code. Use the Production Compiler for the
    MLA. Use TVM for the APU.

    Args:
        net: An AwesomeNet.
        output_elf_path: Path in which output files should be created.
        desired_batch_size: The desired batch size of the input to the model. The
            compiler may use a smaller value if it cannot support the desired
            value. The value that is used is returned to the user as the first
            member of the returned tuple.
        compress: If True, the mlc file is compressed before generating the .elf
            file.
        tessellate_parameters: Dictionary defining the tessellation parameters for
            inputs and outputs of the MLA segments.
        compute_dcmp_ratio: If True, the function calculates and returns
            dcmp_ratio. Used only in get_performance_metrics.
        enable_large_tensors: If True, the MLA will handle large tensors; otherwise
            large tensors will raise an exception.
        l2_caching_mode: Specifies the mode of L2 caching in the n2a compiler.
        mlc_files_path: Mlc files path. If provided, .mlc files will be saved.
        do_pack: Whether to produce a tar.gz archive containing the compiled files.
            If True, produce an archive file that contains the compiled files.
            If False, produce the compiled files directly.
        use_power_limits: If True, the compiler will schedule instructions to
            conform to power limits.
        max_power: Set to a positive float value to override the default max power
            when power limits are used.
        layer_norm_use_fp32_intermediates: Use FP32 intermediate tensors in the
            BF16 LayerNorm kernel.
        rms_norm_use_fp32_intermediates: Use FP32 intermediate tensors in the
            BF16 RMSNorm kernel.

    Returns:
        Tuple[int, float] where the first value (int) is the batch size used by
        the compiler and the second value (float) is the data compression ratio.
        If compute_dcmp_ratio is False, the second value is a placeholder and
        should be ignored by the user.
    """
    # Imports placed here to avoid loading Python packages from the n2a_compiler
    # repository until compile_net_to_elf is called. This is a workaround for
    # users' environment issues. There should be a project building policy that
    # deals with this issue comprehensively.
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import (
        translate_sub_awesome_net_to_modelgraph
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_dir = os.path.join(tmpdirname, "performance_test_isim")
        os.makedirs(tmp_dir, exist_ok=True)

        # Transform the AwesomeNet to the backend IR of the compiler.
        backend_net = translate_sub_awesome_net_to_modelgraph(net)
        backend_config = BackendCompilerConfig(
            output_elf_path, tmp_dir, desired_batch_size=desired_batch_size,
            mla=MLACompilerConfig(
                tessellate_parameters=tessellate_parameters,
                enable_large_tensors=enable_large_tensors,
                l2_caching_mode=l2_caching_mode,
                platform_type=net.target,
                use_power_limits=use_power_limits,
                max_power=max_power,
                compress=compress,
                layer_norm_use_fp32_intermediates=layer_norm_use_fp32_intermediates,
                rms_norm_use_fp32_intermediates=rms_norm_use_fp32_intermediates))
        compiler_batch_size = compile_backend_code(backend_config, backend_net)

        # Postprocess MLC files and produce output files
        mlc_files = list(Path(tmp_dir).glob("**/*mla.mlc"))
        mlc_files += list(Path(tmp_dir).glob("**/*l2_cache.mlc"))

        if mlc_files_path:
            os.makedirs(mlc_files_path, exist_ok=True)
            for file_name in Path(tmp_dir).iterdir():
                if file_name.suffix in ('.mlc', '.elf'):
                    shutil.copy(file_name, mlc_files_path)

        if do_pack:
            zip_file_path = get_zip_file_path(output_elf_path, net.name)
            try:
                tar_context_manager = tarfile.open(zip_file_path, "w:gz")
            except OSError as e:
                raise UserFacingException(
                    "Unable to open file '{}' for writing".format(zip_file_path)) from e
            with tar_context_manager as f:
                for file in Path(tmp_dir).iterdir():
                    if file.suffix in ['.elf', '.so', '.yaml']:
                        f.add(file, arcname=file.name)
        else:
            for file in Path(tmp_dir).iterdir():
                if file.suffix in ['.elf', '.so', '.yaml']:
                    shutil.copy(file, output_elf_path)

        # FIXME: return the real dcmp_ratio
        return compiler_batch_size, 1.0

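
# Illustrative usage sketch for compile_net_to_elf, assuming `net` is a
# quantized AwesomeNet whose backend nodes are ready for compilation. The
# compiler may use a smaller batch size than requested, so the returned value
# should be checked rather than assumed.
def _example_compile_net_to_elf(net: AwesomeNet, output_dir: str) -> int:
    used_batch_size, _dcmp_ratio = compile_net_to_elf(
        net, output_dir,
        desired_batch_size=4,  # the compiler may fall back to a smaller value
        do_pack=True)          # produces <output_dir>/<net.name>_mpk.tar.gz
    # The second returned value is a placeholder unless compute_dcmp_ratio=True.
    return used_batch_size
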
class APUCompilerConfig:
    # This is a stub to be implemented later
    pass

@dataclass
class BackendCompilerConfig:
    """
    Parameters controlling how to run backend compilers for a network.

    If an optional backend is omitted, the graph being compiled must not have any
    nodes that use that backend.

    Attributes:
        output_dir: Path of the directory where compiled files will be created.
        temp_dir: Path of the directory where temporary files will be created.
            The temporary directory may be deleted after compilation. This path
            may be the same as output_dir.
        desired_batch_size: The desired batch size of the AwesomeNet's inputs to
            be used in compilation. Compilation will query the backends for the
            batch size that they can support for the entire AwesomeNet. It will
            choose the largest supported batch size that is no larger than the
            desired batch size.
        mla: Configuration for the MLA compiler.
        apu: Configuration for the APU compiler.
    """
    output_dir: str
    temp_dir: str
    desired_batch_size: int = 1
    mla: MLACompilerConfig = MLACompilerConfig()
    apu: APUCompilerConfig | None = None

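
# Illustrative construction of a BackendCompilerConfig. The directory names are
# hypothetical, and the MLACompilerConfig keyword arguments shown mirror those
# passed in compile_net_to_elf above; any others would need to be checked
# against MLACompilerConfig itself.
def _example_backend_compiler_config() -> BackendCompilerConfig:
    return BackendCompilerConfig(
        output_dir="out",        # compiled files land here
        temp_dir="out/tmp",      # may be deleted after compilation
        desired_batch_size=1,
        mla=MLACompilerConfig(enable_large_tensors=True,
                              l2_caching_mode=L2CachingMode.NONE))
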
def _make_lib_name(output_dir: str, name: str, filename_nonce: int) -> str:
    """
    Make a filename to use for creating a new ARM shared object file.

    Args:
        output_dir: Directory where the file will be placed.
        name: Name used as the beginning of the filename.
        filename_nonce: Number appended to make a unique filename.

    Returns:
        Filename to use for creating a new file.
    """
    return os.path.join(output_dir, name + '_stage' + str(filename_nonce) + '_a65.so')

def compile_backend_code(config: BackendCompilerConfig, net: AwesomeNet) -> int:
    """
    Compile the nodes in an AwesomeNet that contain BackendIR.

    For the MLA backend, other parts of the model graph are modified to support
    changes in the code's behavior when it is compiled by the Production Compiler.

    Args:
        config: Parameters controlling how to run backends.
        net: Network whose backend code will be compiled. The network is modified.

    Returns:
        The batch size of the compiled code. It is equal to or smaller than the
        batch size in config.
    """
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import compile_mla_code

    os.makedirs(config.output_dir, exist_ok=True)
    os.makedirs(config.temp_dir, exist_ok=True)

    # Run the MLA compiler separately over the entire graph because it can modify
    # more than just the MLA backend nodes.
    # The output files are temporary assembly files.
    batch_size = compile_mla_code(net, config.temp_dir, config.mla,
                                  desired_batch_size=config.desired_batch_size)

    # Set the batch size, as it will be used for compiling other nodes
    net.set_batch_size(batch_size)

    stage = 1
    for node_name in net.execution_order:
        node = net.nodes[node_name]
        if isinstance(node.ir, SiMaIR):
            if node.ir.backend == Backend.APU:
                # APU code is represented as ExternalOp. Compile it and replace it
                # by BackendIR.
                assert isinstance(node.ir.operation, ExternalOp)
                external_attrs = node.ir.attrs
                assert isinstance(external_attrs, ExternalAttrs)
                assert external_attrs.backend == Backend.APU
                tvm_irmodule = deserialize_relay_irmodule(external_attrs.irmod_str)
                tvm_irmodule = apply_batch_dimension(external_attrs.node_type.inputs,
                                                     tvm_irmodule)

                # Compile to a shared object file
                filename = _make_lib_name(config.temp_dir, net.name, stage)
                object_file = compile_to_arm(tvm_irmodule, filename, LibType.shared_object)
                node.ir = BackendIR(object_file, node.ir.get_type(), Backend.APU, stage)
                stage += 1
        elif isinstance(node.ir, BackendIR):
            if node.ir.backend == Backend.MLA:
                # The node was handled in compile_mla_code
                stage += 1
            else:
                raise ValueError("Unexpected compiled code was found in network")

    # Status is updated when compiling MLA code
    assert net.status == Status.BACKEND_IR_COMPILED
    return batch_size

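
# Illustrative end-to-end sketch: compile an AwesomeNet's backend nodes with
# compile_backend_code, reusing the hypothetical config helper defined above.
# It assumes `net` already contains BackendIR / SiMaIR nodes as produced by the
# earlier pipeline stages; the returned batch size may be smaller than the one
# requested in the config.
def _example_compile_backend_code(net: AwesomeNet) -> int:
    config = _example_backend_compiler_config()
    batch_size = compile_backend_code(config, net)  # net is modified in place
    assert batch_size <= config.desired_batch_size
    return batch_size
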