Source code for afe.core.compile_networks

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou
#########################################################
import hashlib
import tarfile
from dataclasses import dataclass

import numpy as np
import os
import subprocess
import shutil
import tempfile
from pathlib import Path

from sima_utils.logging.sima_logger import sima_log_dbg, UserFacingException

from afe._tvm._runtime import apply_batch_dimension
from afe._tvm._utils import deserialize_relay_irmodule
from afe.backends import BackendIR, Backend
from afe.backends.apu.tvm_apu_compiler import LibType, compile_to_arm
from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import (
    L2CachingMode, MLACompilerConfig, TessellateParameters
)

from afe.core.configs import ModelConfigs, OptimizationConfigs
from afe.core.utils import save_files, dump_configs_to_yaml
from afe.ir.attributes import ExternalAttrs
from afe.ir.defines import Status
from afe.ir.net import AwesomeNet
from afe.ir.operations import ExternalOp
from afe.ir.sima_ir import SiMaIR
from mlc.test_util.test_context import CompilerConfig
from sima_utils.common import Platform


def compile_network(net: AwesomeNet, model_config: ModelConfigs,
                    opt_config: OptimizationConfigs,
                    enable_large_tensors: bool = True) -> None:
    """
    Compile the quantized AwesomeNet using run_l1_based_model. Generate MLC files
    for each layer and save them to output_dir. Save the YAML if the
    SIMA_AFE_SAVED_FILES environment variable is set to `1`.

    This function is deprecated. Use translate_sub_awesome_net_to_modelgraph and
    compile_awesomenet to compile the AwesomeNet.

    Args:
        net: A quantized AwesomeNet.
        model_config: A ModelConfigs instance containing model-related information
            and status.
        opt_config: Optimization configuration parameters.
        enable_large_tensors: If True, the MLA will handle large tensors; otherwise
            large tensors will raise an exception.
    """
    from mlc.compiler.model_graph.l1_based import run_l1_based_model
    from afe.backends.mla.afe_to_n2a_compiler.defines import PlaceholderName, ModelGraph
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import create_modelgraph

    mlc_output_dir = model_config.output_directory + "/mlc"

    # Remove the mlc folder if it exists
    shutil.rmtree(mlc_output_dir, ignore_errors=True)
    os.makedirs(mlc_output_dir, exist_ok=True)

    # Convert to an n2a_compiler model graph
    network_vertex = create_modelgraph(net)

    # Create fake input data
    placeholder_values = {}
    for name, shape in model_config.shape_dict_hwc.items():
        fake_data = np.ones(shape)
        placeholder_values[PlaceholderName(name)] = fake_data.astype(np.int8)

    # Compile and generate MLC files
    mgr = ModelGraph([network_vertex])
    config = CompilerConfig(mlc_output_dir)
    run_l1_based_model(config, mgr, placeholder_values,
                       enable_large_tensors=enable_large_tensors)

    # Save the generated MLC files directory
    model_config.mlc_files = mlc_output_dir

    # Dump files
    if save_files():
        dump_configs_to_yaml(model_config, opt_config)

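
# Illustrative usage sketch for the deprecated compile_network path, kept here
# only to show the expected calling convention. The `net`, `model_config`, and
# `opt_config` arguments are assumed to come from earlier AFE loading,
# quantization, and optimization steps; this is not part of the shipped API.
def _example_compile_network(net: AwesomeNet, model_config: ModelConfigs,
                             opt_config: OptimizationConfigs) -> None:
    # `net` must already be quantized. MLC files are written to
    # <model_config.output_directory>/mlc and recorded in model_config.mlc_files.
    compile_network(net, model_config, opt_config, enable_large_tensors=True)
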
def get_zip_file_path(output_dir: str, network_name: str) -> str:
    """
    Function that constructs the name of the tar.gz archive.

    Args:
        output_dir: Path in which the archive should be created.
        network_name: Name of the model.

    Returns:
        String that represents the name of the archive.
    """
    return os.path.join(output_dir, network_name + '_mpk.tar.gz')

def compute_checksum(file_path: str) -> str:
    """
    Compute the SHA-256 checksum of a file.

    Args:
        file_path: Path to the file.

    Returns:
        Hexadecimal checksum string.
    """
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            sha256.update(chunk)
    return sha256.hexdigest()

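
# Illustrative sketch combining the two helpers above: locate the archive that
# compile_net_to_elf produces for a network and verify its integrity by
# comparing SHA-256 checksums. The `expected` argument is a hypothetical input,
# e.g. a checksum recorded at build time.
def _example_verify_archive(output_dir: str, network_name: str, expected: str) -> bool:
    archive = get_zip_file_path(output_dir, network_name)  # <output_dir>/<name>_mpk.tar.gz
    return compute_checksum(archive) == expected
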
def compile_net_to_elf(net: AwesomeNet, output_elf_path: str,
                       desired_batch_size: int = 1,
                       compress: bool = True,
                       tessellate_parameters: TessellateParameters | None = None,
                       compute_dcmp_ratio: bool = False,
                       enable_large_tensors: bool = True,
                       l2_caching_mode: L2CachingMode = L2CachingMode.NONE,
                       mlc_files_path: str | None = None,
                       do_pack: bool = True,
                       use_power_limits: bool = False,
                       max_power: float | None = None,
                       layer_norm_use_fp32_intermediates: bool = False,
                       rms_norm_use_fp32_intermediates: bool = False) -> tuple[int, float]:
    """
    Compile parts of a network to object code. Use the Production Compiler for the
    MLA. Use TVM for the APU.

    Args:
        net: An AwesomeNet.
        output_elf_path: Path in which output files should be created.
        desired_batch_size: The desired batch size of the input to the model. The
            compiler may use a smaller value if it cannot support the desired
            value. The value that is used is returned to the user as the first
            member of the returned tuple.
        compress: If True, the mlc file is compressed before generating the .elf
            file.
        tessellate_parameters: Dictionary defining the tessellation parameters for
            inputs and outputs of the MLA segments.
        compute_dcmp_ratio: If True, the function calculates and returns
            dcmp_ratio. Used only in get_performance_metrics.
        enable_large_tensors: If True, the MLA will handle large tensors; otherwise
            large tensors will raise an exception.
        l2_caching_mode: Specifies the mode of L2 caching in the n2a compiler.
        mlc_files_path: Mlc files path. If provided, .mlc files will be saved.
        do_pack: Whether to produce a tar.gz archive containing the compiled files.
            If True, produce an archive file that contains the compiled files.
            If False, produce the compiled files directly.
        use_power_limits: If True, the compiler will schedule instructions to
            conform to power limits.
        max_power: Set to a positive float value to override the default max power
            when power limits are used.
        layer_norm_use_fp32_intermediates: Use FP32 intermediate tensors in the
            BF16 LayerNorm kernel.
        rms_norm_use_fp32_intermediates: Use FP32 intermediate tensors in the
            BF16 RMSNorm kernel.

    Returns:
        Tuple[int, float] where the first value (int) is the batch size used by
        the compiler and the second value (float) is the data compression ratio.
        If compute_dcmp_ratio is False, the second value is a placeholder and
        should be ignored by the user.
    """
    # Imports placed here to avoid loading Python packages from the n2a_compiler
    # repository until compile_net_to_elf is called. This is a workaround for
    # users' environment issues. There should be a project building policy that
    # deals with this issue comprehensively.
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import (
        translate_sub_awesome_net_to_modelgraph
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_dir = os.path.join(tmpdirname, "performance_test_isim")
        os.makedirs(tmp_dir, exist_ok=True)

        # Transform the AwesomeNet to the backend IR of the compiler.
        backend_net = translate_sub_awesome_net_to_modelgraph(net)
        backend_config = BackendCompilerConfig(
            output_elf_path, tmp_dir, desired_batch_size=desired_batch_size,
            mla=MLACompilerConfig(
                tessellate_parameters=tessellate_parameters,
                enable_large_tensors=enable_large_tensors,
                l2_caching_mode=l2_caching_mode,
                platform_type=net.target,
                use_power_limits=use_power_limits,
                max_power=max_power,
                compress=compress,
                layer_norm_use_fp32_intermediates=layer_norm_use_fp32_intermediates,
                rms_norm_use_fp32_intermediates=rms_norm_use_fp32_intermediates))
        compiler_batch_size = compile_backend_code(backend_config, backend_net)

        # Postprocess MLC files and produce output files
        mlc_files = list(Path(tmp_dir).glob("**/*mla.mlc"))
        mlc_files += list(Path(tmp_dir).glob("**/*l2_cache.mlc"))

        if mlc_files_path:
            os.makedirs(mlc_files_path, exist_ok=True)
            for file_name in Path(tmp_dir).iterdir():
                if file_name.suffix in ('.mlc', '.elf'):
                    shutil.copy(file_name, mlc_files_path)

        if do_pack:
            zip_file_path = get_zip_file_path(output_elf_path, net.name)
            try:
                tar_context_manager = tarfile.open(zip_file_path, "w:gz")
            except OSError as e:
                raise UserFacingException(
                    "Unable to open file '{}' for writing".format(zip_file_path)) from e
            with tar_context_manager as f:
                for file in Path(tmp_dir).iterdir():
                    if file.suffix in ['.elf', '.so', '.yaml']:
                        f.add(file, arcname=file.name)
        else:
            for file in Path(tmp_dir).iterdir():
                if file.suffix in ['.elf', '.so', '.yaml']:
                    shutil.copy(file, output_elf_path)

        # FIXME: return the real dcmp_ratio
        return compiler_batch_size, 1.0

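
# Illustrative usage sketch for compile_net_to_elf, assuming `net` is a
# quantized AwesomeNet whose backend nodes are ready for compilation. The
# compiler may use a smaller batch size than requested, so the returned value
# should be checked rather than assumed.
def _example_compile_net_to_elf(net: AwesomeNet, output_dir: str) -> int:
    used_batch_size, _dcmp_ratio = compile_net_to_elf(
        net, output_dir,
        desired_batch_size=4,  # the compiler may fall back to a smaller value
        do_pack=True)          # produces <output_dir>/<net.name>_mpk.tar.gz
    # The second returned value is a placeholder unless compute_dcmp_ratio=True.
    return used_batch_size
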
class APUCompilerConfig:
    # This is a stub to be implemented later
    pass

@dataclass
class BackendCompilerConfig:
    """
    Parameters controlling how to run backend compilers for a network.

    If an optional backend is omitted, the graph being compiled must not have any
    nodes that use that backend.

    Attributes:
        output_dir: Path of the directory where compiled files will be created.
        temp_dir: Path of the directory where temporary files will be created.
            The temporary directory may be deleted after compilation. This path
            may be the same as output_dir.
        desired_batch_size: The desired batch size of the AwesomeNet's inputs to
            be used in compilation. Compilation will query the backends for the
            batch size that they can support for the entire AwesomeNet. It will
            choose the largest supported batch size that is no larger than the
            desired batch size.
        mla: Configuration for the MLA compiler.
        apu: Configuration for the APU compiler.
    """
    output_dir: str
    temp_dir: str
    desired_batch_size: int = 1
    mla: MLACompilerConfig = MLACompilerConfig()
    apu: APUCompilerConfig | None = None

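
# Illustrative construction of a BackendCompilerConfig. The directory names are
# hypothetical, and the MLACompilerConfig keyword arguments shown mirror those
# passed in compile_net_to_elf above; any others would need to be checked
# against MLACompilerConfig itself.
def _example_backend_compiler_config() -> BackendCompilerConfig:
    return BackendCompilerConfig(
        output_dir="out",        # compiled files land here
        temp_dir="out/tmp",      # may be deleted after compilation
        desired_batch_size=1,
        mla=MLACompilerConfig(enable_large_tensors=True,
                              l2_caching_mode=L2CachingMode.NONE))
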
def _make_lib_name(output_dir: str, name: str, filename_nonce: int) -> str:
    """
    Make a filename to use for creating a new ARM shared object file.

    Args:
        output_dir: Directory where the file will be placed.
        name: Name used as the beginning of the filename.
        filename_nonce: Number appended to make a unique filename.

    Returns:
        Filename to use for creating a new file.
    """
    return os.path.join(output_dir, name + '_stage' + str(filename_nonce) + '_a65.so')

def compile_backend_code(config: BackendCompilerConfig, net: AwesomeNet) -> int:
    """
    Compile the nodes in an AwesomeNet that contain BackendIR.

    For the MLA backend, other parts of the model graph are modified to support
    changes in the code's behavior when it is compiled by the Production Compiler.

    Args:
        config: Parameters controlling how to run backends.
        net: Network whose backend code will be compiled. The network is modified.

    Returns:
        The batch size of the compiled code. It is equal to or smaller than the
        batch size in config.
    """
    from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_operations import compile_mla_code

    os.makedirs(config.output_dir, exist_ok=True)
    os.makedirs(config.temp_dir, exist_ok=True)

    # Run the MLA compiler separately over the entire graph because it can modify
    # more than just the MLA backend nodes.
    # The output files are temporary assembly files.
    batch_size = compile_mla_code(net, config.temp_dir, config.mla,
                                  desired_batch_size=config.desired_batch_size)

    # Set the batch size, as it will be used for compiling other nodes
    net.set_batch_size(batch_size)

    stage = 1
    for node_name in net.execution_order:
        node = net.nodes[node_name]
        if isinstance(node.ir, SiMaIR):
            if node.ir.backend == Backend.APU:
                # APU code is represented as ExternalOp. Compile it and replace it
                # by BackendIR.
                assert isinstance(node.ir.operation, ExternalOp)
                external_attrs = node.ir.attrs
                assert isinstance(external_attrs, ExternalAttrs)
                assert external_attrs.backend == Backend.APU
                tvm_irmodule = deserialize_relay_irmodule(external_attrs.irmod_str)
                tvm_irmodule = apply_batch_dimension(external_attrs.node_type.inputs,
                                                     tvm_irmodule)

                # Compile to a shared object file
                filename = _make_lib_name(config.temp_dir, net.name, stage)
                object_file = compile_to_arm(tvm_irmodule, filename, LibType.shared_object)
                node.ir = BackendIR(object_file, node.ir.get_type(), Backend.APU, stage)
                stage += 1
        elif isinstance(node.ir, BackendIR):
            if node.ir.backend == Backend.MLA:
                # The node was handled in compile_mla_code
                stage += 1
            else:
                raise ValueError("Unexpected compiled code was found in network")

    # Status is updated when compiling MLA code
    assert net.status == Status.BACKEND_IR_COMPILED
    return batch_size

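
# Illustrative end-to-end sketch: compile an AwesomeNet's backend nodes with
# compile_backend_code, reusing the hypothetical config helper defined above.
# It assumes `net` already contains BackendIR / SiMaIR nodes as produced by the
# earlier pipeline stages; the returned batch size may be smaller than the one
# requested in the config.
def _example_compile_backend_code(net: AwesomeNet) -> int:
    config = _example_backend_compiler_config()
    batch_size = compile_backend_code(config, net)  # net is modified in place
    assert batch_size <= config.desired_batch_size
    return batch_size
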