# Source code for afe.backends.mla.afe_to_n2a_compiler.n2a_backend_runner

#########################################################
# Copyright (C) 2022 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Ljubomir Papuga
#########################################################
"""
Procedures to execute MLA nodes using the n2a_compiler backend.
"""
import numpy as np
import os
from collections import defaultdict
from enum import Enum, Flag, auto
from pathlib import Path
from typing import Dict, Union, Tuple, List, Optional, Any, Callable

from afe.backends.backends import BackendIR, Backend
from afe.backends.mla.afe_to_n2a_compiler.defines import (
    ModelGraph, PackParameters, SimRunner, TensorTessellateParameters, bfloat16,
    evaluate_model
)
from afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_utils import make_mlc_file_name
from afe.backends.mla.afe_to_n2a_compiler.create_ofm_chk_mlc import get_ofm_executable_script
import afe.ir.attributes as _afe_attrs
from afe.ir.defines import InputName, Status, get_expected_tensor_value
from afe.ir.tensor_type import NodeType, TensorType, scalar_type_from_dtype
from afe.ir.execute import execute_node_quant
from afe.ir.net import AwesomeNet
from afe.ir.node import AwesomeNode, NodeName
from afe.ir.operation_functions import (
    calculate_tessellated_tensor_shape, get_channel_aligned_shape, get_mla_padded_2d_shape, pack,
    reshape_from_mla_padded_2d_shape, reshape_to_mla_padded_2d_shape
)
from afe.ir.sima_ir import SiMaIRTensorTypes, SiMaIR
from ev_transforms.transforms import (
    detessellation as ev_detessellation, tessellation as ev_tessellation, unpack as ev_unpack
)
from mlc.test_util.test_context import CompilerConfig
from sima_utils.common import Platform
from sima_utils.logging.sima_logger import sima_log_dbg, sima_log_info
from devkit_inference_models.apis.pipeline import Pipeline


# Type of a callback to handle simulation errors.
SimFailureReporter = Callable[[str], None]

class CompilationMode(Enum):
    L1_BASED = 'l1_based'

    @staticmethod
    def from_str(mode):
        assert mode == 'l1_based'
        return CompilationMode.L1_BASED

class RunMode(Flag):
    """
    Enum that is used to specify the desired run mode. The user can run an AwesomeNet
    using n2a_compiler's evaluate_model, the simulator, or directly on the devkit in
    accelerator mode.
    """
    N2A_COMPILER = auto()
    SIMULATOR = auto()
    ACCELERATOR = auto()

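# Because RunMode derives from Flag, run modes can in principle be combined with
# bitwise operators, e.g. RunMode.N2A_COMPILER | RunMode.SIMULATOR. This is plain
# Flag semantics; create_n2a_backend_runner() below compares modes individually.
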
class EvaluateTaskType(Enum):
    # Run evaluate in the same process and wait for its output.
    SAME_PROCESS = auto()
    # Run evaluate in a forked process, concurrently with mlc code generation.
    # Fork copies the memory of the main process and may run out of memory for large models.
    FORK_PROCESS = auto()
    # Export the model graph to JSON, then import and evaluate the model in a new process.
    # Uses less memory than fork, but introduces an export/import to JSON.
    EXT_CMD = auto()

def _get_node_inputs(node: AwesomeNode, inputs: Dict[InputName, np.ndarray], batch_size: int) \
        -> Dict[InputName, np.ndarray]:
    assert isinstance(node.ir, BackendIR), \
        f"Expect node.ir to be a BackendIR. Got {type(node.ir)}"
    assert node.ir.backend == Backend.MLA, \
        f"Cannot use the MLA backend to run code for {node.ir.backend}"
    new_inputs = {}
    for name in inputs.keys():
        new_input = inputs[name]
        # Add a fake batch dimension by replicating the data
        assert new_input.dtype in (np.int8, np.int16, bfloat16), \
            "Input to compiled MLA code must have type int8, int16, or bfloat16."
        if new_input.ndim > 2:
            new_input = np.array([new_input[0]] * batch_size)
        elif new_input.ndim == 2:
            new_input = np.array([new_input] * batch_size)
        new_inputs[name] = new_input
    return new_inputs


def _convert_outputs_to_nhwc(outputs: List[np.ndarray], layout: str):
    if layout != "NC":
        for i in range(len(outputs)):
            outputs[i] = np.expand_dims(outputs[i][0], axis=0)
    else:
        for i in range(len(outputs)):
            outputs[i] = outputs[i][0]


# Attributes of transform operators that are used at the input or output of MLA code
_MLA_IO_TRANFORMS = (_afe_attrs.TessellationTransformAttrs,
                     _afe_attrs.DetessellationTransformAttrs,
                     _afe_attrs.PackTransformAttrs,
                     _afe_attrs.UnpackTransformAttrs,
                     _afe_attrs.TupleGetItemAttrs)

class N2ABackendRunner:
    """
    An instance used to execute an AwesomeNet that contains sub-graphs that belong to
    SiMa's backends. A SiMa backend sub-graph is an IR that can be compiled by SiMa
    compilers. Compilation of the SiMa backend IR is done via the compile_awesomenet
    function.

    N2ABackendRunner should be used for AwesomeNets that have not yet been compiled.
    To execute an AwesomeNet that is already compiled, use N2ABackendSimulator.

    A SiMa backend sub-graph is wrapped by a BackendIR object and can be executed
    together with any frontend IRs.

    :param compilation_mode: Enum. Only CompilationMode.L1_BASED is supported.
        * If L1_BASED, will use evaluate_model to run inference using the numpy
          implementation in the compiler.
    :param layout: str. Data layout of the model.
    :param batch_size: Input data batch size. Default is 1.
    """
    def __init__(self, compilation_mode: CompilationMode = CompilationMode.L1_BASED,
                 layout: str = "", batch_size: int = 1):
        assert compilation_mode == CompilationMode.L1_BASED, \
            f"Unsupported compilation mode {compilation_mode}."
        self.compilation_mode = compilation_mode
        self.layout = layout
        self.batch_size = batch_size

    def execute_backend_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                                node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        # Interpret the ModelGraph to compute the outputs
        placeholder_values = _get_node_inputs(node, inputs, self.batch_size)
        backend_outputs = evaluate_model(node.ir.graph, placeholder_values)

        # HWC to NHWC to match AwesomeNet output format
        _convert_outputs_to_nhwc(backend_outputs, self.layout)

        # Return a tuple of np.ndarray if there are multiple outputs. Otherwise, return
        # a single np.ndarray.
        outputs = tuple(backend_outputs) if len(backend_outputs) > 1 else backend_outputs[0]
        node_outputs[node.name] = outputs

    def execute_sima_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                             node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        execute_node_quant(node, inputs, node_outputs)

    def execute_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                     node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        if isinstance(node.ir, BackendIR):
            self.execute_backend_ir_node(node, inputs, node_outputs)
        elif isinstance(node.ir, SiMaIR):
            self.execute_sima_ir_node(node, inputs, node_outputs)
        else:
            raise TypeError(f"Unsupported sima.ir type: {type(node.ir)}")

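# A minimal usage sketch for the uncompiled-network runner (hypothetical node and
# tensor names; assumes `mla_node` holds a BackendIR for the MLA backend):
#
#   runner = N2ABackendRunner(CompilationMode.L1_BASED, layout="NHWC", batch_size=1)
#   node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]] = {}
#   runner.execute_node(mla_node, {InputName("data"): quantized_input}, node_outputs)
#   result = node_outputs[mla_node.name]
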
def _get_mla_input_shapes_and_types(
        node: AwesomeNode, batch_size: int
) -> tuple[dict[str, list[tuple[int, ...]]], dict[str, list[tuple[int, ...]]], dict[str, list[type]]]:
    """
    Helper function for getting MLA input data shapes and types.
    """
    untessellated_data_shapes = defaultdict(list)
    data_types = defaultdict(list)
    tessellated_data_shapes = defaultdict(list)
    for section_base_name, pack_input_list in (
            node.ir.graph.compile_properties.pack_parameters.items()
    ):
        for input_id, tessellate_param in pack_input_list:
            # Get the untessellated data shape and type.
            mg_tensor_shape = node.ir.graph.inputs[input_id].tensor_shape
            data_type = mg_tensor_shape.dtype.np_dtype
            tensor_shape = (batch_size, *mg_tensor_shape.shape)
            data_types[section_base_name].append(data_type)
            untessellated_data_shapes[section_base_name].append(tensor_shape)

            # Calculate the tessellated data shape.
            if tessellate_param.enable_mla:
                tessellated_data_shape = get_mla_padded_2d_shape(
                    tensor_shape, elem_size=np.dtype(data_type).itemsize
                )
            else:
                tensor_type = TensorType(
                    scalar=scalar_type_from_dtype(np.dtype(data_type).name),
                    shape=tensor_shape
                )
                tessellated_data_shape = calculate_tessellated_tensor_shape(
                    tensor_type, tessellate_param.tile_shape,
                    tessellate_param.dram_layout.align_c16
                )
            tessellated_data_shapes[section_base_name].append(tessellated_data_shape)

    return tessellated_data_shapes, untessellated_data_shapes, data_types

class N2ACompiledBackendRunner(N2ABackendRunner):
    """
    An instance used to execute an AwesomeNet that contains sub-graphs that belong to
    SiMa's backends. A SiMa backend sub-graph is an IR that has already been compiled
    by SiMa compilers. Compilation of the SiMa backend IR is done prior to execution
    via the compile_awesomenet function. Files generated during the compilation step
    are placed in the directory given as an argument to the constructor.

    A SiMa backend sub-graph is wrapped by a BackendIR object. During the compilation
    step, nodes for tessellation/detessellation and packing/unpacking are inserted
    before and after each MLA segment.

    For execution of the SiMa backend sub-graphs, n2a_compiler's evaluate_model is
    used to save on processing time. For execution of SiMa backend sub-graphs using
    the simulator, use N2ABackendSimulator.

    :param compilation_mode: Enum. Only CompilationMode.L1_BASED is supported at this
        point.
        * If L1_BASED, will use evaluate_model to run inference using the numpy
          implementation in the compiler.
    :param out_dir: str. Output directory that keeps the generated files. Should
        contain the .mlc files that are generated during the compilation step.
    :param layout: str. Data layout of the model.
    :param batch_size: Input data batch size. Default is 1. Only batch size of 1 is
        supported at this point.
    :param model_name: str. Name of the model. Should correspond to the model name
        that is used during the compile_awesomenet step.
    """
    def __init__(self, compilation_mode: CompilationMode = CompilationMode.L1_BASED,
                 out_dir: str = "", layout: str = "", batch_size: int = 1,
                 model_name: str = "", file_name_postfix: str = ""):
        super().__init__(compilation_mode, layout, batch_size)
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        self.out_dir = out_dir
        self.model_name = model_name
        self.file_name_postfix = file_name_postfix

    def _get_untessellated_inputs_for_node(
            self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes]
    ) -> dict[NodeName, SiMaIRTensorTypes]:
        """
        Get dictionary of untessellated values needed for running the BackendIR node
        via evaluate_model.
        """
        assert isinstance(node.ir, BackendIR)

        # Get data shapes and types.
        tessellated_data_shapes, untessellated_data_shapes, data_types = (
            _get_mla_input_shapes_and_types(node, self.batch_size)
        )

        untessellated_inputs = dict()
        for packed_data, (section_base_name, pack_input_list) in (
                zip(inputs.values(), node.ir.graph.compile_properties.pack_parameters.items())
        ):
            _data_types = data_types[section_base_name]
            _untessellated_data_shapes = untessellated_data_shapes[section_base_name]
            if len(pack_input_list) > 1:
                unpacked_data = ev_unpack(
                    packed_data, [np.dtype(x).name for x in _data_types],
                    tessellated_data_shapes[section_base_name]
                )
            else:
                unpacked_data = [packed_data]
            for (input_id, tessellate_param), _data, _data_type, _untessellated_data_shape in (
                    zip(pack_input_list, unpacked_data, _data_types, _untessellated_data_shapes)
            ):
                if tessellate_param.enable_mla:
                    untessellated_data = reshape_from_mla_padded_2d_shape(
                        _data, _untessellated_data_shape, _data_type
                    )
                else:
                    untessellated_data = ev_detessellation(
                        _data,
                        slice_shape=tessellate_param.tile_shape,
                        frame_type=np.dtype(_data_type).name,
                        frame_shape=_untessellated_data_shape,
                        align_c16=tessellate_param.dram_layout.align_c16,
                        cblock=tessellate_param.dram_layout.cblock
                    )
                untessellated_inputs[node.ir.graph.inputs[input_id].name] = untessellated_data
        return untessellated_inputs

    def execute_backend_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                                node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        # Reshape all input data to MLA 2D shape.
        inputs_2d = dict()
        for name, data in inputs.items():
            if data.ndim != 2:
                inputs_2d[name] = reshape_to_mla_padded_2d_shape(data)
            else:
                inputs_2d[name] = data

        # Get untessellated inputs in order to run evaluate_model to get data for the check file.
        untessellated_inputs = self._get_untessellated_inputs_for_node(node, inputs_2d)

        assert isinstance(node.ir, BackendIR)
        backend_output = self.evaluate_model_graph(
            node.ir.graph, inputs_2d, untessellated_inputs, node.ir.stage, node.get_type()
        )
        assert backend_output is not None

        # Reshape from MLA 2D shape to the desired shape for each output if the next node is
        # not unpack or detessellate.
        for unpack_output_list in node.ir.graph.compile_properties.unpack_parameters.values():
            if len(unpack_output_list) > 1 or not unpack_output_list[0][1].enable_mla:
                # An unpack node or detessellate node will handle the reshape.
                continue
            # Reshape from MLA 2D shape to the desired shape.
            output_id = unpack_output_list[0][0]
            batch_size = backend_output[output_id].shape[0]
            output_shape = (batch_size, *node.ir.graph.outputs[output_id].operator.output_shape)
            backend_output[output_id] = reshape_from_mla_padded_2d_shape(
                backend_output[output_id], output_shape
            )

        if len(backend_output) == 1:
            node_outputs[node.name] = backend_output[0]
        else:
            node_outputs[node.name] = tuple(backend_output)

    def execute_sima_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                             node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        super().execute_sima_ir_node(node, inputs, node_outputs)
        # TODO: Make the Unpack transform return a Tuple instead of a List to avoid the
        #  output transformation here.
        if isinstance(node.ir.attrs, _afe_attrs.UnpackTransformAttrs):
            assert isinstance(node_outputs[node.name], List)
            node_outputs[node.name] = tuple(node_outputs[node.name])

    def get_ifm_data_file_name(self, stage: int) -> str:
        return os.path.join(
            self.out_dir, f"{self.model_name}_stage{stage}_mla.ifm{self.file_name_postfix}.mlc")

    def get_ofm_chk_file_name(self, stage: int) -> str:
        return os.path.join(
            self.out_dir, f"{self.model_name}_stage{stage}_mla.ofm_chk{self.file_name_postfix}.mlc")

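    # Example of the generated file names (illustrative values): with out_dir="out",
    # model_name="resnet", stage=0 and file_name_postfix="", the two methods above
    # return "out/resnet_stage0_mla.ifm.mlc" and "out/resnet_stage0_mla.ofm_chk.mlc".
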
    @staticmethod
    def write_inputs_to_ifm_file(
            inputs: dict[InputName, np.ndarray], pack_parameters: PackParameters,
            ifm_file_name: str
    ):
        input_data = dict()
        for section_base_name, section_data in zip(pack_parameters, inputs.values()):
            input_data[section_base_name] = section_data
        _write_data_to_file(input_data, ifm_file_name, ".dram_data")

    @staticmethod
    def pack_and_write_outputs_to_ofm_chk_file(
            untessellated_outputs: list[np.ndarray], unpack_parameters: PackParameters,
            ofm_chk_file_name: str
    ):
        # Transform the tensors into a shape that will be output by the MLA.
        packed_outputs_dict = dict()
        for section_base_name, unpack_output_list in unpack_parameters.items():
            pack_inputs = list()
            for output_id, detessellate_param in unpack_output_list:
                output_data = untessellated_outputs[output_id]
                if detessellate_param.enable_mla:
                    padded_shape = get_channel_aligned_shape(
                        output_data.shape, output_data.dtype.itemsize
                    )
                    pad_width = tuple(
                        (0, ps - os) for ps, os in zip(padded_shape, output_data.shape)
                    )
                    padded_data = np.pad(output_data, pad_width)
                    pack_inputs.append(padded_data)
                else:
                    slice_shape = detessellate_param.tile_shape
                    assert len(slice_shape) + 1 >= output_data.ndim
                    slice_shape = slice_shape[len(slice_shape) + 1 - output_data.ndim:]
                    pack_inputs.append(
                        ev_tessellation(
                            output_data, slice_shape,
                            detessellate_param.dram_layout.align_c16,
                            detessellate_param.dram_layout.cblock
                        )
                    )
            packed_outputs_dict[section_base_name] = _pack_numpy_arrays(pack_inputs)

        # Write the packed outputs to the ofm_chk.mlc file.
        _write_data_to_file(packed_outputs_dict, ofm_chk_file_name, ".dram_check")
        return list(packed_outputs_dict.values())

    def evaluate_model_graph(
            self, model_graph: Any, inputs: dict[InputName, np.ndarray],
            untessellated_inputs: dict[str, np.ndarray] | None, stage: int,
            node_type: NodeType | None = None
    ) -> list[np.ndarray]:
        """
        Execute a model graph that has been lowered to the n2a_compiler backend's
        representation and compiled using the compile_awesomenet function.

        Files that are produced in the compilation should reside in the self.out_dir
        directory and their names should correspond to names of the MLA subgraph nodes,
        using the .mlc file extension.

        :param model_graph: The ModelGraph to execute.
        :param inputs: Input values of the model graph. These arrays are in MLA layout.
        :param untessellated_inputs: Dictionary containing the untessellated inputs of
            the MLA subgraph. Needed to generate the reference output by running
            evaluate_model in order to produce the ofm_chk file.
        :return: Packed reference outputs of the model graph, in MLA layout
            (N, H*W*16C).
        """
        # Import placed here to avoid loading from external repositories before this
        # function is called.
        from afe.backends.mla.afe_to_n2a_compiler.defines import evaluate_model

        # Interpret the ModelGraph.
        assert isinstance(model_graph, ModelGraph)
        assert untessellated_inputs is not None
        untessellated_outputs = evaluate_model(model_graph, untessellated_inputs)

        self.write_inputs_to_ifm_file(
            inputs, model_graph.compile_properties.pack_parameters,
            self.get_ifm_data_file_name(stage)
        )
        return self.pack_and_write_outputs_to_ofm_chk_file(
            untessellated_outputs, model_graph.compile_properties.unpack_parameters,
            self.get_ofm_chk_file_name(stage)
        )

class N2ABackendSimulator(N2ACompiledBackendRunner):
    """
    An instance used to execute an AwesomeNet that contains sub-graphs that belong to
    SiMa's backends. A SiMa backend sub-graph is an IR that has already been compiled
    by SiMa compilers. Compilation of the SiMa backend IR is done prior to execution
    via the compile_awesomenet function. Files generated during the compilation step
    are placed in the directory given as an argument to the N2ABackendSimulator ctor.

    A SiMa backend sub-graph is wrapped by a BackendIR object. During the compilation
    step, a corresponding .mlc file is created in the out_dir path for each SiMa
    backend sub-graph. The convention for naming the .mlc file is:
    {model_name}_stage{stage}_mla.mlc

    For execution of the SiMa backend sub-graphs, the simulator is used. Input data is
    written directly into the simulator's DRAM using the simulator's Python API. Input
    data is written into the {model_name}_stage{stage}_mla.ifm.mlc file and the
    reference output obtained by using n2a_compiler's evaluate_model is written into
    the {model_name}_stage{stage}_mla.ofm_chk.mlc file.

    :param compilation_mode: Enum. Only CompilationMode.L1_BASED is supported at this
        point.
        * If L1_BASED, will use evaluate_model to run inference using the numpy
          implementation in the compiler, and will use the simulator API to run the
          precompiled model on ISIM.
    :param out_dir: str. Output directory that keeps the generated files. Should
        contain the .mlc files that are generated during the compilation step.
    :param layout: str. Data layout of the model.
    :param batch_size: Input data batch size. Default is 1. Only batch size of 1 is
        supported at this point.
    :param platform_type: Architecture type. Default is Platform.GEN1.
    :param model_name: str. Name of the model. Should correspond to the model name
        that is used during the compile_awesomenet step.
    :param report_sim_failure: Action to take when simulation fails. It receives a
        short description of what was run. If None, simulation failure is ignored.
    :param evaluate_task_type: If the output of evaluate_model_graph is not used
        directly, it may run asynchronously in another process.
    """
    def __init__(self, compilation_mode: CompilationMode = CompilationMode.L1_BASED,
                 out_dir: str = "", layout: str = "", batch_size: int = 1,
                 platform_type: Platform = Platform.GEN1, model_name: str = "",
                 file_name_postfix: str = "", *,
                 report_sim_failure: Optional[SimFailureReporter] = None,
                 evaluate_task_type: EvaluateTaskType = EvaluateTaskType.SAME_PROCESS):
        super().__init__(compilation_mode, out_dir, layout, batch_size, model_name,
                         file_name_postfix)
        self._report_sim_failure = report_sim_failure
        self.platform_type = platform_type
        self.evaluate_task_type = evaluate_task_type
        self.evaluate_task = None

    def execute_sima_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                             node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        super().execute_sima_ir_node(node, inputs, node_outputs)
        attrs = node.ir.attrs
        if isinstance(attrs, _afe_attrs.DetessellationTransformAttrs):
            assert len(node.input_node_names) == 1
            input_node_name = node.input_node_names[0]
            tessellated_output = node_outputs[input_node_name]
            detessellated_output = node_outputs[node.name]
            inverse_detessellate = ev_tessellation(
                detessellated_output, attrs.slice_shape, attrs.align_c16, attrs.cblock
            )
            assert np.array_equal(inverse_detessellate, tessellated_output)

    def evaluate_model_graph(
            self, model_graph: Any, inputs: dict[InputName, np.ndarray],
            untessellated_inputs: dict[str, np.ndarray] | None, stage: int,
            node_type: NodeType | None = None
    ) -> list[np.ndarray] | None:
        """
        Evaluate a model graph that has been lowered to the n2a_compiler backend's
        representation and compiled using the compile_awesomenet function.

        Files that are produced in the compilation should reside in the self.out_dir
        directory and their names should correspond to names of the MLA subgraph nodes,
        using the .mlc file extension.

        :param model_graph: The ModelGraph to evaluate.
        :param inputs: Input values of the model graph. These arrays are in MLA layout.
        :param untessellated_inputs: Dictionary containing the untessellated inputs of
            the MLA subgraph. Needed to generate the reference output by running
            evaluate_model in order to produce the ofm_chk file.
        :return: Packed reference outputs of the model graph, in MLA layout
            (N, H*W*16C), or None if evaluation runs asynchronously in another process.
        """
        if self.evaluate_task_type == EvaluateTaskType.EXT_CMD:
            # Create the chk file with an external command, in another process.
            import subprocess
            import sys
            from afe.backends.mla.afe_to_n2a_compiler.defines import export_model_to_json

            self.write_inputs_to_ifm_file(
                inputs, model_graph.compile_properties.pack_parameters,
                self.get_ifm_data_file_name(stage)
            )

            # Export model graph to JSON.
            json_dir = Path(self.out_dir) / "model_graph_json"
            json_dir.mkdir(parents=True, exist_ok=True)
            json_name = json_dir / f"{self.model_name}_stage{stage}.json"
            export_model_to_json(model_graph, json_dir, json_name, untessellated_inputs)

            # Due to Nuitka, we cannot run a python module directly.
            # Instead, make a script and run it.
            ext_script = get_ofm_executable_script()
            evaluate_cmd = [sys.executable, '-c', ext_script,
                            '--in', json_name,
                            '--out', self.get_ofm_chk_file_name(stage)]
            sima_log_info("Start evaluate process to generate check file")
            sima_log_dbg(f"  cmd = {evaluate_cmd}")
            self.evaluate_task = subprocess.Popen(evaluate_cmd, stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
            return None
        elif self.evaluate_task_type == EvaluateTaskType.FORK_PROCESS:
            # Create the chk file in a forked process. Faster than EXT_CMD, but uses
            # more memory.
            from multiprocessing import Process

            sima_log_info("Start evaluate process to generate check file")
            self.evaluate_task = Process(
                target=super().evaluate_model_graph,
                args=(model_graph, inputs, untessellated_inputs, stage, node_type)
            )
            self.evaluate_task.start()
            return None
        else:
            ofm_data = super().evaluate_model_graph(
                model_graph, inputs, untessellated_inputs, stage, node_type
            )
            return ofm_data

    def execute_model_graph(self, stage: int):
        base_mlc_file_name = make_mlc_file_name(self.out_dir, self.model_name, stage)
        mlc_model_file_name = base_mlc_file_name + '.elf'
        assert os.path.isfile(mlc_model_file_name)

        # Set the L2 cache mlc file name if the file exists.
        mlc_l2_cache_file_name = base_mlc_file_name + '_l2_cache.elf'
        if not os.path.isfile(mlc_l2_cache_file_name):
            mlc_l2_cache_file_name = None

        mlc_chk_file_name = self.get_ofm_chk_file_name(stage)
        mlc_data_file_name = self.get_ifm_data_file_name(stage)
        run_simulator(
            self.platform_type, mlc_model_file_name, mlc_data_file_name, mlc_chk_file_name,
            mlc_l2_cache_file_name=mlc_l2_cache_file_name,
            report_sim_failure=self._report_sim_failure)

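# A minimal usage sketch for the simulator-backed runner (hypothetical names; assumes
# compile_awesomenet already produced the .elf and .mlc files in out_dir):
#
#   sim = N2ABackendSimulator(out_dir="out", model_name="resnet",
#                             platform_type=Platform.GEN1)
#   sim.execute_node(mla_node, inputs, node_outputs)  # writes the ifm/ofm_chk files
#   sim.execute_model_graph(stage=0)                  # runs ISIM against those files
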
class AcceleratorModeRunner(N2ACompiledBackendRunner):
    def __init__(self, compilation_mode: CompilationMode = CompilationMode.L1_BASED,
                 out_dir: str = "", layout: str = "", batch_size: int = 1,
                 model_name: str = "", accel_pipeline: Optional[Pipeline] = None,
                 file_name_postfix: str = ""):
        super().__init__(compilation_mode, out_dir, layout, batch_size, model_name,
                         file_name_postfix)
        self.accel_pipeline = accel_pipeline

    def execute_sima_ir_node(self, node: AwesomeNode, inputs: Dict[InputName, SiMaIRTensorTypes],
                             node_outputs: Dict[NodeName, Union[np.ndarray, Tuple[np.ndarray, ...]]]):
        super().execute_sima_ir_node(node, inputs, node_outputs)
        attrs = node.ir.attrs
        if isinstance(attrs, _afe_attrs.DetessellationTransformAttrs):
            assert len(node.input_node_names) == 1
            input_node_name = node.input_node_names[0]
            tessellated_output = node_outputs[input_node_name]
            detessellated_output = node_outputs[node.name]
            inverse_detessellate = ev_tessellation(
                detessellated_output, attrs.slice_shape, attrs.align_c16, attrs.cblock
            )
            assert np.array_equal(inverse_detessellate, tessellated_output)

    # This is a helper function to get IFM and OFM size for MLA node.
    # This function is needed because IFM and OFM must be allocated on the devkit.
    def get_ifm_and_ofm_size(self, node_type: NodeType) -> Tuple[int, int]:
        input_values = list(node_type.inputs.values())
        assert len(input_values) == 1
        _, ifm_size = get_expected_tensor_value(input_values[0]).shape
        _, ofm_size = get_expected_tensor_value(node_type.output).shape
        return ifm_size, ofm_size

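    # Example (illustrative shape): if the node type's single input has the MLA 2D
    # shape (1, 150528), get_ifm_and_ofm_size returns ifm_size == 150528, and the
    # OFM size is read from the output shape in the same way.
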
    # Execute model graph in accelerator mode
    def evaluate_model_graph(
            self, model_graph: Any, inputs: dict[InputName, np.ndarray],
            untessellated_inputs: dict[str, np.ndarray] | None, stage: int,
            node_type: NodeType | None = None
    ) -> list[np.ndarray]:
        # Currently only support one ifm and ofm section in this mode.
        assert (
            len(model_graph.compile_properties.pack_parameters)
            == len(model_graph.compile_properties.unpack_parameters) == 1
        )
        # Allocation of IFM and OFM on the DevKit happens only once:
        # during execution of the model for the first image from the dataset.
        if not self.accel_pipeline.is_ifm_ofm_initialized():
            ifm_size, ofm_size = self.get_ifm_and_ofm_size(node_type)
            self.accel_pipeline.init_ifm_ofm(ifm_size, ofm_size)

        assert len(inputs) == 1
        ifm = list(inputs.values())[0]
        ofm_bytes, _, _, _ = self.accel_pipeline.run_inference(preprocessed_frame=ifm)

        # Postprocess output - convert to np.ndarray and reshape.
        sdk_outputs = self.accel_pipeline.postprocess_output(ofm_bytes)
        data = sdk_outputs.reshape(1, -1)
        return [data]

def create_n2a_backend_runner(net: AwesomeNet,
                              compilation_mode: CompilationMode = CompilationMode.L1_BASED,
                              out_dir: Optional[str] = None, layout: str = "NHWC",
                              batch_size: int = 1,
                              platform_type: Platform = Platform.GEN1,
                              run_mode: RunMode = RunMode.N2A_COMPILER,
                              accel_pipeline: Optional[Pipeline] = None,
                              file_name_postfix: str = "", *,
                              report_sim_failure: Optional[SimFailureReporter] = None) \
        -> N2ABackendRunner:
    if net.status == Status.BACKEND_IR_LOWERED:
        return N2ABackendRunner(compilation_mode, layout, batch_size)
    elif net.status == Status.BACKEND_IR_COMPILED:
        assert out_dir is not None, \
            "Please provide an output directory to which files will be generated."
        if run_mode == RunMode.ACCELERATOR:
            # The user cannot run an AwesomeNet in accelerator mode without providing a
            # Pipeline() object to the create_n2a_backend_runner() function.
            assert accel_pipeline is not None, \
                "Please provide Pipeline() for an accelerator mode run."
            return AcceleratorModeRunner(compilation_mode, out_dir, layout, batch_size,
                                         net.name, accel_pipeline, file_name_postfix)
        elif run_mode == RunMode.SIMULATOR:
            assert accel_pipeline is None, "Pipeline() is only used in accelerator mode runs."
            return N2ABackendSimulator(compilation_mode, out_dir, layout, batch_size,
                                       platform_type, net.name, file_name_postfix,
                                       report_sim_failure=report_sim_failure)
        else:
            assert accel_pipeline is None, "Pipeline() is only used in accelerator mode runs."
            return N2ACompiledBackendRunner(compilation_mode, out_dir, layout, batch_size,
                                            net.name, file_name_postfix)
    else:
        raise ValueError(f"Cannot create backend runner for network with status {net.status}")

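# Usage sketch (assumed workflow): choose a runner for a network whose status is
# Status.BACKEND_IR_COMPILED and run it on the simulator. `net` is a placeholder.
#
#   runner = create_n2a_backend_runner(
#       net, out_dir="/tmp/compiled_model", run_mode=RunMode.SIMULATOR,
#       platform_type=Platform.GEN1)
#   assert isinstance(runner, N2ABackendSimulator)
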
def _pack_numpy_arrays(data: List[np.ndarray]) -> np.ndarray:
    return pack(data)


def _write_data_to_file(input_data: dict[str, np.ndarray], file_name: str, line_data_str: str):
    """
    Writes the data into an mlc file. Used for writing input data into the ifm.mlc
    file and reference output data into the chk.mlc file.

    :param input_data: Dictionary of data for each section that is written into the
        reference file.
    :param file_name: Name of the file to write the data into.
    :param line_data_str: String written before the address and the data. Should be
        either ".dram_data" or ".dram_check".
    """
    assert line_data_str in (".dram_data", ".dram_check")
    _row_size: int = 16
    batch_size: int | None = None
    with open(file_name, "w") as f:
        for section_name, section_data in input_data.items():
            assert section_data.ndim == 2 and section_data.shape[1] % _row_size == 0
            if batch_size is None:
                batch_size = section_data.shape[0]
            else:
                assert batch_size == section_data.shape[0]
            for b in range(batch_size):
                f.write(f".begin_section \"{section_name}.b{b}\"\n")
                for addr in range(0, section_data.shape[1], _row_size):
                    row_data = section_data[b][addr:addr + _row_size]
                    f.write(f"{line_data_str} {addr} {' '.join(map(str, row_data.tolist()))}\n")
                f.write(".end_section\n")

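# Default failure reporter used by run_simulator() below. Its definition is not part
# of this listing, so this is a minimal sketch, assuming it simply prints a warning
# naming what was run; it matches the SimFailureReporter signature.
def print_simulator_failure_warning(what_was_run: str) -> None:
    print(f"Warning: MLA simulation failed for {what_was_run}")

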
def run_simulator(platform_type: Platform,
                  mlc_model_file_name: str,
                  mlc_data_file_name: Optional[str] = None,
                  mlc_chk_file_name: Optional[str] = None,
                  *,
                  mlc_l2_cache_file_name: Optional[str] = None,
                  report_sim_failure: Optional[SimFailureReporter] = print_simulator_failure_warning
                  ) -> None:
    """
    Run the MLA simulator.

    :param platform_type: Platform type.
    :param mlc_model_file_name: Name of the model file to run.
    :param mlc_data_file_name: Name of the data file to load to initialize the input.
        If None, the input is obtained from the model file.
    :param mlc_chk_file_name: Name of the chk file to load to verify the output.
        If None, the output is not validated.
    :param mlc_l2_cache_file_name: Name of the L2 cache file to load.
    :param report_sim_failure: Action when simulation fails. It receives a short
        description of what was run. If None, simulation failure is ignored.
    """
    # Run the simulation.
    sim_runner = SimRunner(
        platform_type, mlc_model_file_name, mlc_data_file_name, mlc_chk_file_name,
        mlc_l2_cache_file_name
    )
    ret = sim_runner.run()
    # When ret is non-zero, an error has occurred during simulation.
    if ret and report_sim_failure is not None:
        report_sim_failure("file " + mlc_model_file_name)