Source code for afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_utils

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou
#########################################################
import os

import numpy as np
import numpy.typing
from typing import Tuple, List, Optional, Protocol
from typing_extensions import assert_never

from afe._tvm._utils import is_supported_mla_pool_size
from afe.ir.defines import InputShape
from afe.ir.utils import transpose_attr_according_to_layout_strings
from afe.backends.mla.afe_to_n2a_compiler.defines import (
    PlaceholderName, ConvolutionParameters, TensorType, compiler_operator, tuple_placeholder_vertex
)
from afe.backends.mla.afe_to_n2a_compiler.defines import (
    CompilerVertex, CompilerInputShapes, CompilerWeightDtype, CompilerBiasDtype,
    QuantizedWeightDtypes, CompilerBiasQuantizedDtype, Narrowing, bfloat16,
    get_id_requantization, is_integer_type, normalize, pool_requantization
)
from ml_kernels.math_helpers import RoundType, Activation, ArgMinMaxOp
from ml_kernels.requantization import BaseRequantization, FractionalZeroRequantization


def is_32_bit_vertex(vertex: CompilerVertex) -> bool:
    """
    Checks if the output of the Vertex is 32-bit.

    :param vertex: The Vertex whose output type is checked.
    :return: True if the Vertex output is 32-bit, otherwise False.
    """
    return vertex.tensor_shape.dtype.num_bytes == 4


def cast_weight(tensor: np.ndarray) -> np.ndarray:
    if is_integer_type(np.dtype(tensor.dtype)):
        assert tensor.dtype in QuantizedWeightDtypes
        return tensor
    else:
        return normalize(tensor, CompilerWeightDtype)


def cast_bias(tensor: np.ndarray) -> np.ndarray:
    if is_integer_type(tensor.dtype):
        iinfo = np.iinfo(CompilerBiasQuantizedDtype)
        assert iinfo.min <= tensor.min() and tensor.max() <= iinfo.max
        return tensor.astype(CompilerBiasQuantizedDtype)
    else:
        return normalize(tensor, CompilerBiasDtype)


def to_conv2d_weights_layout(tensor: np.ndarray) -> np.ndarray:
    """
    Reshape the weight tensor to DHWIO format.

    Model SDK uses HWIGO/DHWIGO layout for the weight, where "I" is input channels,
    "G" is groups, and "O" is output channels. For the compiler, the weight tensor
    is reshaped so that G and O form one dimension. Depthwise conv2d is treated as
    grouped conv2d; in this case, the group size is equal to the number of input
    channels.

    :param tensor: Weight tensor in HWIGO/DHWIGO layout.
    :return: A tensor in DHWIO layout.
    """
    tensor = cast_weight(tensor)
    tensor = np.reshape(tensor, tensor.shape[:-2] + (-1,))
    if tensor.ndim == 4:
        tensor = np.expand_dims(tensor, axis=0)
    return tensor
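
# Illustrative sketch (not part of the original module, hypothetical shapes,
# assuming int8 is an accepted quantized weight dtype): an HWIGO weight with
# G = 1 has its last two dimensions merged and a D = 1 axis prepended.
#
#     w_hwigo = np.zeros((3, 3, 8, 1, 16), dtype=np.int8)   # H, W, I, G, O
#     w_dhwio = to_conv2d_weights_layout(w_hwigo)
#     assert w_dhwio.shape == (1, 3, 3, 8, 16)               # D, H, W, I, G*O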


def to_compiler_bias_layout(tensor: np.ndarray) -> np.ndarray:
    """
    Squeeze the tensor in a bias vertex to 1 dimension.

    :param tensor: np.ndarray
    :return: Flattened tensor with dimension = 1.
    """
    tensor = cast_bias(tensor)
    # When tensor is a 1-element array (e.g. output channel is 1)
    if tensor.ndim == 0 or tensor.ndim > 1:
        # Make sure only one dimension has shape > 1
        assert [dim > 1 for dim in tensor.shape].count(True) <= 1
        ch_size = max(tensor.shape)
        tensor = tensor.reshape(ch_size)
    assert tensor.ndim == 1
    return tensor


def shape_to_compiler_data_layout(shape: InputShape, current_layout: str) -> CompilerInputShapes:
    """
    Convert the shape to the compiler's DHWC layout.

    :param shape: Shapes of the input.
    :param current_layout: Layout currently used in the provided shape.
    :return: Compiler input shape.
    """
    match len(shape):
        case 4:
            # NHWC/NCHW -> DHWC where D = 1.
            shape = tuple(
                transpose_attr_according_to_layout_strings(
                    shape, current_layout, desired_layout="HWC"
                )
            )
            shape = (1, *shape)
        case 5:
            # NDHWC -> DHWC
            shape = shape[1:]
        case _ as unreachable:
            assert_never(unreachable)
    return shape
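
# Illustrative sketch (not part of the original module, hypothetical shapes,
# assuming transpose_attr_according_to_layout_strings reorders dimensions by
# matching layout characters): a 4-D shape is reordered to HWC and prefixed
# with D = 1, while a 5-D NDHWC shape simply drops N.
#
#     shape_to_compiler_data_layout((1, 3, 224, 224), current_layout="NCHW")
#     # -> (1, 224, 224, 3)   i.e. (D, H, W, C)
#     shape_to_compiler_data_layout((1, 4, 56, 56, 64), current_layout="NDHWC")
#     # -> (4, 56, 56, 64)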


def axis_to_compiler_data_layout(axis: int, current_layout: str) -> int:
    """
    Convert the axis to the axis in DHWC.

    :param axis: Axis of the input.
    :param current_layout: Layout currently used in the provided axis.
    :return: Position of the axis in DHWC.
    """
    s = current_layout[int(axis)]
    new_axis = "DHWC".index(s)
    assert new_axis is not None and new_axis >= 0, "Error: Invalid axis {}".format(axis)
    return new_axis
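
# Illustrative sketch (not part of the original module): the channel axis of an
# NCHW tensor (axis 1, layout character "C") maps to axis 3 in DHWC.
#
#     axis_to_compiler_data_layout(1, current_layout="NCHW")   # -> 3
#     axis_to_compiler_data_layout(2, current_layout="NHWC")   # -> 2 ("W")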


def get_fake_data(shapes: InputShape, dtype: np.typing.DTypeLike) -> np.ndarray:
    """
    Generate a new tensor without initializing values.

    :param shapes: Shape of the tensor that will be created.
    :param dtype: Data type of the tensor that will be created.
    :return: Array with the specified shape.
    """
    return np.empty(shapes, dtype=dtype)


def fix_dtype(dtype: type):
    """
    Change the float32 dtype to bfloat16.

    FIXME: should remove this function when the correct scalar type is passed
    to the gen_vertex functions.
    """
    assert issubclass(dtype, (np.number, bfloat16))
    if dtype == np.float32:
        return bfloat16
    else:
        return dtype


#############################################
# Functions to generate n2a_compiler vertex
#############################################


def gen_compiler_placeholder_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    return compiler_operator.placeholder_vertex(
        TensorType.from_shape(data.shape), data.dtype, data.shape, PlaceholderName(name), name=name
    )


def gen_compiler_constant_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    assert isinstance(data, np.ndarray)
    tensor_type = TensorType.from_shape(data.shape)
    # Expand the dim for bias or shift-like tensors
    if tensor_type == TensorType.FM and data.ndim == 1:
        data = data.reshape(1, 1, 1, data.shape[0])
    return compiler_operator.constant_vertex(tensor_type, data, name=name)


def gen_compiler_weight_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    assert isinstance(data, np.ndarray)
    vertex_name = name + "/weights"
    tensor_type = TensorType.FILTER
    return compiler_operator.constant_vertex(tensor_type, data, name=vertex_name)


def gen_compiler_bias_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    vertex_name = name + "/bias"
    return gen_compiler_constant_vertex(vertex_name, data)


def gen_compiler_prelu_alpha_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    vertex_name = name + "/alpha"
    return gen_compiler_constant_vertex(vertex_name, cast_weight(data))


def gen_compiler_tuple_vertex(name: str, vertices: list[CompilerVertex]) -> CompilerVertex:
    return tuple_placeholder_vertex(vertices, name=name)


def gen_compiler_concat_vertex(
        name: str, vertices: list[CompilerVertex], requants: list[BaseRequantization], axis: int,
        split_axis: int | None = None, split_block: int | None = None
) -> CompilerVertex:
    return compiler_operator.concat_vertex(
        vertices, requants, axis=axis, split_axis=split_axis, split_block=split_block, name=name
    )


def gen_compiler_prelu_vertex(
        name: str, datav: CompilerVertex, alphav: CompilerVertex, zp: int, shift: int,
        rounding_type: RoundType
) -> CompilerVertex:
    return compiler_operator.prelu_vertex(
        datav, alphav, zp, shift, rounding_type=rounding_type, name=name
    )


class ConvVertexProtocol(Protocol):
    """
    A wrapper around a n2a_compiler vertex constructor function for convolution.
    There are several functions for different variants of convolution. This
    protocol is used to share parts of the compiler interface's implementation.
    """
    def __call__(
            self,
            name: str,
            output_shape: tuple[int, int, int, int],
            data_vertex: CompilerVertex,
            weight_vertex: CompilerVertex,
            bias_vertex: CompilerVertex | None,
            input_zp: int,
            output_zp: int,
            strides: tuple[int, int, int],
            padding: tuple[int, int, int, int, int, int],
            dilation: tuple[int, int, int],
            requant: BaseRequantization[np.ndarray],
            msb_left_shift: bool = False,
            activ: Activation = Activation.NONE,
            is_depthwise: bool = False,
            groups: int = 1,
            clip_range: tuple[int, int] | tuple[float, float] | None = None
    ) -> CompilerVertex:
        ...


def gen_compiler_conv2d_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        is_depthwise: bool = False,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None,
) -> CompilerVertex:
    # Get convolution parameters
    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    if is_depthwise:
        # Depthwise convolution with groups = 1 is done as a regular convolution.
        assert groups > 1
    assert len(data_shape) == len(output_shape) == 4
    assert len(weight_shape) == 5
    assert data_shape[-1] // groups == weight_shape[-2]
    assert weight_shape[-1] == output_shape[-1]
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=padding, stride=strides, dilation=dilation, is_transposed=False, num_groups=groups
    )
    return compiler_operator.conv2d_vertex(
        conv_params, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp, requant,
        msb_left_shift=msb_left_shift, activation=activ, name=name, clip_range=clip_range
    )


def gen_compiler_add_subtract_vertex(
        name: str, lhs_vertex: CompilerVertex, rhs_vertex: CompilerVertex, in1_scale: int,
        in2_scale: int, requant: BaseRequantization[np.ndarray], op: str,
        activ: Activation = Activation.NONE,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    if activ == Activation.CLIP:
        assert clip_range is not None, "Clip range must be specified if clip activation is used."
    return compiler_operator.add_vertex(
        x=lhs_vertex, y=rhs_vertex, op=op, x_scale=in1_scale, y_scale=in2_scale,
        requantization=requant, activation=activ, clip_range=clip_range, name=name
    )


def gen_compiler_mul_vertex(
        name: str, lhs_vertex: CompilerVertex, rhs_vertex: CompilerVertex, zp_lhs: int,
        zp_rhs: int, requant: BaseRequantization[np.ndarray], intrinsic_shift: int
) -> CompilerVertex:
    return compiler_operator.mul_vertex(
        x=lhs_vertex, y=rhs_vertex, zp_a=zp_lhs, zp_b=zp_rhs, requantization=requant,
        intrinsic_shift=intrinsic_shift, name=name
    )


def gen_compiler_arg_min_max_vertex(
        name: str, input_vertex: CompilerVertex, is_max: bool, select_last_index: bool
) -> CompilerVertex:
    return compiler_operator.arg_min_max_vertex(
        input_vertex, ArgMinMaxOp.MAX if is_max else ArgMinMaxOp.MIN,
        select_last_index=select_last_index, name=name
    )


def gen_compiler_maxpool_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        pool_size: tuple[int, int, int],
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        output_dtype: type = np.int8,
        requant: BaseRequantization | None = None
) -> CompilerVertex:
    """
    Assuming the input data_layout is DHWC.
    """
    # Get pooling parameters
    data_shape = data_vertex.operator.shape.shape
    assert is_supported_mla_pool_size(pool_size)
    pool_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=pool_size[0], filter_height=pool_size[1], filter_width=pool_size[2],
        padding=padding, stride=strides, dilation=(1, 1, 1), is_transposed=False
    )
    requantization = get_id_requantization(output_dtype) if requant is None else requant
    return compiler_operator.pool_vertex(
        pool_params, data_vertex, op="max", requantization=requantization, name=name
    )


def gen_compiler_avgpool_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        op: str = "average",
        pool_size: tuple[int, int, int] | None = None,
        strides: tuple[int, int, int] | None = None,
        padding: tuple[int, int, int, int, int, int] | None = None,
        rounding_type: RoundType = RoundType.TRUNC,
        output_dtype: type = np.int8,
        requant: BaseRequantization | None = None,
        pad_value: int = 0
) -> CompilerVertex:
    """
    Assuming the input data_layout is DHWC.
    """
    assert op in ["average", "global", "adaptive"]
    data_shape = data_vertex.operator.shape.shape
    if op == "average":
        assert all(e is not None for e in [pool_size, strides, padding])
        assert is_supported_mla_pool_size(pool_size)
    elif op in ["global", "adaptive"]:
        assert all(x == 0 for x in padding)
        assert all(x in (1, y) or z == 1 for x, y, z in zip(strides, data_shape, output_shape))
        strides = (1, 1, 1)
    pool_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=pool_size[0], filter_height=pool_size[1], filter_width=pool_size[2],
        padding=padding, stride=strides, dilation=(1, 1, 1), is_transposed=False
    )
    if requant is None:
        requantization = pool_requantization(
            pool_size, "average", rounding_type=rounding_type, out_dtype=output_dtype
        )
    else:
        requantization = requant
    return compiler_operator.pool_vertex(
        pool_params, data_vertex, op="average", requantization=requantization,
        ifm_zero_point=pad_value, name=name
    )


def gen_compiler_variance_vertex(
        name: str, data_vertex: CompilerVertex, mean_vertex: CompilerVertex,
        requantization: BaseRequantization, req_var: BaseRequantization | None = None
) -> CompilerVertex:
    """
    Assuming the data_layouts are HWC.
    """
    return compiler_operator.variance_vertex(
        input=data_vertex, mean=mean_vertex, requantization=requantization, req_var=req_var,
        name=name
    )


def gen_compiler_grid_sample_vertex(
        name: str, data_vertex: CompilerVertex, grid_vertex: CompilerVertex, padding_mode: str,
        align_corners: bool
) -> CompilerVertex:
    """Generate compiler vertex for GridSample.

    Args:
        name: Name of operator node.
        data_vertex: Input data tensor, in channel-last layout.
        grid_vertex: Flow field or grid tensor, in channel-last layout.
        padding_mode: Mode to pad if out of boundary, "zeros" or "border".
            "reflection" is not supported.
        align_corners: Whether to align the four corners, "True" or "False".

    Returns:
        CompilerVertex for GridSample.
    """
    return compiler_operator.grid_sample_vertex(
        ifm=data_vertex, grid=grid_vertex, padding_mode=padding_mode,
        align_corners=align_corners, name=name
    )


def gen_compiler_lrn_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        window_size: int,
        input_zp: int,
        lut_scale: int,
        lut_zp_corr: int,
        lut_sh: int,
        output_scale: int,
        output_zp_corr: int,
        output_sh: int,
        rounding_type: RoundType,
        output_dtype: type = np.int8
) -> CompilerVertex:
    requant_lut = FractionalZeroRequantization(
        lut_scale, lut_zp_corr, Narrowing(lut_sh, rounding_type, output_dtype)
    )
    requant_output = FractionalZeroRequantization(
        output_scale, output_zp_corr, Narrowing(output_sh, rounding_type, output_dtype)
    )
    return compiler_operator.lrn_vertex(
        data_vertex, lut=lut_vertex, window_size=window_size, zp_input=input_zp,
        requant_lut=requant_lut, requant_output=requant_output, name=name
    )


def gen_compiler_softmax_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_exp_vertex: CompilerVertex,
        lut_rec_vertex: CompilerVertex,
        axis: int,
        exp_zp: int | None,
        rec_zp: int | None,
        requant_lut: BaseRequantization[np.ndarray],
        requant_output: BaseRequantization[np.ndarray],
        lut_input_pre_shift: int | None = None,
        output_pre_shift: int | None = None
) -> CompilerVertex:
    return compiler_operator.softmax_vertex(
        data_vertex, lut_exp=lut_exp_vertex, lut_rec=lut_rec_vertex, axis=axis, zp_exp=exp_zp,
        zp_rec=rec_zp, requant_lut=requant_lut, requant_output=requant_output,
        lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift, name=name
    )


def _gen_compiler_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    # Get convolution parameters
    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    assert data_shape[-1] == weight_shape[-2]
    assert weight_shape[-1] == output_shape[-1]
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=padding, stride=strides, dilation=dilation, is_transposed=True, num_groups=groups
    )
    return compiler_operator.transposed_conv2d_vertex(
        conv_params, data_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
        ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
        activation=activ, clip_range=clip_range, name=name
    )


def _gen_compiler_2x_depthwise_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    """
    Translate a depthwise transpose conv2d to upscale + depthwise conv2d:
    * upscale: 2x zero-insertion upscale
    * depthwise conv2d
    """
    # Create vertex names
    upscale_name = name + "/zero_insertion_upscale"
    conv2d_name = name + "/depthwise_conv2d"

    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    assert data_shape[-1] == weight_shape[-1] == output_shape[-1]
    # Current compiler limitation.
    assert data_shape[0] == strides[0] == 1

    # Make sure the conv2d_transpose is doing 2x upsampling
    is_upscale_h_2x = strides[1] == 2
    is_upscale_w_2x = strides[2] == 2

    # 1st - Create zero-insertion upscale vertex
    if is_upscale_h_2x and is_upscale_w_2x:
        axis = "h_w"
    elif is_upscale_h_2x:
        axis = "height"
    elif is_upscale_w_2x:
        axis = "width"
    else:
        axis = None
    if axis is not None:
        upscale_vertex = compiler_operator.upscale_vertex(
            data_vertex, op="zero", axis=axis, zp=input_zp, rounding=RoundType.TOEVEN,
            name=upscale_name
        )
    else:
        upscale_vertex = data_vertex

    # 2nd - Create depthwise conv2d.
    # Unlike simulation through ml_kernels, where the paddings converted from TransposedConv2d
    # to Conv2d are zero-inserted before calling conv2d with no padding, here zero insertion is
    # done by the upscale vertex, which can only resize to (H, W) times stride. Hence, we need
    # an MLA-specific adjustment here.
    # Theoretically:
    #   Size after zero insertion = (HW - 1) * stride + 1 = HW * stride - (stride - 1)
    #   NewPadding = K - p - 1
    # MLA implementation:
    #   Size after upscale = HW * stride, which has extra (stride - 1), to be deducted from
    #   the paddings.
    #   NewPadding for top and left = K - p - 1, as usual
    #   NewPadding for bottom and right = K - p - 1 - (stride - 1)
    trans_padding = (
        weight_shape[0] - 1 - padding[0],
        weight_shape[0] - 1 - padding[1] - (strides[0] - 1),
        weight_shape[1] - 1 - padding[2],
        weight_shape[1] - 1 - padding[3] - (strides[1] - 1),
        weight_shape[2] - 1 - padding[4],
        weight_shape[2] - 1 - padding[5] - (strides[2] - 1)
    )
    assert groups == data_shape[-1], (
        "Number of groups is expected to be C for depthwise convolution."
    )
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0],
        input_height=(2 if is_upscale_h_2x else 1) * data_shape[1],
        input_width=(2 if is_upscale_w_2x else 1) * data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=trans_padding, stride=(1, 1, 1), dilation=(1, 1, 1), is_transposed=False,
        num_groups=groups
    )
    return compiler_operator.conv2d_vertex(
        conv_params, upscale_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
        ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
        activation=activ, clip_range=clip_range, name=conv2d_name
    )
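
# Worked example of the padding adjustment above (illustrative only, with
# hypothetical values): for a 3x3 depthwise filter (K = 3), stride 2, and
# symmetric input padding p = 1, the equivalent conv2d after the 2x
# zero-insertion upscale uses
#     top/left padding     = K - p - 1                = 1
#     bottom/right padding = K - p - 1 - (stride - 1) = 0
# because the upscale produces H * stride rows instead of the
# (H - 1) * stride + 1 rows that plain zero insertion would produce.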


def gen_compiler_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        is_depthwise: bool = False,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    _gen_vertex_func: ConvVertexProtocol
    if is_depthwise:
        _gen_vertex_func = _gen_compiler_2x_depthwise_conv2d_transpose_vertex
    else:
        _gen_vertex_func = _gen_compiler_conv2d_transpose_vertex
    return _gen_vertex_func(
        name, output_shape, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp,
        strides, padding, dilation, requant, msb_left_shift=msb_left_shift, activ=activ,
        groups=groups, clip_range=clip_range
    )


def gen_compiler_udf_vertex(
        name: str, data_vertex: CompilerVertex, lut_vertex: CompilerVertex
) -> CompilerVertex:
    return compiler_operator.udf_vertex_table(data_vertex, lut_vertex, name=name)


def gen_compiler_erf_vertex(data_vertex: CompilerVertex, name: str) -> CompilerVertex:
    return compiler_operator.erf_vertex(data_vertex, name)


def gen_compiler_upsampling_vertex(
        name: str, data_vertex: CompilerVertex, op: str, axis: str, zp: int, rounding: RoundType
) -> CompilerVertex:
    return compiler_operator.upscale_vertex(data_vertex, op, axis, zp, rounding, name)


def gen_compiler_resize_general_vertex(
        data_vertex: CompilerVertex,
        target_spatial_shape: tuple[int, int, int],
        zp: int,
        rounding: RoundType,
        mode: str,
        name: str | None = None
) -> CompilerVertex:
    input_shape: tuple[int, int, int, int] = data_vertex.tensor_shape.shape
    output_shape: tuple[int, int, int, int] = (*target_spatial_shape, input_shape[-1])
    return compiler_operator.resize_general_vertex(
        ifm=data_vertex,
        total_input_shape=input_shape,
        total_output_shape=output_shape,
        zp=zp,
        rounding=rounding,
        name=name,
        mode=mode,
        segment_shape=output_shape,
    )


def gen_compiler_resize_integer_factor_vertex(
        data_vertex: CompilerVertex,
        scaling_factors: tuple[int, ...],
        zp: int,
        rounding: RoundType,
        name: str | None = None
) -> CompilerVertex:
    return compiler_operator.resize_integer_factor_vertex(
        data_vertex, scaling_factors, zp, rounding, name
    )


def generate_resize_vertex(
        data: CompilerVertex,
        method: str,
        target_spatial_shape: tuple[int, int, int],
        zp: int,
        rounding: str,
        tf_ver: int = 1,
        mode: str = 'half_pixel',
        name: str | None = None
) -> CompilerVertex:
    """
    Generate a ResizeGeneralVertex, a ResizeIntegerFactorVertex, or a series of UpscaleVertex.

    :param data: CompilerVertex that represents the input.
    :param method: Method used by the upsampling algorithm.
    :param target_spatial_shape: Spatial shape to which the output needs to be resized.
    :param zp: Zero point to use for zero interpolation.
    :param rounding: Rounding mode, one of 'trunc', 'up', 'even'.
    :param tf_ver: Version of the TF algorithm, 1 or 2.
    :param mode: ONNX coordinate_transformation_mode, one of
        ['half_pixel', 'align_corners', 'asymmetric'].
    :param name: Name used for the generated vertices.
    """
    assert tf_ver in [1, 2]
    output = data
    vertex_id = 0
    scaling_factors: tuple[float, ...] = tuple(
        x / y for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
    )
    is_integer_scaling: bool = all(
        x % y == 0 for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
    )
    if (
            (not is_integer_scaling or any(x >= 64 for x in scaling_factors) or mode != 'half_pixel')
            and method in ("linear", "bilinear")
    ):
        assert tf_ver == 2, (
            f"Unsupported tf_ver ({tf_ver}) for method ({method}) and scaling factors "
            f"{scaling_factors}, with coordinate_transformation_mode {mode}."
        )
        return gen_compiler_resize_general_vertex(
            data, target_spatial_shape, zp, rounding, mode, name
        )
    scaling_factors: tuple[int, ...] = tuple(int(x) for x in scaling_factors)
    if (
            tf_ver == 2 and method in ("linear", "bilinear")
            and (
                any(x not in (1, 2, 4) for x in scaling_factors)
                or all(x == 1 for x in scaling_factors)
            )
    ):
        return gen_compiler_resize_integer_factor_vertex(data, scaling_factors, zp, rounding, name)
    log2_scale: tuple[int, ...] = tuple(x.bit_length() - 1 for x in scaling_factors)
    if method == "nearest_neighbor" or tf_ver == 1:
        op = "nearest" if method == "nearest_neighbor" else "linear"
        while log2_scale != (0, 0, 0):
            d, h, w = log2_scale
            axis = [0, 0]
            if h > 0:
                h = h - 1
                axis[0] = 1
            if w > 0:
                w = w - 1
                axis[1] = 1
            axis = {(0, 1): "width", (1, 0): "height", (1, 1): "h_w"}[tuple(axis)]
            vertex_name = f"{name}/{vertex_id}"
            output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
            log2_scale = (d, h, w)
            vertex_id += 1
    else:
        d, h, w = log2_scale
        if h > 2 or w > 2:
            raise NotImplementedError(
                "MLA only supports 2x or 4x upscale in the H and/or W dimension for Tensorflow V2."
                f" Got {2 ** log2_scale[1]} along H and {2 ** log2_scale[2]} along W"
            )
        while log2_scale != (0, 0, 0):
            d, h, w = log2_scale
            axis = [0, 0]
            if h > 1:
                if h == w:
                    w = w - 2
                    axis[1] = 2
                h = h - 2
                axis[0] = 2
                op = "linear4"
            elif h > 0:
                if h == w:
                    w = w - 1
                    axis[1] = 1
                h = h - 1
                axis[0] = 1
                op = "linear2"
            elif w > 1:
                w = w - 2
                axis[0] = 0
                axis[1] = 2
                op = "linear4"
            elif w > 0:
                w = w - 1
                axis[0] = 0
                axis[1] = 1
                op = "linear2"
            if axis[0] == axis[1]:
                axis = "h_w"
            elif axis[1] > axis[0]:
                axis = "width"
            else:
                axis = "height"
            vertex_name = f"{name}/{vertex_id}"
            output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
            log2_scale = (d, h, w)
            vertex_id += 1
    return output
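
# Illustrative sketch (not part of the original module, hypothetical shapes): a
# 4x nearest-neighbor resize of a (1, 8, 8, C) input to spatial shape (1, 32, 32)
# with tf_ver=1 falls through to the upscale path, since log2(4) = 2 along both
# H and W, and is emitted as two chained 2x "nearest" upscale vertices named
# f"{name}/0" and f"{name}/1" with axis "h_w".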


def gen_compiler_relu_vertex(
        name: str, data_vertex: CompilerVertex, node_zp: int
) -> CompilerVertex:
    return compiler_operator.relu_vertex(ifm=data_vertex, zp=node_zp, name=name)


def gen_compiler_clip_vertex(
        name: str, data_vertex: CompilerVertex, clip_min: int, clip_max: int
) -> CompilerVertex:
    return compiler_operator.clip_vertex(
        ifm=data_vertex, clip_value_min=clip_min, clip_value_max=clip_max, name=name
    )


def gen_compiler_leaky_relu_vertex(
        name: str, data_vertex: CompilerVertex, alpha: int | float, node_zp: int,
        right_shift: int, rounding_type: RoundType
) -> CompilerVertex:
    return compiler_operator.leaky_relu_vertex(
        ifm=data_vertex, alpha=alpha, zp=node_zp, shift=right_shift,
        rounding_type=rounding_type, name=name
    )


def gen_compiler_slice_vertex(
        name: str, data_vertex: CompilerVertex, begin: tuple[int, int, int, int],
        size: tuple[int, int, int, int], stride: tuple[int, int, int, int] = (1, 1, 1, 1)
):
    return compiler_operator.slice_vertex(
        ifm=data_vertex, begin=begin, size=size, stride=stride, name=name
    )


def gen_compiler_layer_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        axis: int,
        epsilon: int | None = None,
        rsqrt_zp: int | None = None,
        requant_mean: BaseRequantization[np.ndarray] | None = None,
        requant_lut_input: BaseRequantization[np.ndarray] | None = None,
        requant_output: BaseRequantization[np.ndarray] | None = None
) -> CompilerVertex:
    return compiler_operator.layernorm_vertex(
        data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_rsqrt=rsqrt_zp, epsilon=epsilon,
        requant_mean=requant_mean, requant_lut=requant_lut_input, requant_output=requant_output,
        name=name
    )


def gen_compiler_rms_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        axis: int,
        *,
        zp_ifm: int | None = None,
        zp_rsqrt: int | None = None,
        requant_lut: BaseRequantization | None = None,
        requant_output: BaseRequantization | None = None,
        lut_input_pre_shift: int | None = None,
        output_pre_shift: int | None = None,
        epsilon: float | None = None
) -> CompilerVertex:
    return compiler_operator.rms_norm_vertex(
        data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_ifm=zp_ifm, zp_rsqrt=zp_rsqrt,
        requant_lut=requant_lut, requant_output=requant_output,
        lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift,
        epsilon=epsilon, name=name
    )


def gen_compiler_instance_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        mean_vertex: CompilerVertex,
        variance_vertex: CompilerVertex,
        lut_rsqrt_vertex: CompilerVertex,
        zp_rsqrt: int | None = None,
        requant_output: BaseRequantization | None = None,
        epsilon: float | None = None
) -> CompilerVertex:
    return compiler_operator.instancenorm_vertex(
        ifm=data_vertex, mean=mean_vertex, var=variance_vertex, lut_rsqrt=lut_rsqrt_vertex,
        zp_rsqrt=zp_rsqrt, requant_output=requant_output, epsilon=epsilon, name=name
    )


def gen_compiler_batch_matmul_vertex(
        name: str, lhs: CompilerVertex, rhs: CompilerVertex, transpose_b: bool,
        input_zps: list[int], requant: BaseRequantization, intrinsic_shift: int
) -> CompilerVertex:
    return compiler_operator.batch_matmul_vertex(
        lhs, rhs, transpose_b=transpose_b, ifm_zp=input_zps, requant_ofm=requant,
        intrinsic_shift=intrinsic_shift, name=name
    )


def gen_compiler_output_vertex(data_vertex: CompilerVertex) -> CompilerVertex:
    return compiler_operator.output_vertex(data_vertex, name=f"{data_vertex.name}_output")


def gen_compiler_broadcast_to_vertex(
        name: str, input_vertex: CompilerVertex, output_shape: tuple[int, int, int, int]
) -> CompilerVertex:
    return compiler_operator.broadcast_vertex(input_vertex, output_shape, name=name)


def gen_compiler_requantization_vertex(
        name: str, input_vertex: CompilerVertex, requant: BaseRequantization
) -> CompilerVertex:
    return compiler_operator.requantization_vertex(input_vertex, requant, name=name)


def gen_compiler_transpose_vertex(
        name: str, input_vertex: CompilerVertex, perm: tuple[int, int, int, int]
) -> CompilerVertex:
    return compiler_operator.transpose_vertex(input_vertex, perm, name=name)


def gen_compiler_depth_to_space_vertex(
        name: str, input_vertex: CompilerVertex, block_size: int, mode: str
) -> CompilerVertex:
    return compiler_operator.depth_to_space_vertex(
        input_vertex, block_size=block_size, mode=mode, name=name
    )


def gen_compiler_sigmoid_vertex(
        name: str, input_vertex: CompilerVertex, lut_exp: CompilerVertex, lut_rec: CompilerVertex
) -> CompilerVertex:
    return compiler_operator.sigmoid_vertex(input_vertex, lut_exp, lut_rec, name=name)


def make_mlc_file_name(output_dir: str, name: str, stage: int) -> str:
    """
    Make mlc file name.

    :param output_dir: Directory where the file will be placed.
    :param name: Name used as the beginning of the filename.
    :param stage: Stage number of the graph, appended to make a unique filename.
    """
    return os.path.join(output_dir, f"{name}_stage{stage}_mla")
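
# Example (illustrative only): make_mlc_file_name("out", "resnet50", 2)
# returns os.path.join("out", "resnet50_stage2_mla").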