Source code for afe.backends.mla.afe_to_n2a_compiler.n2a_compiler_utils

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou
#########################################################
import os

import numpy as np
import numpy.typing
from typing import Tuple, List, Optional, Protocol
from typing_extensions import assert_never

from afe._tvm._utils import is_supported_mla_pool_size
from afe.ir.defines import InputShape
from afe.ir.utils import transpose_attr_according_to_layout_strings
from afe.backends.mla.afe_to_n2a_compiler.defines import (
    PlaceholderName, ConvolutionParameters, TensorType, compiler_operator, tuple_placeholder_vertex
)
from afe.backends.mla.afe_to_n2a_compiler.defines import (
    CompilerVertex, CompilerInputShapes, CompilerWeightDtype, CompilerBiasDtype,
    QuantizedWeightDtypes, CompilerBiasQuantizedDtype, Narrowing, bfloat16,
    get_id_requantization, is_integer_type, normalize, pool_requantization
)
from ml_kernels.math_helpers import RoundType, Activation, ArgMinMaxOp
from ml_kernels.requantization import BaseRequantization, FractionalZeroRequantization


def is_32_bit_vertex(vertex: CompilerVertex) -> bool:
    """
    Checks if the output of the Vertex is 32-bit.

    :param vertex: The Vertex whose output type is checked.
    :return: True if the Vertex output is 32-bit, otherwise False.
    """
    return vertex.tensor_shape.dtype.num_bytes == 4


def cast_weight(tensor: np.ndarray) -> np.ndarray:
    if is_integer_type(np.dtype(tensor.dtype)):
        assert tensor.dtype in QuantizedWeightDtypes
        return tensor
    else:
        return normalize(tensor, CompilerWeightDtype)


def cast_bias(tensor: np.ndarray) -> np.ndarray:
    if is_integer_type(tensor.dtype):
        iinfo = np.iinfo(CompilerBiasQuantizedDtype)
        assert iinfo.min <= tensor.min() and tensor.max() <= iinfo.max
        return tensor.astype(CompilerBiasQuantizedDtype)
    else:
        return normalize(tensor, CompilerBiasDtype)


def to_conv2d_weights_layout(tensor: np.ndarray) -> np.ndarray:
    """
    Reshape the weight tensor to DHWIO format.

    Model SDK uses HWIGO/DHWIGO layout for the weight, where "I" is input channels,
    "G" is groups, and "O" is output channels. For the compiler, the weight tensor
    is reshaped so that G and O form one dimension. Depthwise conv2d is treated as
    grouped conv2d; in this case, the group size is equal to the number of input
    channels.

    :param tensor: Weight tensor in HWIGO/DHWIGO layout.
    :return: A tensor in DHWIO layout.
    """
    tensor = cast_weight(tensor)
    tensor = np.reshape(tensor, tensor.shape[:-2] + (-1,))
    if tensor.ndim == 4:
        tensor = np.expand_dims(tensor, axis=0)
    return tensor
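
# Illustrative sketch (not part of the original module, hypothetical shapes,
# assuming int8 is an accepted quantized weight dtype): an HWIGO weight with
# G = 1 has its last two dimensions merged and a D = 1 axis prepended.
#
#     w_hwigo = np.zeros((3, 3, 8, 1, 16), dtype=np.int8)   # H, W, I, G, O
#     w_dhwio = to_conv2d_weights_layout(w_hwigo)
#     assert w_dhwio.shape == (1, 3, 3, 8, 16)               # D, H, W, I, G*O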


def to_compiler_bias_layout(tensor: np.ndarray) -> np.ndarray:
    """
    Squeeze the tensor in a bias vertex to 1 dimension.

    :param tensor: np.ndarray
    :return: Flattened tensor with dimension = 1.
    """
    tensor = cast_bias(tensor)
    # When tensor is a 1-element array (e.g. output channel is 1)
    if tensor.ndim == 0 or tensor.ndim > 1:
        # Make sure only one dimension has shape > 1
        assert [dim > 1 for dim in tensor.shape].count(True) <= 1
        ch_size = max(tensor.shape)
        tensor = tensor.reshape(ch_size)
    assert tensor.ndim == 1
    return tensor


def shape_to_compiler_data_layout(shape: InputShape, current_layout: str) -> CompilerInputShapes:
    """
    Convert the shape to the compiler's DHWC layout.

    :param shape: Shapes of the input.
    :param current_layout: Layout currently used in the provided shape.
    :return: Compiler input shape.
    """
    match len(shape):
        case 4:
            # NHWC/NCHW -> DHWC where D = 1.
            shape = tuple(
                transpose_attr_according_to_layout_strings(
                    shape, current_layout, desired_layout="HWC"
                )
            )
            shape = (1, *shape)
        case 5:
            # NDHWC -> DHWC
            shape = shape[1:]
        case _ as unreachable:
            assert_never(unreachable)
    return shape
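
# Illustrative sketch (not part of the original module, hypothetical shapes,
# assuming transpose_attr_according_to_layout_strings reorders dimensions by
# matching layout characters): a 4-D shape is reordered to HWC and prefixed
# with D = 1, while a 5-D NDHWC shape simply drops N.
#
#     shape_to_compiler_data_layout((1, 3, 224, 224), current_layout="NCHW")
#     # -> (1, 224, 224, 3)   i.e. (D, H, W, C)
#     shape_to_compiler_data_layout((1, 4, 56, 56, 64), current_layout="NDHWC")
#     # -> (4, 56, 56, 64)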


def axis_to_compiler_data_layout(axis: int, current_layout: str) -> int:
    """
    Convert the axis to the axis in DHWC.

    :param axis: Axis of the input.
    :param current_layout: Layout currently used in the provided axis.
    :return: Position of the axis in DHWC.
    """
    s = current_layout[int(axis)]
    new_axis = "DHWC".index(s)
    assert new_axis is not None and new_axis >= 0, "Error: Invalid axis {}".format(axis)
    return new_axis
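
# Illustrative sketch (not part of the original module): the channel axis of an
# NCHW tensor (axis 1, layout character "C") maps to axis 3 in DHWC.
#
#     axis_to_compiler_data_layout(1, current_layout="NCHW")   # -> 3
#     axis_to_compiler_data_layout(2, current_layout="NHWC")   # -> 2 ("W")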


def get_fake_data(shapes: InputShape, dtype: np.typing.DTypeLike) -> np.ndarray:
    """
    Generate a new tensor without initializing values.

    :param shapes: Shape of the tensor that will be created.
    :param dtype: Data type of the tensor that will be created.
    :return: Array with the specified shape.
    """
    return np.empty(shapes, dtype=dtype)


def fix_dtype(dtype: type):
    """
    Change the float32 dtype to bfloat16.

    FIXME: should remove this function when the correct scalar type is passed
    to the gen_vertex functions.
    """
    assert issubclass(dtype, (np.number, bfloat16))
    if dtype == np.float32:
        return bfloat16
    else:
        return dtype


#############################################
# Functions to generate n2a_compiler vertex
#############################################


def gen_compiler_placeholder_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    return compiler_operator.placeholder_vertex(
        TensorType.from_shape(data.shape), data.dtype, data.shape, PlaceholderName(name), name=name
    )


def gen_compiler_constant_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    assert isinstance(data, np.ndarray)
    tensor_type = TensorType.from_shape(data.shape)
    # Expand the dim for bias or shift-like tensors
    if tensor_type == TensorType.FM and data.ndim == 1:
        data = data.reshape(1, 1, 1, data.shape[0])
    return compiler_operator.constant_vertex(tensor_type, data, name=name)


def gen_compiler_weight_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    assert isinstance(data, np.ndarray)
    vertex_name = name + "/weights"
    tensor_type = TensorType.FILTER
    return compiler_operator.constant_vertex(tensor_type, data, name=vertex_name)


def gen_compiler_bias_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    vertex_name = name + "/bias"
    return gen_compiler_constant_vertex(vertex_name, data)


def gen_compiler_prelu_alpha_vertex(name: str, data: np.ndarray) -> CompilerVertex:
    vertex_name = name + "/alpha"
    return gen_compiler_constant_vertex(vertex_name, cast_weight(data))


def gen_compiler_tuple_vertex(name: str, vertices: list[CompilerVertex]) -> CompilerVertex:
    return tuple_placeholder_vertex(vertices, name=name)


def gen_compiler_concat_vertex(
        name: str, vertices: list[CompilerVertex], requants: list[BaseRequantization], axis: int,
        split_axis: int | None = None, split_block: int | None = None
) -> CompilerVertex:
    return compiler_operator.concat_vertex(
        vertices, requants, axis=axis, split_axis=split_axis, split_block=split_block, name=name
    )


def gen_compiler_prelu_vertex(
        name: str, datav: CompilerVertex, alphav: CompilerVertex, zp: int, shift: int,
        rounding_type: RoundType
) -> CompilerVertex:
    return compiler_operator.prelu_vertex(
        datav, alphav, zp, shift, rounding_type=rounding_type, name=name
    )


class ConvVertexProtocol(Protocol):
    """
    A wrapper around a n2a_compiler vertex constructor function for convolution.
    There are several functions for different variants of convolution. This
    protocol is used to share parts of the compiler interface's implementation.
    """
    def __call__(
            self,
            name: str,
            output_shape: tuple[int, int, int, int],
            data_vertex: CompilerVertex,
            weight_vertex: CompilerVertex,
            bias_vertex: CompilerVertex | None,
            input_zp: int,
            output_zp: int,
            strides: tuple[int, int, int],
            padding: tuple[int, int, int, int, int, int],
            dilation: tuple[int, int, int],
            requant: BaseRequantization[np.ndarray],
            msb_left_shift: bool = False,
            activ: Activation = Activation.NONE,
            is_depthwise: bool = False,
            groups: int = 1,
            clip_range: tuple[int, int] | tuple[float, float] | None = None
    ) -> CompilerVertex:
        ...


def gen_compiler_conv2d_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        is_depthwise: bool = False,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None,
) -> CompilerVertex:
    # Get convolution parameters
    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    if is_depthwise:
        # Depthwise convolution with groups = 1 is done as a regular convolution.
        assert groups > 1
    assert len(data_shape) == len(output_shape) == 4
    assert len(weight_shape) == 5
    assert data_shape[-1] // groups == weight_shape[-2]
    assert weight_shape[-1] == output_shape[-1]
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=padding, stride=strides, dilation=dilation, is_transposed=False, num_groups=groups
    )
    return compiler_operator.conv2d_vertex(
        conv_params, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp, requant,
        msb_left_shift=msb_left_shift, activation=activ, name=name, clip_range=clip_range
    )


def gen_compiler_add_subtract_vertex(
        name: str, lhs_vertex: CompilerVertex, rhs_vertex: CompilerVertex, in1_scale: int,
        in2_scale: int, requant: BaseRequantization[np.ndarray], op: str,
        activ: Activation = Activation.NONE,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    if activ == Activation.CLIP:
        assert clip_range is not None, "Clip range must be specified if clip activation is used."
    return compiler_operator.add_vertex(
        x=lhs_vertex, y=rhs_vertex, op=op, x_scale=in1_scale, y_scale=in2_scale,
        requantization=requant, activation=activ, clip_range=clip_range, name=name
    )


def gen_compiler_mul_vertex(
        name: str, lhs_vertex: CompilerVertex, rhs_vertex: CompilerVertex, zp_lhs: int,
        zp_rhs: int, requant: BaseRequantization[np.ndarray], intrinsic_shift: int
) -> CompilerVertex:
    return compiler_operator.mul_vertex(
        x=lhs_vertex, y=rhs_vertex, zp_a=zp_lhs, zp_b=zp_rhs, requantization=requant,
        intrinsic_shift=intrinsic_shift, name=name
    )


def gen_compiler_arg_min_max_vertex(
        name: str, input_vertex: CompilerVertex, is_max: bool, select_last_index: bool
) -> CompilerVertex:
    return compiler_operator.arg_min_max_vertex(
        input_vertex, ArgMinMaxOp.MAX if is_max else ArgMinMaxOp.MIN,
        select_last_index=select_last_index, name=name
    )


def gen_compiler_maxpool_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        pool_size: tuple[int, int, int],
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        output_dtype: type = np.int8,
        requant: BaseRequantization | None = None
) -> CompilerVertex:
    """
    Assuming the input data_layout is DHWC.
    """
    # Get pooling parameters
    data_shape = data_vertex.operator.shape.shape
    assert is_supported_mla_pool_size(pool_size)
    pool_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=pool_size[0], filter_height=pool_size[1], filter_width=pool_size[2],
        padding=padding, stride=strides, dilation=(1, 1, 1), is_transposed=False
    )
    requantization = get_id_requantization(output_dtype) if requant is None else requant
    return compiler_operator.pool_vertex(
        pool_params, data_vertex, op="max", requantization=requantization, name=name
    )


def gen_compiler_avgpool_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        op: str = "average",
        pool_size: tuple[int, int, int] | None = None,
        strides: tuple[int, int, int] | None = None,
        padding: tuple[int, int, int, int, int, int] | None = None,
        rounding_type: RoundType = RoundType.TRUNC,
        output_dtype: type = np.int8,
        requant: BaseRequantization | None = None,
        pad_value: int = 0
) -> CompilerVertex:
    """
    Assuming the input data_layout is DHWC.
    """
    assert op in ["average", "global", "adaptive"]
    data_shape = data_vertex.operator.shape.shape
    if op == "average":
        assert all(e is not None for e in [pool_size, strides, padding])
        assert is_supported_mla_pool_size(pool_size)
    elif op in ["global", "adaptive"]:
        assert all(x == 0 for x in padding)
        assert all(x in (1, y) or z == 1 for x, y, z in zip(strides, data_shape, output_shape))
        strides = (1, 1, 1)
    pool_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=pool_size[0], filter_height=pool_size[1], filter_width=pool_size[2],
        padding=padding, stride=strides, dilation=(1, 1, 1), is_transposed=False
    )
    if requant is None:
        requantization = pool_requantization(
            pool_size, "average", rounding_type=rounding_type, out_dtype=output_dtype
        )
    else:
        requantization = requant
    return compiler_operator.pool_vertex(
        pool_params, data_vertex, op="average", requantization=requantization,
        ifm_zero_point=pad_value, name=name
    )


def gen_compiler_variance_vertex(
        name: str, data_vertex: CompilerVertex, mean_vertex: CompilerVertex,
        requantization: BaseRequantization, req_var: BaseRequantization | None = None
) -> CompilerVertex:
    """
    Assuming the data_layouts are HWC.
    """
    return compiler_operator.variance_vertex(
        input=data_vertex, mean=mean_vertex, requantization=requantization, req_var=req_var,
        name=name
    )


def gen_compiler_grid_sample_vertex(
        name: str, data_vertex: CompilerVertex, grid_vertex: CompilerVertex, padding_mode: str,
        align_corners: bool
) -> CompilerVertex:
    """Generate compiler vertex for GridSample.

    Args:
        name: Name of operator node.
        data_vertex: Input data tensor, in channel-last layout.
        grid_vertex: Flow field or grid tensor, in channel-last layout.
        padding_mode: Mode to pad if out of boundary, "zeros" or "border".
            "reflection" is not supported.
        align_corners: Whether to align the four corners, "True" or "False".

    Returns:
        CompilerVertex for GridSample.
    """
    return compiler_operator.grid_sample_vertex(
        ifm=data_vertex, grid=grid_vertex, padding_mode=padding_mode,
        align_corners=align_corners, name=name
    )


def gen_compiler_lrn_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        window_size: int,
        input_zp: int,
        lut_scale: int,
        lut_zp_corr: int,
        lut_sh: int,
        output_scale: int,
        output_zp_corr: int,
        output_sh: int,
        rounding_type: RoundType,
        output_dtype: type = np.int8
) -> CompilerVertex:
    requant_lut = FractionalZeroRequantization(
        lut_scale, lut_zp_corr, Narrowing(lut_sh, rounding_type, output_dtype)
    )
    requant_output = FractionalZeroRequantization(
        output_scale, output_zp_corr, Narrowing(output_sh, rounding_type, output_dtype)
    )
    return compiler_operator.lrn_vertex(
        data_vertex, lut=lut_vertex, window_size=window_size, zp_input=input_zp,
        requant_lut=requant_lut, requant_output=requant_output, name=name
    )


def gen_compiler_softmax_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_exp_vertex: CompilerVertex,
        lut_rec_vertex: CompilerVertex,
        axis: int,
        exp_zp: int | None,
        rec_zp: int | None,
        requant_lut: BaseRequantization[np.ndarray],
        requant_output: BaseRequantization[np.ndarray],
        lut_input_pre_shift: int | None = None,
        output_pre_shift: int | None = None
) -> CompilerVertex:
    return compiler_operator.softmax_vertex(
        data_vertex, lut_exp=lut_exp_vertex, lut_rec=lut_rec_vertex, axis=axis, zp_exp=exp_zp,
        zp_rec=rec_zp, requant_lut=requant_lut, requant_output=requant_output,
        lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift, name=name
    )


def _gen_compiler_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    # Get convolution parameters
    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    assert data_shape[-1] == weight_shape[-2]
    assert weight_shape[-1] == output_shape[-1]
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0], input_height=data_shape[1], input_width=data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=padding, stride=strides, dilation=dilation, is_transposed=True, num_groups=groups
    )
    return compiler_operator.transposed_conv2d_vertex(
        conv_params, data_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
        ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
        activation=activ, clip_range=clip_range, name=name
    )


def _gen_compiler_2x_depthwise_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    """
    Translate a depthwise transpose conv2d to upscale + depthwise conv2d:
    * upscale: 2x zero-insertion upscale
    * depthwise conv2d
    """
    # Create vertex names
    upscale_name = name + "/zero_insertion_upscale"
    conv2d_name = name + "/depthwise_conv2d"

    data_shape = data_vertex.operator.shape.shape
    weight_shape = weight_vertex.operator.shape.shape
    assert data_shape[-1] == weight_shape[-1] == output_shape[-1]
    # Current compiler limitation.
    assert data_shape[0] == strides[0] == 1

    # Make sure the conv2d_transpose is doing 2x upsampling
    is_upscale_h_2x = strides[1] == 2
    is_upscale_w_2x = strides[2] == 2

    # 1st - Create zero-insertion upscale vertex
    if is_upscale_h_2x and is_upscale_w_2x:
        axis = "h_w"
    elif is_upscale_h_2x:
        axis = "height"
    elif is_upscale_w_2x:
        axis = "width"
    else:
        axis = None
    if axis is not None:
        upscale_vertex = compiler_operator.upscale_vertex(
            data_vertex, op="zero", axis=axis, zp=input_zp, rounding=RoundType.TOEVEN,
            name=upscale_name
        )
    else:
        upscale_vertex = data_vertex

    # 2nd - Create depthwise conv2d.
    # Unlike simulation through ml_kernels, where the paddings converted from TransposedConv2d
    # to Conv2d are zero-inserted before calling conv2d with no padding, here zero insertion is
    # done by the upscale vertex, which can only resize to (H, W) times stride. Hence, we need
    # an MLA-specific adjustment here.
    # Theoretically:
    #   Size after zero insertion = (HW - 1) * stride + 1 = HW * stride - (stride - 1)
    #   NewPadding = K - p - 1
    # MLA implementation:
    #   Size after upscale = HW * stride, which has extra (stride - 1), to be deducted from
    #   the paddings.
    #   NewPadding for top and left = K - p - 1, as usual
    #   NewPadding for bottom and right = K - p - 1 - (stride - 1)
    trans_padding = (
        weight_shape[0] - 1 - padding[0],
        weight_shape[0] - 1 - padding[1] - (strides[0] - 1),
        weight_shape[1] - 1 - padding[2],
        weight_shape[1] - 1 - padding[3] - (strides[1] - 1),
        weight_shape[2] - 1 - padding[4],
        weight_shape[2] - 1 - padding[5] - (strides[2] - 1)
    )
    assert groups == data_shape[-1], (
        "Number of groups is expected to be C for depthwise convolution."
    )
    conv_params = ConvolutionParameters(
        input_depth=data_shape[0],
        input_height=(2 if is_upscale_h_2x else 1) * data_shape[1],
        input_width=(2 if is_upscale_w_2x else 1) * data_shape[2],
        input_channels=data_shape[3],
        output_depth=output_shape[0], output_height=output_shape[1], output_width=output_shape[2],
        output_channels=output_shape[3],
        filter_depth=weight_shape[0], filter_height=weight_shape[1], filter_width=weight_shape[2],
        padding=trans_padding, stride=(1, 1, 1), dilation=(1, 1, 1), is_transposed=False,
        num_groups=groups
    )
    return compiler_operator.conv2d_vertex(
        conv_params, upscale_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
        ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
        activation=activ, clip_range=clip_range, name=conv2d_name
    )
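
# Worked example of the padding adjustment above (illustrative only, with
# hypothetical values): for a 3x3 depthwise filter (K = 3), stride 2, and
# symmetric input padding p = 1, the equivalent conv2d after the 2x
# zero-insertion upscale uses
#     top/left padding     = K - p - 1                = 1
#     bottom/right padding = K - p - 1 - (stride - 1) = 0
# because the upscale produces H * stride rows instead of the
# (H - 1) * stride + 1 rows that plain zero insertion would produce.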


def gen_compiler_conv2d_transpose_vertex(
        name: str,
        output_shape: tuple[int, int, int, int],
        data_vertex: CompilerVertex,
        weight_vertex: CompilerVertex,
        bias_vertex: CompilerVertex | None,
        input_zp: int,
        output_zp: int,
        strides: tuple[int, int, int],
        padding: tuple[int, int, int, int, int, int],
        dilation: tuple[int, int, int],
        requant: BaseRequantization[np.ndarray],
        msb_left_shift: bool = False,
        activ: Activation = Activation.NONE,
        is_depthwise: bool = False,
        groups: int = 1,
        clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
    _gen_vertex_func: ConvVertexProtocol
    if is_depthwise:
        _gen_vertex_func = _gen_compiler_2x_depthwise_conv2d_transpose_vertex
    else:
        _gen_vertex_func = _gen_compiler_conv2d_transpose_vertex
    return _gen_vertex_func(
        name, output_shape, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp,
        strides, padding, dilation, requant, msb_left_shift=msb_left_shift, activ=activ,
        groups=groups, clip_range=clip_range
    )


def gen_compiler_udf_vertex(
        name: str, data_vertex: CompilerVertex, lut_vertex: CompilerVertex
) -> CompilerVertex:
    return compiler_operator.udf_vertex_table(data_vertex, lut_vertex, name=name)


def gen_compiler_erf_vertex(data_vertex: CompilerVertex, name: str) -> CompilerVertex:
    return compiler_operator.erf_vertex(data_vertex, name)


def gen_compiler_upsampling_vertex(
        name: str, data_vertex: CompilerVertex, op: str, axis: str, zp: int, rounding: RoundType
) -> CompilerVertex:
    return compiler_operator.upscale_vertex(data_vertex, op, axis, zp, rounding, name)


def gen_compiler_resize_general_vertex(
        data_vertex: CompilerVertex,
        target_spatial_shape: tuple[int, int, int],
        zp: int,
        rounding: RoundType,
        mode: str,
        name: str | None = None
) -> CompilerVertex:
    input_shape: tuple[int, int, int, int] = data_vertex.tensor_shape.shape
    output_shape: tuple[int, int, int, int] = (*target_spatial_shape, input_shape[-1])
    return compiler_operator.resize_general_vertex(
        ifm=data_vertex,
        total_input_shape=input_shape,
        total_output_shape=output_shape,
        zp=zp,
        rounding=rounding,
        name=name,
        mode=mode,
        segment_shape=output_shape,
    )


def gen_compiler_resize_integer_factor_vertex(
        data_vertex: CompilerVertex,
        scaling_factors: tuple[int, ...],
        zp: int,
        rounding: RoundType,
        name: str | None = None
) -> CompilerVertex:
    return compiler_operator.resize_integer_factor_vertex(
        data_vertex, scaling_factors, zp, rounding, name
    )


def generate_resize_vertex(
        data: CompilerVertex,
        method: str,
        target_spatial_shape: tuple[int, int, int],
        zp: int,
        rounding: str,
        tf_ver: int = 1,
        mode: str = 'half_pixel',
        name: str | None = None
) -> CompilerVertex:
    """
    Generate a ResizeGeneralVertex, a ResizeIntegerFactorVertex, or a series of UpscaleVertex.

    :param data: CompilerVertex that represents the input.
    :param method: Method used by the upsampling algorithm.
    :param target_spatial_shape: Spatial shape to which the output needs to be resized.
    :param zp: Zero point to use for zero interpolation.
    :param rounding: Rounding mode, one of 'trunc', 'up', 'even'.
    :param tf_ver: Version of the TF algorithm, 1 or 2.
    :param mode: ONNX coordinate_transformation_mode, one of
        ['half_pixel', 'align_corners', 'asymmetric'].
    :param name: Name used for the generated vertices.
    """
    assert tf_ver in [1, 2]
    output = data
    vertex_id = 0
    scaling_factors: tuple[float, ...] = tuple(
        x / y for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
    )
    is_integer_scaling: bool = all(
        x % y == 0 for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
    )
    if (
            (not is_integer_scaling or any(x >= 64 for x in scaling_factors) or mode != 'half_pixel')
            and method in ("linear", "bilinear")
    ):
        assert tf_ver == 2, (
            f"Unsupported tf_ver ({tf_ver}) for method ({method}) and scaling factors "
            f"{scaling_factors}, with coordinate_transformation_mode {mode}."
        )
        return gen_compiler_resize_general_vertex(
            data, target_spatial_shape, zp, rounding, mode, name
        )
    scaling_factors: tuple[int, ...] = tuple(int(x) for x in scaling_factors)
    if (
            tf_ver == 2 and method in ("linear", "bilinear")
            and (
                any(x not in (1, 2, 4) for x in scaling_factors)
                or all(x == 1 for x in scaling_factors)
            )
    ):
        return gen_compiler_resize_integer_factor_vertex(data, scaling_factors, zp, rounding, name)
    log2_scale: tuple[int, ...] = tuple(x.bit_length() - 1 for x in scaling_factors)
    if method == "nearest_neighbor" or tf_ver == 1:
        op = "nearest" if method == "nearest_neighbor" else "linear"
        while log2_scale != (0, 0, 0):
            d, h, w = log2_scale
            axis = [0, 0]
            if h > 0:
                h = h - 1
                axis[0] = 1
            if w > 0:
                w = w - 1
                axis[1] = 1
            axis = {(0, 1): "width", (1, 0): "height", (1, 1): "h_w"}[tuple(axis)]
            vertex_name = f"{name}/{vertex_id}"
            output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
            log2_scale = (d, h, w)
            vertex_id += 1
    else:
        d, h, w = log2_scale
        if h > 2 or w > 2:
            raise NotImplementedError(
                "MLA only supports 2x or 4x upscale in the H and/or W dimension for Tensorflow V2."
                f" Got {2 ** log2_scale[1]} along H and {2 ** log2_scale[2]} along W"
            )
        while log2_scale != (0, 0, 0):
            d, h, w = log2_scale
            axis = [0, 0]
            if h > 1:
                if h == w:
                    w = w - 2
                    axis[1] = 2
                h = h - 2
                axis[0] = 2
                op = "linear4"
            elif h > 0:
                if h == w:
                    w = w - 1
                    axis[1] = 1
                h = h - 1
                axis[0] = 1
                op = "linear2"
            elif w > 1:
                w = w - 2
                axis[0] = 0
                axis[1] = 2
                op = "linear4"
            elif w > 0:
                w = w - 1
                axis[0] = 0
                axis[1] = 1
                op = "linear2"
            if axis[0] == axis[1]:
                axis = "h_w"
            elif axis[1] > axis[0]:
                axis = "width"
            else:
                axis = "height"
            vertex_name = f"{name}/{vertex_id}"
            output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
            log2_scale = (d, h, w)
            vertex_id += 1
    return output
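
# Illustrative sketch (not part of the original module, hypothetical shapes): a
# 4x nearest-neighbor resize of a (1, 8, 8, C) input to spatial shape (1, 32, 32)
# with tf_ver=1 falls through to the upscale path, since log2(4) = 2 along both
# H and W, and is emitted as two chained 2x "nearest" upscale vertices named
# f"{name}/0" and f"{name}/1" with axis "h_w".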


def gen_compiler_relu_vertex(
        name: str, data_vertex: CompilerVertex, node_zp: int
) -> CompilerVertex:
    return compiler_operator.relu_vertex(ifm=data_vertex, zp=node_zp, name=name)


def gen_compiler_clip_vertex(
        name: str, data_vertex: CompilerVertex, clip_min: int, clip_max: int
) -> CompilerVertex:
    return compiler_operator.clip_vertex(
        ifm=data_vertex, clip_value_min=clip_min, clip_value_max=clip_max, name=name
    )


def gen_compiler_leaky_relu_vertex(
        name: str, data_vertex: CompilerVertex, alpha: int | float, node_zp: int,
        right_shift: int, rounding_type: RoundType
) -> CompilerVertex:
    return compiler_operator.leaky_relu_vertex(
        ifm=data_vertex, alpha=alpha, zp=node_zp, shift=right_shift,
        rounding_type=rounding_type, name=name
    )


def gen_compiler_slice_vertex(
        name: str, data_vertex: CompilerVertex, begin: tuple[int, int, int, int],
        size: tuple[int, int, int, int], stride: tuple[int, int, int, int] = (1, 1, 1, 1)
):
    return compiler_operator.slice_vertex(
        ifm=data_vertex, begin=begin, size=size, stride=stride, name=name
    )


def gen_compiler_layer_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        axis: int,
        epsilon: int | None = None,
        rsqrt_zp: int | None = None,
        requant_mean: BaseRequantization[np.ndarray] | None = None,
        requant_lut_input: BaseRequantization[np.ndarray] | None = None,
        requant_output: BaseRequantization[np.ndarray] | None = None
) -> CompilerVertex:
    return compiler_operator.layernorm_vertex(
        data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_rsqrt=rsqrt_zp, epsilon=epsilon,
        requant_mean=requant_mean, requant_lut=requant_lut_input, requant_output=requant_output,
        name=name
    )


def gen_compiler_rms_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        lut_vertex: CompilerVertex,
        axis: int,
        *,
        zp_ifm: int | None = None,
        zp_rsqrt: int | None = None,
        requant_lut: BaseRequantization | None = None,
        requant_output: BaseRequantization | None = None,
        lut_input_pre_shift: int | None = None,
        output_pre_shift: int | None = None,
        epsilon: float | None = None
) -> CompilerVertex:
    return compiler_operator.rms_norm_vertex(
        data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_ifm=zp_ifm, zp_rsqrt=zp_rsqrt,
        requant_lut=requant_lut, requant_output=requant_output,
        lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift,
        epsilon=epsilon, name=name
    )


def gen_compiler_instance_norm_vertex(
        name: str,
        data_vertex: CompilerVertex,
        mean_vertex: CompilerVertex,
        variance_vertex: CompilerVertex,
        lut_rsqrt_vertex: CompilerVertex,
        zp_rsqrt: int | None = None,
        requant_output: BaseRequantization | None = None,
        epsilon: float | None = None
) -> CompilerVertex:
    return compiler_operator.instancenorm_vertex(
        ifm=data_vertex, mean=mean_vertex, var=variance_vertex, lut_rsqrt=lut_rsqrt_vertex,
        zp_rsqrt=zp_rsqrt, requant_output=requant_output, epsilon=epsilon, name=name
    )


def gen_compiler_batch_matmul_vertex(
        name: str, lhs: CompilerVertex, rhs: CompilerVertex, transpose_b: bool,
        input_zps: list[int], requant: BaseRequantization, intrinsic_shift: int
) -> CompilerVertex:
    return compiler_operator.batch_matmul_vertex(
        lhs, rhs, transpose_b=transpose_b, ifm_zp=input_zps, requant_ofm=requant,
        intrinsic_shift=intrinsic_shift, name=name
    )


def gen_compiler_output_vertex(data_vertex: CompilerVertex) -> CompilerVertex:
    return compiler_operator.output_vertex(data_vertex, name=f"{data_vertex.name}_output")


def gen_compiler_broadcast_to_vertex(
        name: str, input_vertex: CompilerVertex, output_shape: tuple[int, int, int, int]
) -> CompilerVertex:
    return compiler_operator.broadcast_vertex(input_vertex, output_shape, name=name)


def gen_compiler_requantization_vertex(
        name: str, input_vertex: CompilerVertex, requant: BaseRequantization
) -> CompilerVertex:
    return compiler_operator.requantization_vertex(input_vertex, requant, name=name)


def gen_compiler_transpose_vertex(
        name: str, input_vertex: CompilerVertex, perm: tuple[int, int, int, int]
) -> CompilerVertex:
    return compiler_operator.transpose_vertex(input_vertex, perm, name=name)


def gen_compiler_depth_to_space_vertex(
        name: str, input_vertex: CompilerVertex, block_size: int, mode: str
) -> CompilerVertex:
    return compiler_operator.depth_to_space_vertex(
        input_vertex, block_size=block_size, mode=mode, name=name
    )


def gen_compiler_sigmoid_vertex(
        name: str, input_vertex: CompilerVertex, lut_exp: CompilerVertex, lut_rec: CompilerVertex
) -> CompilerVertex:
    return compiler_operator.sigmoid_vertex(input_vertex, lut_exp, lut_rec, name=name)


def make_mlc_file_name(output_dir: str, name: str, stage: int) -> str:
    """
    Make mlc file name.

    :param output_dir: Directory where the file will be placed.
    :param name: Name used as the beginning of the filename.
    :param stage: Stage number of the graph, appended to make a unique filename.
    """
    return os.path.join(output_dir, f"{name}_stage{stage}_mla")
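
# Example (illustrative only): make_mlc_file_name("out", "resnet50", 2)
# returns os.path.join("out", "resnet50_stage2_mla").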