#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou
#########################################################
import os
import numpy as np
import numpy.typing
from typing import Protocol
from typing_extensions import assert_never
from afe._tvm._utils import is_supported_mla_pool_size
from afe.ir.defines import InputShape
from afe.ir.utils import transpose_attr_according_to_layout_strings
from afe.backends.mla.afe_to_n2a_compiler.defines import (
PlaceholderName, ConvolutionParameters, TensorType, compiler_operator, tuple_placeholder_vertex
)
from afe.backends.mla.afe_to_n2a_compiler.defines import (
CompilerVertex, CompilerInputShapes, CompilerWeightDtype, CompilerBiasDtype,
QuantizedWeightDtypes, CompilerBiasQuantizedDtype, Narrowing, bfloat16,
get_id_requantization, is_integer_type, normalize, pool_requantization
)
from ml_kernels.math_helpers import RoundType, Activation, ArgMinMaxOp
from ml_kernels.requantization import BaseRequantization, FractionalZeroRequantization
def is_32_bit_vertex(vertex: CompilerVertex) -> bool:
"""
Checks if the output of the Vertex is 32-bit.
    :param vertex: The Vertex whose output type is checked.
:return: True if the Vertex output is 32-bit, otherwise False.
"""
return vertex.tensor_shape.dtype.num_bytes == 4
def cast_weight(tensor: np.ndarray) -> np.ndarray:
if is_integer_type(np.dtype(tensor.dtype)):
assert tensor.dtype in QuantizedWeightDtypes
return tensor
else:
return normalize(tensor, CompilerWeightDtype)
def cast_bias(tensor: np.ndarray) -> np.ndarray:
if is_integer_type(tensor.dtype):
iinfo = np.iinfo(CompilerBiasQuantizedDtype)
assert iinfo.min <= tensor.min() and tensor.max() <= iinfo.max
return tensor.astype(CompilerBiasQuantizedDtype)
else:
return normalize(tensor, CompilerBiasDtype)
def to_conv2d_weights_layout(tensor: np.ndarray) -> np.ndarray:
"""
    Convert the weight tensor to DHWIO format.
Model SDK uses HWIGO/DHWIGO layout for the weight, where "I" is input channels, "G" is groups,
and "O" is output channels. For the compiler, the weight tensor is reshaped so that G and O
form one dimension.
Depthwise conv2d is treated as grouped conv2d. In this case, the group size is equal to
the number of input channels.
    :param tensor: Weight tensor in HWIGO or DHWIGO layout
:return: A tensor in DHWIO layout
"""
tensor = cast_weight(tensor)
tensor = np.reshape(tensor, tensor.shape[:-2] + (-1,))
if tensor.ndim == 4:
tensor = np.expand_dims(tensor, axis=0)
return tensor
def to_compiler_bias_layout(tensor: np.ndarray) -> np.ndarray:
"""
Squeeze the tensor in bias vertex to 1 dimension
:param tensor: np.ndarray
:return: Flatten tensor with dimension = 1
"""
tensor = cast_bias(tensor)
    # Handle 0-d scalars and tensors with extra dimensions (e.g. when the output channel count is 1)
if tensor.ndim == 0 or tensor.ndim > 1:
# make sure only one dimension has shape > 1
assert [dim > 1 for dim in tensor.shape].count(True) <= 1
        ch_size = max(tensor.shape, default=1)
tensor = tensor.reshape(ch_size)
assert tensor.ndim == 1
return tensor
def shape_to_compiler_data_layout(shape: InputShape, current_layout: str) -> CompilerInputShapes:
"""
    Convert the shape to the compiler's DHWC layout.
:param shape: Shapes of the input
:param current_layout: Currently used layout in provided shape
:return: Compiler input shape
"""
match len(shape):
case 4:
# NHWC/NCHW -> DHWC where D = 1.
shape = tuple(
transpose_attr_according_to_layout_strings(
shape, current_layout, desired_layout="HWC"
)
)
shape = (1, *shape)
case 5:
# NDHWC -> DHWC
shape = shape[1:]
case _ as unreachable:
assert_never(unreachable)
return shape
def axis_to_compiler_data_layout(axis: int, current_layout: str) -> int:
"""
Convert the axis to the axis in DHWC.
:param axis: Axis of the input.
:param current_layout: Currently used layout in provided axis.
    :return: Position of the axis in DHWC layout.
"""
s = current_layout[int(axis)]
new_axis = "DHWC".index(s)
    assert new_axis >= 0, "Error: Invalid axis {}".format(axis)
return new_axis
def get_fake_data(shapes: InputShape, dtype: np.typing.DTypeLike) -> np.ndarray:
"""
Generate new tensor without initializing values.
    :param shapes: Shape of the tensor that will be created.
    :param dtype: Data type of the created tensor.
    :return: Uninitialized array with the specified shape and dtype.
"""
return np.empty(shapes, dtype=dtype)
def fix_dtype(dtype: type) -> type:
"""
Change the float32 dtype to bfloat16.
FIXME: should remove this function when the correct scalar type is passed
to the gen_vertex functions.
"""
assert issubclass(dtype, (np.number, bfloat16))
if dtype == np.float32:
return bfloat16
else:
return dtype
#############################################
# Functions to generate n2a_compiler vertex
#############################################
def gen_compiler_placeholder_vertex(name: str, data: np.ndarray) -> CompilerVertex:
return compiler_operator.placeholder_vertex(
TensorType.from_shape(data.shape), data.dtype, data.shape,
PlaceholderName(name), name=name)
def gen_compiler_constant_vertex(name: str, data: np.ndarray) -> CompilerVertex:
assert isinstance(data, np.ndarray)
tensor_type = TensorType.from_shape(data.shape)
# Expand the dim for bias or shift-like tensors
if tensor_type == TensorType.FM and data.ndim == 1:
data = data.reshape(1, 1, 1, data.shape[0])
return compiler_operator.constant_vertex(tensor_type, data, name=name)
def gen_compiler_weight_vertex(name: str, data: np.ndarray) -> CompilerVertex:
assert isinstance(data, np.ndarray)
vertex_name = name + "/weights"
tensor_type = TensorType.FILTER
return compiler_operator.constant_vertex(tensor_type, data, name=vertex_name)
def gen_compiler_bias_vertex(name: str, data: np.ndarray) -> CompilerVertex:
vertex_name = name + "/bias"
return gen_compiler_constant_vertex(vertex_name, data)
def gen_compiler_prelu_alpha_vertex(name: str, data: np.ndarray) -> CompilerVertex:
vertex_name = name + "/alpha"
return gen_compiler_constant_vertex(vertex_name, cast_weight(data))
def gen_compiler_tuple_vertex(name: str, vertices: list[CompilerVertex]) -> CompilerVertex:
return tuple_placeholder_vertex(vertices, name=name)
def gen_compiler_concat_vertex(
name: str, vertices: list[CompilerVertex], requants: list[BaseRequantization], axis: int,
split_axis: int | None = None, split_block: int | None = None
) -> CompilerVertex:
return compiler_operator.concat_vertex(
vertices, requants, axis=axis, split_axis=split_axis, split_block=split_block, name=name
)
def gen_compiler_prelu_vertex(
name: str, datav: CompilerVertex, alphav: CompilerVertex, zp: int, shift: int,
rounding_type: RoundType
) -> CompilerVertex:
return compiler_operator.prelu_vertex(
datav, alphav, zp, shift, rounding_type=rounding_type, name=name
)
class ConvVertexProtocol(Protocol):
"""
    The call signature of an n2a_compiler vertex constructor function for convolution.
There are several functions for different variants of convolution. This protocol is used
to share parts of the compiler interface's implementation.
"""
def __call__(
self,
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
weight_vertex: CompilerVertex,
bias_vertex: CompilerVertex | None,
input_zp: int,
output_zp: int,
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
dilation: tuple[int, int, int],
requant: BaseRequantization[np.ndarray],
msb_left_shift: bool = False,
activ: Activation = Activation.NONE,
is_depthwise: bool = False,
groups: int = 1,
clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
...
def gen_compiler_conv2d_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
weight_vertex: CompilerVertex,
bias_vertex: CompilerVertex | None,
input_zp: int,
output_zp: int,
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
dilation: tuple[int, int, int],
requant: BaseRequantization[np.ndarray],
msb_left_shift: bool = False,
activ: Activation = Activation.NONE,
is_depthwise: bool = False,
groups: int = 1,
clip_range: tuple[int, int] | tuple[float, float] | None = None,
) -> CompilerVertex:
# Get convolution parameters
data_shape = data_vertex.operator.shape.shape
weight_shape = weight_vertex.operator.shape.shape
if is_depthwise:
# Depthwise convolution with groups = 1 is done as a regular convolution.
assert groups > 1
assert len(data_shape) == len(output_shape) == 4
assert len(weight_shape) == 5
assert data_shape[-1] // groups == weight_shape[-2]
assert weight_shape[-1] == output_shape[-1]
conv_params = ConvolutionParameters(
input_depth=data_shape[0],
input_height=data_shape[1],
input_width=data_shape[2],
input_channels=data_shape[3],
output_depth=output_shape[0],
output_height=output_shape[1],
output_width=output_shape[2],
output_channels=output_shape[3],
filter_depth=weight_shape[0],
filter_height=weight_shape[1],
filter_width=weight_shape[2],
padding=padding,
stride=strides,
dilation=dilation,
is_transposed=False,
num_groups=groups
)
return compiler_operator.conv2d_vertex(
conv_params, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp, requant,
msb_left_shift=msb_left_shift, activation=activ, name=name, clip_range=clip_range
)
def gen_compiler_add_subtract_vertex(
name: str,
lhs_vertex: CompilerVertex,
rhs_vertex: CompilerVertex,
in1_scale: int,
in2_scale: int,
requant: BaseRequantization[np.ndarray],
op: str,
activ: Activation = Activation.NONE,
clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
if activ == Activation.CLIP:
assert clip_range is not None, "Clip range must be specified if clip activation is used."
return compiler_operator.add_vertex(
x=lhs_vertex, y=rhs_vertex, op=op, x_scale=in1_scale, y_scale=in2_scale,
requantization=requant, activation=activ, clip_range=clip_range, name=name
)
def gen_compiler_mul_vertex(
name: str,
lhs_vertex: CompilerVertex,
rhs_vertex: CompilerVertex,
zp_lhs: int,
zp_rhs: int,
requant: BaseRequantization[np.ndarray],
intrinsic_shift: int
) -> CompilerVertex:
return compiler_operator.mul_vertex(
x=lhs_vertex, y=rhs_vertex, zp_a=zp_lhs, zp_b=zp_rhs, requantization=requant,
intrinsic_shift=intrinsic_shift, name=name
)
def gen_compiler_arg_min_max_vertex(
name: str, input_vertex: CompilerVertex, is_max: bool, select_last_index: bool
) -> CompilerVertex:
return compiler_operator.arg_min_max_vertex(
input_vertex, ArgMinMaxOp.MAX if is_max else ArgMinMaxOp.MIN,
select_last_index=select_last_index, name=name
)
def gen_compiler_maxpool_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
pool_size: tuple[int, int, int],
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
output_dtype: type = np.int8,
requant: BaseRequantization | None = None
) -> CompilerVertex:
"""
Assuming the input data_layout is DHWC
"""
    # Get pooling parameters (expressed as convolution parameters)
data_shape = data_vertex.operator.shape.shape
assert is_supported_mla_pool_size(pool_size)
pool_params = ConvolutionParameters(
input_depth=data_shape[0],
input_height=data_shape[1],
input_width=data_shape[2],
input_channels=data_shape[3],
output_depth=output_shape[0],
output_height=output_shape[1],
output_width=output_shape[2],
output_channels=output_shape[3],
filter_depth=pool_size[0],
filter_height=pool_size[1],
filter_width=pool_size[2],
padding=padding,
stride=strides,
dilation=(1, 1, 1),
is_transposed=False
)
requantization = get_id_requantization(output_dtype) if requant is None else requant
return compiler_operator.pool_vertex(
pool_params, data_vertex, op="max", requantization=requantization, name=name
)
def gen_compiler_avgpool_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
op: str = "average",
pool_size: tuple[int, int, int] | None = None,
strides: tuple[int, int, int] | None = None,
padding: tuple[int, int, int, int, int, int] | None = None,
rounding_type: RoundType = RoundType.TRUNC,
output_dtype: type = np.int8,
requant: BaseRequantization | None = None,
pad_value: int = 0
) -> CompilerVertex:
"""
Assuming the input data_layout is DHWC
"""
assert op in ["average", "global", "adaptive"]
data_shape = data_vertex.operator.shape.shape
if op == "average":
assert all(e is not None for e in [pool_size, strides, padding])
assert is_supported_mla_pool_size(pool_size)
elif op in ["global", "adaptive"]:
assert all(x == 0 for x in padding)
assert all(x in (1, y) or z == 1 for x, y, z in zip(strides, data_shape, output_shape))
strides = (1, 1, 1)
pool_params = ConvolutionParameters(
input_depth=data_shape[0],
input_height=data_shape[1],
input_width=data_shape[2],
input_channels=data_shape[3],
output_depth=output_shape[0],
output_height=output_shape[1],
output_width=output_shape[2],
output_channels=output_shape[3],
filter_depth=pool_size[0],
filter_height=pool_size[1],
filter_width=pool_size[2],
padding=padding,
stride=strides,
dilation=(1, 1, 1),
is_transposed=False
)
if requant is None:
requantization = pool_requantization(
pool_size, "average", rounding_type=rounding_type, out_dtype=output_dtype
)
else:
requantization = requant
return compiler_operator.pool_vertex(
pool_params, data_vertex, op="average", requantization=requantization,
ifm_zero_point=pad_value, name=name
)
def gen_compiler_variance_vertex(
name: str, data_vertex: CompilerVertex, mean_vertex: CompilerVertex,
requantization: BaseRequantization, req_var: BaseRequantization | None = None
) -> CompilerVertex:
"""
Assuming the data_layouts are HWC.
"""
return compiler_operator.variance_vertex(
input=data_vertex, mean=mean_vertex, requantization=requantization, req_var=req_var,
name=name
)
def gen_compiler_grid_sample_vertex(
name: str, data_vertex: CompilerVertex, grid_vertex: CompilerVertex, padding_mode: str,
align_corners: bool
) -> CompilerVertex:
"""Generate compiler vertex for GridSample.
Args:
name: Name of operator node.
data_vertex: Input data tensor, in channel-last layout.
grid_vertex: Flow field or grid tensor, in channel-last layout.
        padding_mode: How to pad values sampled outside the input boundary, "zeros" or "border".
            "reflection" is not supported.
        align_corners: Whether to align the corner pixels of the input and output grids.
Returns:
CompilerVertex for GridSample.
"""
return compiler_operator.grid_sample_vertex(
ifm=data_vertex,
grid=grid_vertex,
padding_mode=padding_mode,
align_corners=align_corners,
name=name
)
def gen_compiler_lrn_vertex(
name: str,
data_vertex: CompilerVertex,
lut_vertex: CompilerVertex,
window_size: int,
input_zp: int,
lut_scale: int,
lut_zp_corr: int,
lut_sh: int,
output_scale: int,
output_zp_corr: int,
output_sh: int,
rounding_type: RoundType,
output_dtype: type = np.int8
) -> CompilerVertex:
requant_lut = FractionalZeroRequantization(
lut_scale, lut_zp_corr, Narrowing(lut_sh, rounding_type, output_dtype)
)
requant_output = FractionalZeroRequantization(
output_scale, output_zp_corr, Narrowing(output_sh, rounding_type, output_dtype)
)
return compiler_operator.lrn_vertex(
data_vertex, lut=lut_vertex, window_size=window_size, zp_input=input_zp,
requant_lut=requant_lut, requant_output=requant_output, name=name
)
def gen_compiler_softmax_vertex(
name: str, data_vertex: CompilerVertex, lut_exp_vertex: CompilerVertex,
lut_rec_vertex: CompilerVertex, axis: int, exp_zp: int | None, rec_zp: int | None,
requant_lut: BaseRequantization[np.ndarray], requant_output: BaseRequantization[np.ndarray],
lut_input_pre_shift: int | None = None, output_pre_shift: int | None = None
) -> CompilerVertex:
return compiler_operator.softmax_vertex(
data_vertex, lut_exp=lut_exp_vertex, lut_rec=lut_rec_vertex, axis=axis, zp_exp=exp_zp,
zp_rec=rec_zp, requant_lut=requant_lut, requant_output=requant_output,
lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift, name=name
)
def _gen_compiler_conv2d_transpose_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
weight_vertex: CompilerVertex,
bias_vertex: CompilerVertex | None,
input_zp: int,
output_zp: int,
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
dilation: tuple[int, int, int],
requant: BaseRequantization[np.ndarray],
msb_left_shift: bool = False,
activ: Activation = Activation.NONE,
groups: int = 1,
clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
# Get convolution parameters
data_shape = data_vertex.operator.shape.shape
weight_shape = weight_vertex.operator.shape.shape
assert data_shape[-1] == weight_shape[-2]
assert weight_shape[-1] == output_shape[-1]
conv_params = ConvolutionParameters(
input_depth=data_shape[0],
input_height=data_shape[1],
input_width=data_shape[2],
input_channels=data_shape[3],
output_depth=output_shape[0],
output_height=output_shape[1],
output_width=output_shape[2],
output_channels=output_shape[3],
filter_depth=weight_shape[0],
filter_height=weight_shape[1],
filter_width=weight_shape[2],
padding=padding,
stride=strides,
dilation=dilation,
is_transposed=True,
num_groups=groups
)
return compiler_operator.transposed_conv2d_vertex(
conv_params, data_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
activation=activ, clip_range=clip_range, name=name
)
def _gen_compiler_2x_depthwise_conv2d_transpose_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
weight_vertex: CompilerVertex,
bias_vertex: CompilerVertex | None,
input_zp: int,
output_zp: int,
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
dilation: tuple[int, int, int],
requant: BaseRequantization[np.ndarray],
msb_left_shift: bool = False,
activ: Activation = Activation.NONE,
groups: int = 1,
clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
"""
Translate a depthwise transpose conv2d to upscale + depthwise conv2d:
* upscale: 2x zero-insertion upscale
* depthwise conv2d
"""
# Create vertex names
upscale_name = name + "/zero_insertion_upscale"
conv2d_name = name + "/depthwise_conv2d"
data_shape = data_vertex.operator.shape.shape
weight_shape = weight_vertex.operator.shape.shape
assert data_shape[-1] == weight_shape[-1] == output_shape[-1]
# Current compiler limitation.
assert data_shape[0] == strides[0] == 1
    # Determine whether the conv2d_transpose performs a 2x upsample along H and/or W
is_upscale_h_2x = strides[1] == 2
is_upscale_w_2x = strides[2] == 2
# 1st - Create zero-insertion upscale vertex
if is_upscale_h_2x and is_upscale_w_2x:
axis = "h_w"
elif is_upscale_h_2x:
axis = "height"
elif is_upscale_w_2x:
axis = "width"
else:
axis = None
if axis is not None:
upscale_vertex = compiler_operator.upscale_vertex(
data_vertex, op="zero", axis=axis, zp=input_zp, rounding=RoundType.TOEVEN,
name=upscale_name
)
else:
upscale_vertex = data_vertex
# 2nd - Create depthwise conv2D
    # Unlike the ml_kernels simulation, where the padding converted from TransposedConv2d to Conv2d
    # is applied by zero insertion before calling conv2d with no padding, here zero insertion is done
    # by the upscale vertex, which can only resize (H, W) by the stride. Hence an MLA-specific
    # padding adjustment is needed.
    # Theoretically:
    #   Size after zero insertion = (HW - 1) * stride + 1 = HW * stride - (stride - 1)
    #   NewPadding = K - p - 1
    # MLA implementation:
    #   Size after upscale = HW * stride, which has an extra (stride - 1) to be deducted from the padding
    #   NewPadding for top and left = K - p - 1, as usual
    #   NewPadding for bottom and right = K - p - 1 - (stride - 1)
trans_padding = (
weight_shape[0] - 1 - padding[0],
weight_shape[0] - 1 - padding[1] - (strides[0] - 1),
weight_shape[1] - 1 - padding[2],
weight_shape[1] - 1 - padding[3] - (strides[1] - 1),
weight_shape[2] - 1 - padding[4],
weight_shape[2] - 1 - padding[5] - (strides[2] - 1)
)
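    # Worked example (hypothetical values, for illustration only): with filter size K = 4, stride 2
    # and symmetric padding p = 1 along H and W (and a trivial depth dimension), the adjusted
    # padding becomes
    #   top / left      = K - p - 1                = 4 - 1 - 1     = 2
    #   bottom / right  = K - p - 1 - (stride - 1) = 4 - 1 - 1 - 1 = 1
    # so trans_padding would be (0, 0, 2, 1, 2, 1).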
assert groups == data_shape[-1], (
"Number of groups is expected to be C for depthwise convolution."
)
conv_params = ConvolutionParameters(
input_depth=data_shape[0],
input_height=(2 if is_upscale_h_2x else 1) * data_shape[1],
input_width=(2 if is_upscale_w_2x else 1) * data_shape[2],
input_channels=data_shape[3],
output_depth=output_shape[0],
output_height=output_shape[1],
output_width=output_shape[2],
output_channels=output_shape[3],
filter_depth=weight_shape[0],
filter_height=weight_shape[1],
filter_width=weight_shape[2],
padding=trans_padding,
stride=(1, 1, 1),
dilation=(1, 1, 1),
is_transposed=False,
num_groups=groups
)
return compiler_operator.conv2d_vertex(
conv_params, upscale_vertex, weight_vertex, bias=bias_vertex, ifm_zero_point=input_zp,
ofm_zero_point=output_zp, requantization=requant, msb_left_shift=msb_left_shift,
activation=activ, clip_range=clip_range, name=conv2d_name
)
def gen_compiler_conv2d_transpose_vertex(
name: str,
output_shape: tuple[int, int, int, int],
data_vertex: CompilerVertex,
weight_vertex: CompilerVertex,
bias_vertex: CompilerVertex | None,
input_zp: int,
output_zp: int,
strides: tuple[int, int, int],
padding: tuple[int, int, int, int, int, int],
dilation: tuple[int, int, int],
requant: BaseRequantization[np.ndarray],
msb_left_shift: bool = False,
activ: Activation = Activation.NONE,
is_depthwise: bool = False,
groups: int = 1,
clip_range: tuple[int, int] | tuple[float, float] | None = None
) -> CompilerVertex:
_gen_vertex_func: ConvVertexProtocol
if is_depthwise:
_gen_vertex_func = _gen_compiler_2x_depthwise_conv2d_transpose_vertex
else:
_gen_vertex_func = _gen_compiler_conv2d_transpose_vertex
return _gen_vertex_func(
name, output_shape, data_vertex, weight_vertex, bias_vertex, input_zp, output_zp, strides,
padding, dilation, requant, msb_left_shift=msb_left_shift, activ=activ, groups=groups,
clip_range=clip_range
)
def gen_compiler_udf_vertex(
name: str, data_vertex: CompilerVertex, lut_vertex: CompilerVertex
) -> CompilerVertex:
return compiler_operator.udf_vertex_table(data_vertex, lut_vertex, name=name)
def gen_compiler_erf_vertex(data_vertex: CompilerVertex, name: str) -> CompilerVertex:
return compiler_operator.erf_vertex(data_vertex, name)
def gen_compiler_upsampling_vertex(
name: str, data_vertex: CompilerVertex, op: str, axis: str, zp: int, rounding: RoundType
) -> CompilerVertex:
return compiler_operator.upscale_vertex(data_vertex, op, axis, zp, rounding, name)
def gen_compiler_resize_general_vertex(
data_vertex: CompilerVertex, target_spatial_shape: tuple[int, int, int], zp: int,
rounding: RoundType, mode: str, name: str | None = None
) -> CompilerVertex:
input_shape: tuple[int, int, int, int] = data_vertex.tensor_shape.shape
output_shape: tuple[int, int, int, int] = (*target_spatial_shape, input_shape[-1])
return compiler_operator.resize_general_vertex(
ifm=data_vertex,
total_input_shape=input_shape,
total_output_shape=output_shape,
zp=zp,
rounding=rounding,
name=name,
mode=mode,
segment_shape=output_shape,
)
def gen_compiler_resize_integer_factor_vertex(
data_vertex: CompilerVertex, scaling_factors: tuple[int, ...], zp: int, rounding: RoundType,
name: str | None = None
) -> CompilerVertex:
return compiler_operator.resize_integer_factor_vertex(
data_vertex, scaling_factors, zp, rounding, name
)
def generate_resize_vertex(
data: CompilerVertex, method: str, target_spatial_shape: tuple[int, int, int], zp: int,
rounding: str, tf_ver: int = 1, mode: str = 'half_pixel', name: str | None = None
) -> CompilerVertex:
"""
Generate ResizeGeneralVertex or ResizeIntegerFactorVertex or a series of UpscaleVertex.
:param data: CompilerVertex that represents input
:param method: Method used by upsampling algorithm
:param target_spatial_shape: Spatial shape to which the output needs to be resized.
:param zp: Zero point to use for zero interpolation
:param rounding: Rounding mode, one of 'trunc', 'up', 'even'
    :param tf_ver: Version of the TF resize algorithm, 1 or 2
    :param mode: ONNX coordinate_transformation_mode, one of ['half_pixel', 'align_corners', 'asymmetric'].
    :param name: Name, or name prefix, for the generated vertex or vertices
    :return: The CompilerVertex that produces the resized output
"""
assert tf_ver in [1, 2]
output = data
vertex_id = 0
scaling_factors: tuple[float, ...] = tuple(
x / y for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
)
is_integer_scaling: bool = all(
x % y == 0 for x, y in zip(target_spatial_shape, data.operator.output_shape[:-1])
)
if (
(not is_integer_scaling or any(x >= 64 for x in scaling_factors) or mode != 'half_pixel')
and method in ("linear", "bilinear")
):
assert tf_ver == 2, (
f"Unsupported tf_ver ({tf_ver}) for method ({method}) and scaling factors"
f"{scaling_factors}, with coordinate_transformation_mode {mode}."
)
return gen_compiler_resize_general_vertex(
data, target_spatial_shape, zp, rounding, mode, name
)
scaling_factors: tuple[int, ...] = tuple(int(x) for x in scaling_factors)
if (
tf_ver == 2
and method in ("linear", "bilinear")
and (
any(x not in (1, 2, 4) for x in scaling_factors)
or all(x == 1 for x in scaling_factors)
)
):
return gen_compiler_resize_integer_factor_vertex(data, scaling_factors, zp, rounding, name)
log2_scale: tuple[int, ...] = tuple(x.bit_length() - 1 for x in scaling_factors)
if method == "nearest_neighbor" or tf_ver == 1:
op = "nearest" if method == "nearest_neighbor" else "linear"
while log2_scale != (0, 0, 0):
d, h, w = log2_scale
axis = [0, 0]
if h > 0:
h = h - 1
axis[0] = 1
if w > 0:
w = w - 1
axis[1] = 1
axis = {(0, 1): "width", (1, 0): "height", (1, 1): "h_w"}[tuple(axis)]
vertex_name = f"{name}/{vertex_id}"
output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
log2_scale = (d, h, w)
vertex_id += 1
else:
d, h, w = log2_scale
if h > 2 or w > 2:
raise NotImplementedError(
"MLA only support 2x or 4x upscale in H and/or W dimension for Tensorflow V2."
f" Got {2 ** log2_scale[1]} along H and {2 ** log2_scale[2]} along W"
)
while log2_scale != (0, 0, 0):
d, h, w = log2_scale
axis = [0, 0]
if h > 1:
if h == w:
w = w - 2
axis[1] = 2
h = h - 2
axis[0] = 2
op = "linear4"
elif h > 0:
if h == w:
w = w - 1
axis[1] = 1
h = h - 1
axis[0] = 1
op = "linear2"
elif w > 1:
w = w - 2
axis[0] = 0
axis[1] = 2
op = "linear4"
elif w > 0:
w = w - 1
axis[0] = 0
axis[1] = 1
op = "linear2"
if axis[0] == axis[1]:
axis = "h_w"
elif axis[1] > axis[0]:
axis = "width"
else:
axis = "height"
vertex_name = f"{name}/{vertex_id}"
output = gen_compiler_upsampling_vertex(vertex_name, output, op, axis, zp, rounding)
log2_scale = (d, h, w)
vertex_id += 1
return output
def gen_compiler_relu_vertex(
name: str, data_vertex: CompilerVertex, node_zp: int
) -> CompilerVertex:
return compiler_operator.relu_vertex(ifm=data_vertex, zp=node_zp, name=name)
def gen_compiler_clip_vertex(
name: str, data_vertex: CompilerVertex, clip_min: int, clip_max: int
) -> CompilerVertex:
return compiler_operator.clip_vertex(
ifm=data_vertex, clip_value_min=clip_min, clip_value_max=clip_max, name=name
)
def gen_compiler_leaky_relu_vertex(
name: str, data_vertex: CompilerVertex, alpha: int | float, node_zp: int, right_shift: int,
rounding_type: RoundType
) -> CompilerVertex:
return compiler_operator.leaky_relu_vertex(
ifm=data_vertex, alpha=alpha, zp=node_zp, shift=right_shift, rounding_type=rounding_type,
name=name
)
def gen_compiler_slice_vertex(
name: str, data_vertex: CompilerVertex, begin: tuple[int, int, int, int],
size: tuple[int, int, int, int], stride: tuple[int, int, int, int] = (1, 1, 1, 1)
):
return compiler_operator.slice_vertex(
ifm=data_vertex, begin=begin, size=size, stride=stride, name=name
)
def gen_compiler_layer_norm_vertex(
name: str,
data_vertex: CompilerVertex,
lut_vertex: CompilerVertex,
axis: int,
epsilon: int | None = None,
rsqrt_zp: int | None = None,
requant_mean: BaseRequantization[np.ndarray] | None = None,
requant_lut_input: BaseRequantization[np.ndarray] | None = None,
requant_output: BaseRequantization[np.ndarray] | None = None
) -> CompilerVertex:
return compiler_operator.layernorm_vertex(
data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_rsqrt=rsqrt_zp, epsilon=epsilon,
requant_mean=requant_mean, requant_lut=requant_lut_input, requant_output=requant_output,
name=name
)
def gen_compiler_rms_norm_vertex(
name: str,
data_vertex: CompilerVertex,
lut_vertex: CompilerVertex,
axis: int,
*,
zp_ifm: int | None = None,
zp_rsqrt: int | None = None,
requant_lut: BaseRequantization | None = None,
requant_output: BaseRequantization | None = None,
lut_input_pre_shift: int | None = None,
output_pre_shift: int | None = None,
epsilon: float | None = None
) -> CompilerVertex:
return compiler_operator.rms_norm_vertex(
data_vertex, lut_rsqrt=lut_vertex, axis=axis, zp_ifm=zp_ifm, zp_rsqrt=zp_rsqrt,
requant_lut=requant_lut, requant_output=requant_output,
lut_input_pre_shift=lut_input_pre_shift, output_pre_shift=output_pre_shift, epsilon=epsilon,
name=name
)
def gen_compiler_instance_norm_vertex(
name: str,
data_vertex: CompilerVertex,
mean_vertex: CompilerVertex,
variance_vertex: CompilerVertex,
lut_rsqrt_vertex: CompilerVertex,
zp_rsqrt: int | None = None,
requant_output: BaseRequantization | None = None,
epsilon: float | None = None
) -> CompilerVertex:
return compiler_operator.instancenorm_vertex(
ifm=data_vertex, mean=mean_vertex, var=variance_vertex, lut_rsqrt=lut_rsqrt_vertex,
zp_rsqrt=zp_rsqrt, requant_output=requant_output, epsilon=epsilon, name=name
)
def gen_compiler_batch_matmul_vertex(
name: str, lhs: CompilerVertex, rhs: CompilerVertex, transpose_b: bool, input_zps: list[int],
requant: BaseRequantization, intrinsic_shift: int
) -> CompilerVertex:
return compiler_operator.batch_matmul_vertex(
lhs, rhs, transpose_b=transpose_b, ifm_zp=input_zps, requant_ofm=requant,
intrinsic_shift=intrinsic_shift, name=name
)
def gen_compiler_output_vertex(data_vertex: CompilerVertex) -> CompilerVertex:
return compiler_operator.output_vertex(data_vertex, name=f"{data_vertex.name}_output")
def gen_compiler_broadcast_to_vertex(
name: str, input_vertex: CompilerVertex, output_shape: tuple[int, int, int, int]
) -> CompilerVertex:
return compiler_operator.broadcast_vertex(input_vertex, output_shape, name=name)
def gen_compiler_requantization_vertex(
name: str, input_vertex: CompilerVertex, requant: BaseRequantization
) -> CompilerVertex:
return compiler_operator.requantization_vertex(input_vertex, requant, name=name)
def gen_compiler_transpose_vertex(
name: str, input_vertex: CompilerVertex, perm: tuple[int, int, int, int]
) -> CompilerVertex:
return compiler_operator.transpose_vertex(input_vertex, perm, name=name)
def gen_compiler_depth_to_space_vertex(
name: str, input_vertex: CompilerVertex, block_size: int, mode: str
) -> CompilerVertex:
return compiler_operator.depth_to_space_vertex(input_vertex, block_size=block_size, mode=mode, name=name)
def gen_compiler_sigmoid_vertex(
name: str, input_vertex: CompilerVertex, lut_exp: CompilerVertex, lut_rec: CompilerVertex
) -> CompilerVertex:
return compiler_operator.sigmoid_vertex(input_vertex, lut_exp, lut_rec, name=name)
def make_mlc_file_name(output_dir: str, name: str, stage: int) -> str:
"""
Make mlc file name.
:param output_dir: Directory where file will be placed
:param name: Name used as the beginning of the filename
    :param stage: Stage number of the graph, appended to make a unique filename.
    :return: The constructed file path, without extension.
    """
    return os.path.join(output_dir, f"{name}_stage{stage}_mla")