Source code for afe.core.configs

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou, Onkar Chougule
#########################################################
import os
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Tuple, Union, Any, TypeVar, Generic

from afe._tvm._tvm_graph_partition import CompileMode
from afe.apis.defines import CalibrationMethod, gen1_target, MinMaxMethod
from afe.backends import Backend
from afe.common_utils import parse_indices
from afe.core.graph_analyzer.utils import QuantizedGraphAnalyzerMode, Metric
from afe.ir.defines import NodeName, InputShape, RequantizationMode, BiasCorrectionType
from afe.ir.tensor_type import ScalarType
from afe.ir.utils import transpose_attr_according_to_layout_strings
from sima_utils.common import Platform


@dataclass
class ModelConfigs:
    name: str
    framework: str
    input_names: List[str]
    input_shapes: List[InputShape]
    input_dtypes: List[ScalarType]
    layout: str
    model_path: str = ""  # Used in cases where there is a single file path
    model_file_paths: List[str] = field(default_factory=list)  # Used in cases where there are multiple files
    is_quantized: bool = False  # Whether the model is already quantized. If true, AFE will not quantize it.
    output_names: Optional[List[str]] = None
    output_directory: Optional[str] = None
    toolbox_config: bool = False

    # Generated files location
    mlc_files: Optional[str] = None
    trace_files: Optional[str] = None
    def __post_init__(self):
        # YAML does not support tuples, so shapes loaded from YAML arrive as lists;
        # convert each shape back to a tuple.
        self.input_shapes = [tuple(shape) for shape in self.input_shapes]
    def set_default_output_directory(self, output_directory_path: str):
        if self.output_directory is None:
            self.output_directory = os.path.join(output_directory_path, self.name)

    def set_absolute_model_path(self, model_path: str):
        if not self.toolbox_config:
            self.model_path = os.path.join(model_path, self.model_path)
            self.model_file_paths = [os.path.join(model_path, p) for p in self.model_file_paths]

    @property
    def shape_dict(self) -> Dict[NodeName, InputShape]:
        return {NodeName(name): shape for name, shape in zip(self.input_names, self.input_shapes)}

    @property
    def dtype_dict(self) -> Dict[NodeName, ScalarType]:
        return {NodeName(name): dtype for name, dtype in zip(self.input_names, self.input_dtypes)}

    @property
    def input_shapes_hwc(self) -> Tuple[int, int, int]:
        return tuple([transpose_attr_according_to_layout_strings(shape, self.layout, "HWC")
                      for shape in self.input_shapes])

    @property
    def shape_dict_hwc(self) -> Dict[NodeName, Tuple[int, int, int]]:
        return {NodeName(name): shape for name, shape in zip(self.input_names, self.input_shapes_hwc)}
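
# Minimal illustrative sketch of constructing a ModelConfigs and reading one of its derived
# dictionaries. The model name, framework string, and file path are hypothetical values, and
# ScalarType.float32 is assumed to be an available member.
_example_model_configs = ModelConfigs(
    name="resnet50",                      # hypothetical model name
    framework="onnx",                     # hypothetical framework identifier
    input_names=["input"],
    input_shapes=[(1, 3, 224, 224)],
    input_dtypes=[ScalarType.float32],    # assumption: ScalarType.float32 exists
    layout="NCHW",
    model_path="resnet50.onnx",           # hypothetical relative path
)
# _example_model_configs.shape_dict maps NodeName("input") to (1, 3, 224, 224).
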
class QuantizationPrecision(Enum):
    INT_8 = "int8"
    INT_16 = "int16"
    BFLOAT_16 = "bfloat16"
    BFLOAT_16_INT8_WEIGHTS = "bfloat16_int8_weights"
    BFLOAT_16_INT4_WEIGHTS = "bfloat16_int4_weights"
    @staticmethod
    def from_string(precision: str) -> "QuantizationPrecision":
        if precision in ("int8", "int_8"):
            return QuantizationPrecision.INT_8
        elif precision in ("int16", "int_16"):
            return QuantizationPrecision.INT_16
        elif precision in ("bfloat16", "bfloat_16"):
            return QuantizationPrecision.BFLOAT_16
        else:
            raise ValueError(
                f"Unsupported string ({precision}) for conversion to QuantizationPrecision type."
            )
    def to_scalar_type(self) -> ScalarType:
        return _QUANTIZATION_PRECISION_TO_SCALAR_TYPE[self]

    def to_expected_int_scalar_type(self) -> ScalarType:
        scalar_type = self.to_scalar_type()
        if scalar_type not in (ScalarType.int8, ScalarType.int16):
            raise TypeError(f"Unexpected scalar type: {scalar_type}. "
                            f"Expecting ScalarType.int8 or ScalarType.int16")
        return scalar_type

    def is_int8_precision(self) -> bool:
        return self == QuantizationPrecision.INT_8

    def is_int16_precision(self) -> bool:
        return self == QuantizationPrecision.INT_16

    def is_bfloat16_precision(self) -> bool:
        return self == QuantizationPrecision.BFLOAT_16

    def is_bfloat16_with_int8_weights(self) -> bool:
        return self == QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS

    def is_bfloat16_with_int_weights(self) -> bool:
        return self in [
            QuantizationPrecision.BFLOAT_16_INT4_WEIGHTS,
            QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS
        ]
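
# Minimal sketch of the string-to-precision conversion defined above: both spellings map to
# the same enum member, and unrecognized strings raise ValueError.
_example_precision = QuantizationPrecision.from_string("int_16")
# _example_precision is QuantizationPrecision.INT_16; "int16" would give the same result.
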
_QUANTIZATION_PRECISION_TO_SCALAR_TYPE: Dict[QuantizationPrecision, ScalarType] = {
    QuantizationPrecision.INT_8: ScalarType.int8,
    QuantizationPrecision.INT_16: ScalarType.int16,
    QuantizationPrecision.BFLOAT_16: ScalarType.bfloat16,
    QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS: ScalarType.bfloat16,
    QuantizationPrecision.BFLOAT_16_INT4_WEIGHTS: ScalarType.bfloat16
}


_T = TypeVar("_T")


@dataclass(frozen=True)
class EmptyValue:
    """
    An empty value class, used to initialize an empty Opt class.
    """
    empty: str = ''

@dataclass(frozen=True)
class Opt(Generic[_T]):
    """
    Generic immutable container class having either one value or no value.
    It is used for storing values of QuantizationConfigs fields.
    """
    value: Union[_T, EmptyValue] = EmptyValue()

    def merge(self, option: "Opt") -> "Opt":
        if self.is_empty():
            return option
        else:
            return self

    def get(self):
        if self.is_empty():
            raise ValueError('No value set.')
        return self.value

    def is_empty(self):
        return isinstance(self.value, EmptyValue)
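
# Minimal sketch of Opt semantics: an empty Opt defers to the other value in merge(), while
# a set Opt keeps its own value, and get() on an empty Opt raises ValueError.
_set_opt: Opt[bool] = Opt(True)
_empty_opt: Opt[bool] = Opt()
# _empty_opt.merge(_set_opt).get() -> True; _set_opt.merge(Opt(False)).get() -> True
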
@dataclass
class QuantizationConfigs:
    """
    Parameters controlling how to quantize a network.

    Instances should be constructed using one of the construction functions,
    not using the class constructor.

    Fields can be overridden for specific nodes using the custom_quantization_configs
    parameter of UpdateQuantizationConfigs. This parameter is accepted by several other
    functions, as well. See individual fields for restrictions on overriding.
    """
    # Whether to use asymmetric quantization. This value may not be overridden for
    # network nodes, as it will lead to inconsistent assumptions about quantization.
    asymmetry: Opt[bool] = Opt()

    # Whether to use per-channel quantization in a convolution or matrix multiply.
    per_channel: Opt[bool] = Opt()

    # Whether to use UDF in the quantized implementation of LeakyRelu.
    # If False, arithmetic is used.
    leaky_relu_uses_udf: Opt[bool] = Opt()

    # Quantization precision used in quantization. When this precision is not supported,
    # the quantizer will use a different precision.
    # The output_int32 flag overrides this one.
    quantization_precision: Opt[QuantizationPrecision] = Opt()

    # Sensitivity ranking for mixed precision quantization. Nodes with high sensitivity
    # have higher priority to be quantized with high precision. Nodes with no sensitivity
    # are not candidates for high precision.
    quantization_sensitivity: Opt[int] = Opt()

    # Whether to use wide node outputs during quantization. Only has an effect when
    # quantization_precision's value is INT_16.
    # A quantized node's output type should be int32 if this flag is true, int16 if this flag is false.
    # This affects the quantization pass; subsequent passes may change the type.
    intermediate_int32: Opt[bool] = Opt()

    # Type of correction for quantization-induced bias: None/Regular/Iterative
    # (see PA-1029 and https://arxiv.org/abs/1906.04721, Section 4.2)
    biascorr_type: Opt[BiasCorrectionType] = Opt()

    # Whether to use the int32 numeric type in the output of convolution related operators.
    # Skip requantization and output the layer in int32 instead of int8.
    output_int32: Opt[bool] = Opt()

    # A way of doing quantized arithmetic.
    requantization_mode: Opt[RequantizationMode] = Opt()

    # Whether to enable channel equalization. This value must not be overridden for individual nodes.
    channel_equalization: Opt[bool] = Opt()

    # Whether to enable smooth quant. This value must not be overridden for individual nodes.
    smooth_quant: Opt[bool] = Opt()
def initialize_empty_quant_config():
    """
    Helper function for initializing empty QuantizationConfigs.
    """
    opt = Opt()
    return QuantizationConfigs(asymmetry=opt, per_channel=opt, leaky_relu_uses_udf=opt,
                               quantization_precision=opt, quantization_sensitivity=opt,
                               intermediate_int32=opt, biascorr_type=opt, output_int32=opt,
                               requantization_mode=opt)
def merge_quantization_configs(*, config1: QuantizationConfigs,
                               config2: QuantizationConfigs) -> QuantizationConfigs:
    """
    Merge two QuantizationConfigs. When merging, values from the first config take priority
    and the corresponding values from the second config are discarded, so the
    QuantizationConfigs with higher priority should be passed as config1.
    """
    asymmetry = config1.asymmetry.merge(config2.asymmetry)
    per_channel = config1.per_channel.merge(config2.per_channel)
    leaky_relu_uses_udf = config1.leaky_relu_uses_udf.merge(config2.leaky_relu_uses_udf)
    quantization_precision = config1.quantization_precision.merge(config2.quantization_precision)
    quantization_sensitivity = config1.quantization_sensitivity.merge(config2.quantization_sensitivity)
    intermediate_int32 = config1.intermediate_int32.merge(config2.intermediate_int32)
    biascorr_type = config1.biascorr_type.merge(config2.biascorr_type)
    output_int32 = config1.output_int32.merge(config2.output_int32)
    requantization_mode = config1.requantization_mode.merge(config2.requantization_mode)
    return QuantizationConfigs(asymmetry=asymmetry, per_channel=per_channel,
                               leaky_relu_uses_udf=leaky_relu_uses_udf,
                               quantization_precision=quantization_precision,
                               quantization_sensitivity=quantization_sensitivity,
                               intermediate_int32=intermediate_int32,
                               biascorr_type=biascorr_type,
                               output_int32=output_int32,
                               requantization_mode=requantization_mode)
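
# Minimal sketch of merge priority: a field that is set in config1 wins, while fields left
# empty in config1 fall back to config2. (Direct construction is used here only for
# illustration; normal code should use the construction functions below.)
_high_priority = QuantizationConfigs(quantization_precision=Opt(QuantizationPrecision.INT_16))
_low_priority = QuantizationConfigs(quantization_precision=Opt(QuantizationPrecision.INT_8),
                                    per_channel=Opt(True))
_merged = merge_quantization_configs(config1=_high_priority, config2=_low_priority)
# _merged.quantization_precision.get() -> QuantizationPrecision.INT_16 (config1 wins)
# _merged.per_channel.get() -> True (empty in config1, filled from config2)
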
def create_quantization_configs(*, asymmetry: bool = True,
                                per_channel: bool = False,
                                leaky_relu_uses_udf: bool = True,
                                quantization_precision: QuantizationPrecision = QuantizationPrecision.INT_8,
                                quantization_sensitivity: int = 0,
                                requantization_mode: RequantizationMode = RequantizationMode.sima,
                                intermediate_int32: bool = False,
                                biascorr_type: BiasCorrectionType = BiasCorrectionType.NONE,
                                output_int32: bool = False,
                                channel_equalization: bool = False,
                                smooth_quant: bool = False) -> QuantizationConfigs:
    """
    Construct QuantizationConfigs.

    :param asymmetry: Whether to use asymmetric quantization.
    :param per_channel: Whether to use per-channel quantization.
    :param leaky_relu_uses_udf: Whether to use UDF instead of arithmetic instructions for quantization.
    :param quantization_precision: Precision used during quantization.
    :param quantization_sensitivity: Sensitivity for mixed precision quantization.
    :param requantization_mode: A way of doing quantized arithmetic.
    :param intermediate_int32: Whether to use wide node outputs during quantization.
    :param biascorr_type: Method to correct for quantization-induced bias: None/Regular/Iterative.
    :param output_int32: Whether to use the int32 numeric type in the output of convolution related operators.
    :param channel_equalization: Whether to enable channel equalization.
    :param smooth_quant: Whether to enable smooth quant.
    :return: QuantizationConfigs
    """
    return QuantizationConfigs(asymmetry=Opt(asymmetry), per_channel=Opt(per_channel),
                               leaky_relu_uses_udf=Opt(leaky_relu_uses_udf),
                               quantization_precision=Opt(quantization_precision),
                               quantization_sensitivity=Opt(quantization_sensitivity),
                               intermediate_int32=Opt(intermediate_int32),
                               biascorr_type=Opt(biascorr_type),
                               output_int32=Opt(output_int32),
                               requantization_mode=Opt(requantization_mode),
                               channel_equalization=Opt(channel_equalization),
                               smooth_quant=Opt(smooth_quant))
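
# Minimal sketch: build quantization settings with defaults, overriding only the options of
# interest.
_quant_configs = create_quantization_configs(per_channel=True,
                                             quantization_precision=QuantizationPrecision.INT_16)
# _quant_configs.per_channel.get() -> True; unspecified fields keep their documented defaults.
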
def update_quantization_configs(quantization_configs: QuantizationConfigs,
                                field_name: str, value: Any) -> None:
    """
    Given a field name and a value, if the QuantizationConfigs has an attribute with the given
    field name, update that attribute with the given value.

    Parameters
    ----------
    :param quantization_configs: QuantizationConfigs.
    :param field_name: str. Name of the target attribute in the given QuantizationConfigs object.
    :param value: Any. Target value that is going to be assigned to the attribute.
    """
    if not hasattr(quantization_configs, field_name):
        raise AttributeError(f"QuantizationConfigs does not have attribute name '{field_name}'.")
    old_value = quantization_configs.__dict__[field_name]
    if not old_value.is_empty() and not isinstance(old_value.get(), type(value)):
        raise AttributeError(f"QuantizationConfigs' {field_name} attribute must have type "
                             f"{type(old_value.get())}. Got {type(value)}")
    setattr(quantization_configs, field_name, Opt(value))
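
# Minimal sketch: flip a single field on an existing QuantizationConfigs by name. The new
# value is wrapped in Opt internally; a value of mismatched type raises AttributeError.
_qc = create_quantization_configs()
update_quantization_configs(_qc, "output_int32", True)
# _qc.output_int32.get() -> True
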
@dataclass
class CompressionConfigs:
    compress: bool = False
@dataclass(frozen=True)
class CalibrationConfigs:
    """
    Parameters for calibration. Instances should be constructed using one of the
    construction functions.

    Attributes
    ----------
    :attribute calibration_method: CalibrationMethod used during calibration.
        See the CalibrationMethod Enum class for currently supported methods.
    :attribute num_calibration_samples: int. Limit on the number of data samples used to feed
        inputs to the AwesomeNet during calibration. If None, all data samples that are passed
        to calibration are used.
    """
    calibration_method: CalibrationMethod
    num_calibration_samples: Optional[int]
def create_testcase_calibration_configs(num_calibration_samples: int,
                                        calibration_method: CalibrationMethod = MinMaxMethod()) \
        -> CalibrationConfigs:
    """
    Construct CalibrationConfigs using parameters from the network test configuration data.

    :param num_calibration_samples: Maximum number of calibration data samples to use for calibration.
    :param calibration_method: CalibrationMethod used in calibration. See the CalibrationMethod
        Enum class for supported values.
    :return: Constructed value
    """
    return CalibrationConfigs(calibration_method=calibration_method,
                              num_calibration_samples=num_calibration_samples)
def api_calibration_configs(calibration_method: CalibrationMethod = MinMaxMethod()) \
        -> CalibrationConfigs:
    """
    Construct CalibrationConfigs using user-specified parameters.

    :param calibration_method: CalibrationMethod used in calibration. See the CalibrationMethod
        Enum class for supported values.
    :return: Constructed value
    """
    return CalibrationConfigs(calibration_method=calibration_method,
                              num_calibration_samples=None)
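
# Minimal sketch: calibration settings built from the API-facing constructor. The default
# MinMaxMethod is used and no cap is placed on the number of calibration samples.
_calib_configs = api_calibration_configs()
# _calib_configs.num_calibration_samples -> None (use every provided sample)
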
class PerfThreshold(object):
    """
    Used to store the threshold value for quantized model performance.
    """
    def set_threshold(self, fp32_perf: float) -> float:
        raise NotImplementedError("PerfThreshold class is abstract, use either RelativePerfThreshold or "
                                  "AbsolutePerfThreshold to set the quantized model performance threshold.")
@dataclass
class RelativePerfThreshold(PerfThreshold):
    """
    Used to store the threshold value for quantized model performance.
    The threshold value is given as a value relative to the floating-point model performance.

    :param rel_value: float. Quantized model performance threshold value relative to the
        floating-point model performance.
    """
    rel_value: float

    def set_threshold(self, fp32_perf: float) -> float:
        """
        Returns the threshold for quantized model performance relative to the floating-point
        model performance.

        :param fp32_perf: float. Floating-point model performance.
        :return: float. Threshold value relative to floating-point model performance.
        """
        return self.rel_value * fp32_perf
@dataclass
class AbsolutePerfThreshold(PerfThreshold):
    """
    Used to store the threshold value for quantized model performance.
    The threshold value is given as an absolute value.

    :param abs_value: float. Quantized model performance threshold value.
    """
    abs_value: float

    def set_threshold(self, fp32_perf: float) -> float:
        """
        Returns the threshold for quantized model performance.

        :param fp32_perf: float. Unused. Floating-point model performance.
        :return: float. Quantized model performance threshold value.
        """
        return self.abs_value
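
# Minimal sketch of the two threshold styles: a relative threshold scales the measured
# floating-point score, while an absolute threshold ignores it.
_fp32_score = 0.80                                               # hypothetical floating-point accuracy
_rel = RelativePerfThreshold(0.95).set_threshold(_fp32_score)    # 0.95 * 0.80 = 0.76
_abs = AbsolutePerfThreshold(0.70).set_threshold(_fp32_score)    # 0.70, fp32 score ignored
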
@dataclass
class QuantizationAwarePartitioningConfigs:
    """
    Config used for quantization-aware partitioning.

    Attributes
    ----------
    :attribute performance_threshold: PerfThreshold. Value used as a target for quantized model
        performance. Given either as an absolute value or as a value relative to the
        floating-point model performance.
    :attribute max_iterations: int. Maximal number of iterations in the QAP loop. Represents the
        maximal number of layers that are to be fixed to floating point while performing the
        quantization-aware partitioning.
    :attribute graph_analyzer_mode: QuantizedGraphAnalyzerMode. Graph analysis execution mode.
    :attribute graph_analyzer_metric: Metric. Metric used in graph analysis.
    :attribute graph_analyzer_number_of_samples: int. Number of input samples to be used in
        graph analysis.
    """
    performance_threshold: PerfThreshold = RelativePerfThreshold(0.95)
    max_iterations: int = 1
    graph_analyzer_mode: QuantizedGraphAnalyzerMode = QuantizedGraphAnalyzerMode.local_feed
    graph_analyzer_metric: Metric = Metric.mse
    graph_analyzer_number_of_samples: int = 2
@dataclass
class OptimizationConfigs:
    """Class for holding the configuration information used by the OptimizerClass."""
    strategy: str = 'sequential'

    # Calibration
    calibration_configs: CalibrationConfigs = api_calibration_configs()

    # Quantization
    quantization_configs: QuantizationConfigs = create_quantization_configs()

    # Compression
    compression_configs: CompressionConfigs = CompressionConfigs()
def _process_backend_indices_dict(
        backend_indices_dict: Optional[Dict[Backend, List[Union[int, Tuple[int, ...]]]]] = None
) -> Optional[Dict[int, Backend]]:
    """
    Parse the backend_indices_dict into an indices_to_backend_dict.

    :param backend_indices_dict: Optional[Dict[Backend, List[Union[int, Tuple[int, ...]]]]].
        Default is None. Dictionary of targeted Backend to indices. For each Backend key, provides
        the list of indices or index tuples that are mapped to the targeted Backend. If a tuple is
        given in the list, all indices in the range tuple[0]..tuple[1] will be mapped to the
        targeted Backend.
    :return: Optional[Dict[int, Backend]]. Dictionary containing an index: Backend mapping for all
        layer indices that are mapped to a certain Backend.
    """
    if backend_indices_dict is None:
        return {}

    # Handle indices tagged to a Backend by user
    indices_to_backend_dict = {}
    for backend, indices in backend_indices_dict.items():
        parsed_indices = parse_indices(indices)
        for idx in parsed_indices:
            indices_to_backend_dict[idx] = backend
    return indices_to_backend_dict
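
# Minimal sketch of the index parsing described above: single indices map directly to the
# Backend, and an index tuple is expanded to the range it covers (exact bounds are defined by
# parse_indices). Backend.APU is reused from the TransformerConfigs docstring example below.
_idx_map = _process_backend_indices_dict({Backend.APU: [1, (5, 7)]})
# _idx_map maps index 1 and each index in the 5..7 range to Backend.APU.
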
@dataclass
class RunConfigs:
    """
    Configuration parameters for how to execute networks in software emulation.

    :attribute fast_mode: If True, use a fast implementation of an operator. Results may not
        match the result of executing on the MLA. If False, use an implementation that exactly
        matches execution on the MLA.
    """
    fast_mode: bool = False
class ConvertLayoutMethod(Enum):
    """Enumeration specifying the layout conversion algorithm.

    The common processing flow requires the model to be converted to the MLA's native layout,
    that is 'NHWC'. This enumeration specifies which algorithm shall be used for layout
    conversion. Currently, the 'legacy' algorithm is tested and proven on most CNN models.
    The 'automated' algorithm is being developed and is aimed mostly at ViT models.
    The 'none' option is used in internal test cases, and it specifies the processing flow
    where layout conversion is skipped. It should not be used in the general model processing
    pipeline.
    """
    NONE = 'none'
    """No layout conversion. This is a backward-compatible value designating the case where the
    layout conversion transform is disabled in some test cases. Since we would want to run the
    layout conversion algorithm for all of the test cases, this value will be deprecated in the
    future.
    """

    LEGACY = 'legacy'
    """Legacy algorithm using TVM's ConvertLayout pass"""

    AUTOMATED = 'automated'
    """Algorithm doing automatic rewrite for MLA supported layouts"""

    @staticmethod
    def from_str(method: str) -> "ConvertLayoutMethod":
        """Helper method for constructing an enumeration instance from string.

        Args:
            method: String parameter defining the layout conversion method.

        Returns:
            The ConvertLayoutMethod Enum instance.
        """
        match method:
            case 'legacy':
                return ConvertLayoutMethod.LEGACY
            case 'automated':
                return ConvertLayoutMethod.AUTOMATED
            case 'none' | _:
                return ConvertLayoutMethod.NONE
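
# Minimal sketch of the string lookup above: known names map to their members and anything
# unrecognized falls back to NONE.
_layout_method = ConvertLayoutMethod.from_str("automated")   # ConvertLayoutMethod.AUTOMATED
_fallback_method = ConvertLayoutMethod.from_str("bogus")     # ConvertLayoutMethod.NONE
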
@dataclass
class TransformerConfigs:
    """
    Class holding the configuration information used by GraphTransformer and Partitioner.

    Attributes
    ----------
    :attribute convert_layout_method: Specifies the algorithm used for converting the model layout.
    :attribute enable_graph_partition: bool. Whether to apply graph partitioning on the model.
    :attribute indices_to_backend_dict: Optional[Dict[int, afe.backends.Backend]]. Dictionary
        containing a mapping of layer indices to their targeted backend, if any. If a layer
        {index: target_backend} pair is present in the dictionary, the layer with the given index
        will be executed on the target_backend Backend. If the index is absent from the
        dictionary, the layer with that index will be executed on the highest-priority backend
        that is supported for that layer.
    :attribute enable_quantization_based_partitioning: bool. Flag containing information whether
        to apply quantization-based partitioning.
    :attribute requantization_mode: How to convert TVM quantized operators to SiMa IR quantized
        operators. Only quantized TVM operators that are assigned to the MLA are affected.
    :attribute enabled_backends: Which set of backends to assign nodes to in graph partitioning.
        Any assignment in backend_indices_dict overrides this parameter.

    Example
    -------
    The example shows how to create a TransformerConfigs that will convert the layout to NHWC,
    enable graph partitioning, and assign the nodes with indices [1, 13, 22] to the APU:

        backend_indices_dict = {Backend.APU: [1, 13, 22]}
        transformer_configs = TransformerConfigs(convert_layout_method=ConvertLayoutMethod.LEGACY,
                                                 enable_graph_partition=True,
                                                 backend_indices_dict=backend_indices_dict)
    """
    convert_layout_method: ConvertLayoutMethod
    enable_graph_partition: bool
    indices_to_backend_dict: dict[int, Backend]
    enable_quantization_based_partitioning: bool
    requantization_mode: RequantizationMode
    enabled_backends: CompileMode
    def __init__(
            self,
            convert_layout_method: ConvertLayoutMethod = ConvertLayoutMethod.LEGACY,
            enable_graph_partition: bool = True,
            backend_indices_dict: dict[Backend, list[int | tuple[int, ...]]] | None = None,
            enable_quantization_based_partitioning: bool = False,
            *,
            requantization_mode: RequantizationMode = RequantizationMode.sima,
            enabled_backends: CompileMode = CompileMode.MLA_EV74_CPU
    ) -> None:
        """
        Constructor for the TransformerConfigs object.

        Parameters
        ----------
        :param convert_layout_method: Specifies the algorithm used for converting the model layout.
        :param enable_graph_partition: Whether to apply graph partitioning on the model.
        :param backend_indices_dict: Default is None. Dictionary of targeted Backend to indices.
            For each Backend key, provides the list of indices or index tuples that are mapped to
            the targeted Backend. If a tuple is given in the list, all indices in the range
            tuple[0]..tuple[1] will be mapped to the targeted Backend. For layers with indices not
            covered by the dictionary, the algorithm will select the highest-priority Backend
            supported for that layer.
        :param enable_quantization_based_partitioning: Default value is False. Set to True to
            apply quantization-based partitioning.
        :param requantization_mode: How to convert TVM quantized operators to SiMa IR quantized
            operators. Only quantized operators are affected.
        :param enabled_backends: Which set of backends to assign nodes to in graph partitioning.
            Any assignment in backend_indices_dict overrides this parameter.

        Example
        -------
        The example shows how to create a TransformerConfigs that will convert the layout to NHWC,
        enable graph partitioning, and assign the nodes with indices [1, 13, 22] to the APU:

            backend_indices_dict = {Backend.APU: [1, 13, 22]}
            transformer_configs = TransformerConfigs(convert_layout_method=ConvertLayoutMethod.LEGACY,
                                                     enable_graph_partition=True,
                                                     backend_indices_dict=backend_indices_dict)
        """
        self.convert_layout_method = convert_layout_method
        self.enable_graph_partition = enable_graph_partition
        self.indices_to_backend_dict = _process_backend_indices_dict(backend_indices_dict)
        self.enable_quantization_based_partitioning = enable_quantization_based_partitioning
        self.requantization_mode = requantization_mode
        self.enabled_backends = enabled_backends

    @property
    def convert_layout(self) -> bool:
        """Property defining whether the layout conversion algorithm is enabled.

        This property is defined due to backward portability issues, and should be used only in
        some helper test functions (i.e. determining whether model inputs and/or outputs should be
        transposed during the test run). This property should not be used to define any aspect of
        TVM transformations, as it will be deprecated in the near future.

        Returns:
            Boolean flag determining whether the layout conversion algorithm is run during TVM
            transformations.
        """
        return self.convert_layout_method != ConvertLayoutMethod.NONE
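
# Minimal sketch mirroring the docstring example: partition the graph, convert the layout with
# the legacy algorithm, and pin three layer indices to the APU backend.
_transformer_configs = TransformerConfigs(
    convert_layout_method=ConvertLayoutMethod.LEGACY,
    enable_graph_partition=True,
    backend_indices_dict={Backend.APU: [1, 13, 22]},
)
# _transformer_configs.indices_to_backend_dict -> {1: Backend.APU, 13: Backend.APU, 22: Backend.APU}
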
@dataclass
class AfeProcessingConfigs:
    """
    Dataclass holding all the configuration information used in end-to-end processing.

    Attributes
    ----------
    :attribute model_configs: ModelConfigs. Configuration information on the model that is being
        processed.
    :attribute transformer_configs: TransformerConfigs. Configuration information on
        transformations being used in model processing.
    :attribute optimization_configs: OptimizationConfigs. Configuration information on
        optimizations being used in processing.
    :attribute qap_configs: QuantizationAwarePartitioningConfigs. Configuration information being
        used in the quantization-aware partitioning algorithm.
    :attribute target: A target platform that a model is compiled for.
    """
    model_configs: ModelConfigs
    transformer_configs: TransformerConfigs = TransformerConfigs()
    optimization_configs: OptimizationConfigs = OptimizationConfigs()
    qap_configs: QuantizationAwarePartitioningConfigs = QuantizationAwarePartitioningConfigs()
    target: Platform = gen1_target
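
# Minimal sketch of assembling the end-to-end processing configuration. The model values are
# hypothetical (and ScalarType.float32 is assumed to exist); unspecified fields fall back to
# their defaults from this module (legacy layout conversion, min-max calibration, int8
# quantization, gen1 target), with the QAP threshold overridden here for illustration.
_afe_processing_configs = AfeProcessingConfigs(
    model_configs=ModelConfigs(name="resnet50", framework="onnx",        # hypothetical values
                               input_names=["input"], input_shapes=[(1, 3, 224, 224)],
                               input_dtypes=[ScalarType.float32],        # assumed member
                               layout="NCHW", model_path="resnet50.onnx"),
    qap_configs=QuantizationAwarePartitioningConfigs(performance_threshold=AbsolutePerfThreshold(0.70)),
)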