Source code for afe.core.configs

#########################################################
# Copyright (C) 2020 SiMa Technologies, Inc.
#
# This material is SiMa proprietary and confidential.
#
# This material may not be copied or distributed without
# the express prior written permission of SiMa.
#
# All rights reserved.
#########################################################
# Code owner: Joey Chou, Onkar Chougule
#########################################################
import os
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Tuple, Union, Any, TypeVar, Generic

from afe._tvm._tvm_graph_partition import CompileMode
from afe.apis.defines import CalibrationMethod, gen1_target, MinMaxMethod
from afe.backends import Backend
from afe.common_utils import parse_indices
from afe.core.graph_analyzer.utils import QuantizedGraphAnalyzerMode, Metric
from afe.ir.defines import NodeName, InputShape, RequantizationMode, BiasCorrectionType
from afe.ir.tensor_type import ScalarType
from afe.ir.utils import transpose_attr_according_to_layout_strings
from sima_utils.common import Platform


@dataclass
class ModelConfigs:
    name: str
    framework: str
    input_names: List[str]
    input_shapes: List[InputShape]
    input_dtypes: List[ScalarType]
    layout: str
    model_path: str = ""  # Used in cases where there is a single file path
    model_file_paths: List[str] = field(default_factory=list)  # Used in cases where there are multiple files
    is_quantized: bool = False  # Whether the model is already quantized. If true, AFE will not quantize it.
    output_names: Optional[List[str]] = None
    output_directory: Optional[str] = None
    toolbox_config: bool = False

    # Generated files location
    mlc_files: Optional[str] = None
    trace_files: Optional[str] = None
    def __post_init__(self):
        # YAML does not support tuples, so shapes loaded from YAML arrive as lists;
        # convert each shape back to a tuple.
        self.input_shapes = [tuple(shape) for shape in self.input_shapes]
    def set_default_output_directory(self, output_directory_path: str):
        if self.output_directory is None:
            self.output_directory = os.path.join(output_directory_path, self.name)

    def set_absolute_model_path(self, model_path: str):
        if not self.toolbox_config:
            self.model_path = os.path.join(model_path, self.model_path)
            self.model_file_paths = [os.path.join(model_path, p) for p in self.model_file_paths]

    @property
    def shape_dict(self) -> Dict[NodeName, InputShape]:
        return {NodeName(name): shape for name, shape in zip(self.input_names, self.input_shapes)}

    @property
    def dtype_dict(self) -> Dict[NodeName, ScalarType]:
        return {NodeName(name): dtype for name, dtype in zip(self.input_names, self.input_dtypes)}

    @property
    def input_shapes_hwc(self) -> Tuple[int, int, int]:
        return tuple([transpose_attr_according_to_layout_strings(shape, self.layout, "HWC")
                      for shape in self.input_shapes])

    @property
    def shape_dict_hwc(self) -> Dict[NodeName, Tuple[int, int, int]]:
        return {NodeName(name): shape for name, shape in zip(self.input_names, self.input_shapes_hwc)}
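
# Minimal illustrative sketch of constructing a ModelConfigs and reading one of its derived
# dictionaries. The model name, framework string, and file path are hypothetical values, and
# ScalarType.float32 is assumed to be an available member.
_example_model_configs = ModelConfigs(
    name="resnet50",                      # hypothetical model name
    framework="onnx",                     # hypothetical framework identifier
    input_names=["input"],
    input_shapes=[(1, 3, 224, 224)],
    input_dtypes=[ScalarType.float32],    # assumption: ScalarType.float32 exists
    layout="NCHW",
    model_path="resnet50.onnx",           # hypothetical relative path
)
# _example_model_configs.shape_dict maps NodeName("input") to (1, 3, 224, 224).
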
class QuantizationPrecision(Enum):
    INT_8 = "int8"
    INT_16 = "int16"
    BFLOAT_16 = "bfloat16"
    BFLOAT_16_INT8_WEIGHTS = "bfloat16_int8_weights"
    BFLOAT_16_INT4_WEIGHTS = "bfloat16_int4_weights"
    @staticmethod
    def from_string(precision: str) -> "QuantizationPrecision":
        if precision in ("int8", "int_8"):
            return QuantizationPrecision.INT_8
        elif precision in ("int16", "int_16"):
            return QuantizationPrecision.INT_16
        elif precision in ("bfloat16", "bfloat_16"):
            return QuantizationPrecision.BFLOAT_16
        else:
            raise ValueError(
                f"Unsupported string ({precision}) for conversion to QuantizationPrecision type."
            )
    def to_scalar_type(self) -> ScalarType:
        return _QUANTIZATION_PRECISION_TO_SCALAR_TYPE[self]

    def to_expected_int_scalar_type(self) -> ScalarType:
        scalar_type = self.to_scalar_type()
        if scalar_type not in (ScalarType.int8, ScalarType.int16):
            raise TypeError(f"Unexpected scalar type: {scalar_type}. "
                            f"Expecting ScalarType.int8 or ScalarType.int16")
        return scalar_type

    def is_int8_precision(self) -> bool:
        return self == QuantizationPrecision.INT_8

    def is_int16_precision(self) -> bool:
        return self == QuantizationPrecision.INT_16

    def is_bfloat16_precision(self) -> bool:
        return self == QuantizationPrecision.BFLOAT_16

    def is_bfloat16_with_int8_weights(self) -> bool:
        return self == QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS

    def is_bfloat16_with_int_weights(self) -> bool:
        return self in [
            QuantizationPrecision.BFLOAT_16_INT4_WEIGHTS,
            QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS
        ]
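
# Minimal sketch of the string-to-precision conversion defined above: both spellings map to
# the same enum member, and unrecognized strings raise ValueError.
_example_precision = QuantizationPrecision.from_string("int_16")
# _example_precision is QuantizationPrecision.INT_16; "int16" would give the same result.
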
_QUANTIZATION_PRECISION_TO_SCALAR_TYPE: Dict[QuantizationPrecision, ScalarType] = {
    QuantizationPrecision.INT_8: ScalarType.int8,
    QuantizationPrecision.INT_16: ScalarType.int16,
    QuantizationPrecision.BFLOAT_16: ScalarType.bfloat16,
    QuantizationPrecision.BFLOAT_16_INT8_WEIGHTS: ScalarType.bfloat16,
    QuantizationPrecision.BFLOAT_16_INT4_WEIGHTS: ScalarType.bfloat16
}


_T = TypeVar("_T")


@dataclass(frozen=True)
class EmptyValue:
    """
    An empty value class, used to initialize an empty Opt class.
    """
    empty: str = ''

@dataclass(frozen=True)
class Opt(Generic[_T]):
    """
    Generic immutable container class having either one value or no value.
    It is used for storing values of QuantizationConfigs fields.
    """
    value: Union[_T, EmptyValue] = EmptyValue()

    def merge(self, option: "Opt") -> "Opt":
        if self.is_empty():
            return option
        else:
            return self

    def get(self):
        if self.is_empty():
            raise ValueError('No value set.')
        return self.value

    def is_empty(self):
        return isinstance(self.value, EmptyValue)
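
# Minimal sketch of Opt semantics: an empty Opt defers to the other value in merge(), while
# a set Opt keeps its own value, and get() on an empty Opt raises ValueError.
_set_opt: Opt[bool] = Opt(True)
_empty_opt: Opt[bool] = Opt()
# _empty_opt.merge(_set_opt).get() -> True; _set_opt.merge(Opt(False)).get() -> True
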
@dataclass
class QuantizationConfigs:
    """
    Parameters controlling how to quantize a network.

    Instances should be constructed using one of the construction functions,
    not using the class constructor.

    Fields can be overridden for specific nodes using the custom_quantization_configs
    parameter of UpdateQuantizationConfigs. This parameter is accepted by several other
    functions, as well. See individual fields for restrictions on overriding.
    """
    # Whether to use asymmetric quantization. This value may not be overridden for
    # network nodes, as it will lead to inconsistent assumptions about quantization.
    asymmetry: Opt[bool] = Opt()

    # Whether to use per-channel quantization in a convolution or matrix multiply.
    per_channel: Opt[bool] = Opt()

    # Whether to use UDF in the quantized implementation of LeakyRelu.
    # If False, arithmetic is used.
    leaky_relu_uses_udf: Opt[bool] = Opt()

    # Quantization precision used in quantization. When this precision is not supported,
    # the quantizer will use a different precision.
    # The output_int32 flag overrides this one.
    quantization_precision: Opt[QuantizationPrecision] = Opt()

    # Sensitivity ranking for mixed precision quantization. Nodes with high sensitivity
    # have higher priority to be quantized with high precision. Nodes with no sensitivity
    # are not candidates for high precision.
    quantization_sensitivity: Opt[int] = Opt()

    # Whether to use wide node outputs during quantization. Only has an effect when
    # quantization_precision's value is INT_16.
    # A quantized node's output type should be int32 if this flag is true, int16 if this flag is false.
    # This affects the quantization pass; subsequent passes may change the type.
    intermediate_int32: Opt[bool] = Opt()

    # Type of correction for quantization-induced bias: None/Regular/Iterative
    # (see PA-1029 and https://arxiv.org/abs/1906.04721, Section 4.2)
    biascorr_type: Opt[BiasCorrectionType] = Opt()

    # Whether to use the int32 numeric type in the output of convolution related operators.
    # Skip requantization and output the layer in int32 instead of int8.
    output_int32: Opt[bool] = Opt()

    # A way of doing quantized arithmetic.
    requantization_mode: Opt[RequantizationMode] = Opt()

    # Whether to enable channel equalization. This value must not be overridden for individual nodes.
    channel_equalization: Opt[bool] = Opt()

    # Whether to enable smooth quant. This value must not be overridden for individual nodes.
    smooth_quant: Opt[bool] = Opt()
def initialize_empty_quant_config():
    """
    Helper function for initializing empty QuantizationConfigs.
    """
    opt = Opt()
    return QuantizationConfigs(asymmetry=opt, per_channel=opt, leaky_relu_uses_udf=opt,
                               quantization_precision=opt, quantization_sensitivity=opt,
                               intermediate_int32=opt, biascorr_type=opt, output_int32=opt,
                               requantization_mode=opt)
def merge_quantization_configs(*, config1: QuantizationConfigs,
                               config2: QuantizationConfigs) -> QuantizationConfigs:
    """
    Merge two QuantizationConfigs. When merging, values from the first config take priority
    and the corresponding values from the second config are discarded, so the
    QuantizationConfigs with higher priority should be passed as config1.
    """
    asymmetry = config1.asymmetry.merge(config2.asymmetry)
    per_channel = config1.per_channel.merge(config2.per_channel)
    leaky_relu_uses_udf = config1.leaky_relu_uses_udf.merge(config2.leaky_relu_uses_udf)
    quantization_precision = config1.quantization_precision.merge(config2.quantization_precision)
    quantization_sensitivity = config1.quantization_sensitivity.merge(config2.quantization_sensitivity)
    intermediate_int32 = config1.intermediate_int32.merge(config2.intermediate_int32)
    biascorr_type = config1.biascorr_type.merge(config2.biascorr_type)
    output_int32 = config1.output_int32.merge(config2.output_int32)
    requantization_mode = config1.requantization_mode.merge(config2.requantization_mode)
    return QuantizationConfigs(asymmetry=asymmetry, per_channel=per_channel,
                               leaky_relu_uses_udf=leaky_relu_uses_udf,
                               quantization_precision=quantization_precision,
                               quantization_sensitivity=quantization_sensitivity,
                               intermediate_int32=intermediate_int32,
                               biascorr_type=biascorr_type,
                               output_int32=output_int32,
                               requantization_mode=requantization_mode)
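
# Minimal sketch of merge priority: a field that is set in config1 wins, while fields left
# empty in config1 fall back to config2. (Direct construction is used here only for
# illustration; normal code should use the construction functions below.)
_high_priority = QuantizationConfigs(quantization_precision=Opt(QuantizationPrecision.INT_16))
_low_priority = QuantizationConfigs(quantization_precision=Opt(QuantizationPrecision.INT_8),
                                    per_channel=Opt(True))
_merged = merge_quantization_configs(config1=_high_priority, config2=_low_priority)
# _merged.quantization_precision.get() -> QuantizationPrecision.INT_16 (config1 wins)
# _merged.per_channel.get() -> True (empty in config1, filled from config2)
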
def create_quantization_configs(*, asymmetry: bool = True,
                                per_channel: bool = False,
                                leaky_relu_uses_udf: bool = True,
                                quantization_precision: QuantizationPrecision = QuantizationPrecision.INT_8,
                                quantization_sensitivity: int = 0,
                                requantization_mode: RequantizationMode = RequantizationMode.sima,
                                intermediate_int32: bool = False,
                                biascorr_type: BiasCorrectionType = BiasCorrectionType.NONE,
                                output_int32: bool = False,
                                channel_equalization: bool = False,
                                smooth_quant: bool = False) -> QuantizationConfigs:
    """
    Construct QuantizationConfigs.

    :param asymmetry: Whether to use asymmetric quantization.
    :param per_channel: Whether to use per-channel quantization.
    :param leaky_relu_uses_udf: Whether to use UDF instead of arithmetic instructions for quantization.
    :param quantization_precision: Precision used during quantization.
    :param quantization_sensitivity: Sensitivity for mixed precision quantization.
    :param requantization_mode: A way of doing quantized arithmetic.
    :param intermediate_int32: Whether to use wide node outputs during quantization.
    :param biascorr_type: Method to correct for quantization-induced bias: None/Regular/Iterative.
    :param output_int32: Whether to use the int32 numeric type in the output of convolution related operators.
    :param channel_equalization: Whether to enable channel equalization.
    :param smooth_quant: Whether to enable smooth quant.
    :return: QuantizationConfigs
    """
    return QuantizationConfigs(asymmetry=Opt(asymmetry), per_channel=Opt(per_channel),
                               leaky_relu_uses_udf=Opt(leaky_relu_uses_udf),
                               quantization_precision=Opt(quantization_precision),
                               quantization_sensitivity=Opt(quantization_sensitivity),
                               intermediate_int32=Opt(intermediate_int32),
                               biascorr_type=Opt(biascorr_type),
                               output_int32=Opt(output_int32),
                               requantization_mode=Opt(requantization_mode),
                               channel_equalization=Opt(channel_equalization),
                               smooth_quant=Opt(smooth_quant))
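
# Minimal sketch: build quantization settings with defaults, overriding only the options of
# interest.
_quant_configs = create_quantization_configs(per_channel=True,
                                             quantization_precision=QuantizationPrecision.INT_16)
# _quant_configs.per_channel.get() -> True; unspecified fields keep their documented defaults.
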
def update_quantization_configs(quantization_configs: QuantizationConfigs,
                                field_name: str, value: Any) -> None:
    """
    Given a field name and a value, if the QuantizationConfigs has an attribute with the given
    field name, update that attribute with the given value.

    Parameters
    ----------
    :param quantization_configs: QuantizationConfigs.
    :param field_name: str. Name of the target attribute in the given QuantizationConfigs object.
    :param value: Any. Target value that is going to be assigned to the attribute.
    """
    if not hasattr(quantization_configs, field_name):
        raise AttributeError(f"QuantizationConfigs does not have attribute name '{field_name}'.")
    old_value = quantization_configs.__dict__[field_name]
    if not old_value.is_empty() and not isinstance(old_value.get(), type(value)):
        raise AttributeError(f"QuantizationConfigs' {field_name} attribute must have type "
                             f"{type(old_value.get())}. Got {type(value)}")
    setattr(quantization_configs, field_name, Opt(value))
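
# Minimal sketch: flip a single field on an existing QuantizationConfigs by name. The new
# value is wrapped in Opt internally; a value of mismatched type raises AttributeError.
_qc = create_quantization_configs()
update_quantization_configs(_qc, "output_int32", True)
# _qc.output_int32.get() -> True
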
@dataclass
class CompressionConfigs:
    compress: bool = False
@dataclass(frozen=True)
class CalibrationConfigs:
    """
    Parameters for calibration. Instances should be constructed using one of the
    construction functions.

    Attributes
    ----------
    :attribute calibration_method: CalibrationMethod used during calibration.
        See the CalibrationMethod Enum class for currently supported methods.
    :attribute num_calibration_samples: int. Limit on the number of data samples used to feed
        inputs to the AwesomeNet during calibration. If None, all data samples that are passed
        to calibration are used.
    """
    calibration_method: CalibrationMethod
    num_calibration_samples: Optional[int]
def create_testcase_calibration_configs(num_calibration_samples: int,
                                        calibration_method: CalibrationMethod = MinMaxMethod()) \
        -> CalibrationConfigs:
    """
    Construct CalibrationConfigs using parameters from the network test configuration data.

    :param num_calibration_samples: Maximum number of calibration data samples to use for calibration.
    :param calibration_method: CalibrationMethod used in calibration. See the CalibrationMethod
        Enum class for supported values.
    :return: Constructed value
    """
    return CalibrationConfigs(calibration_method=calibration_method,
                              num_calibration_samples=num_calibration_samples)
def api_calibration_configs(calibration_method: CalibrationMethod = MinMaxMethod()) \
        -> CalibrationConfigs:
    """
    Construct CalibrationConfigs using user-specified parameters.

    :param calibration_method: CalibrationMethod used in calibration. See the CalibrationMethod
        Enum class for supported values.
    :return: Constructed value
    """
    return CalibrationConfigs(calibration_method=calibration_method,
                              num_calibration_samples=None)
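
# Minimal sketch: calibration settings built from the API-facing constructor. The default
# MinMaxMethod is used and no cap is placed on the number of calibration samples.
_calib_configs = api_calibration_configs()
# _calib_configs.num_calibration_samples -> None (use every provided sample)
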
class PerfThreshold(object):
    """
    Used to store the threshold value for quantized model performance.
    """
    def set_threshold(self, fp32_perf: float) -> float:
        raise NotImplementedError("PerfThreshold class is abstract, use either RelativePerfThreshold or "
                                  "AbsolutePerfThreshold to set the quantized model performance threshold.")
@dataclass
class RelativePerfThreshold(PerfThreshold):
    """
    Used to store the threshold value for quantized model performance.
    The threshold value is given as a value relative to the floating-point model performance.

    :param rel_value: float. Quantized model performance threshold value relative to the
        floating-point model performance.
    """
    rel_value: float

    def set_threshold(self, fp32_perf: float) -> float:
        """
        Returns the threshold for quantized model performance relative to the floating-point
        model performance.

        :param fp32_perf: float. Floating-point model performance.
        :return: float. Threshold value relative to floating-point model performance.
        """
        return self.rel_value * fp32_perf
@dataclass
class AbsolutePerfThreshold(PerfThreshold):
    """
    Used to store the threshold value for quantized model performance.
    The threshold value is given as an absolute value.

    :param abs_value: float. Quantized model performance threshold value.
    """
    abs_value: float

    def set_threshold(self, fp32_perf: float) -> float:
        """
        Returns the threshold for quantized model performance.

        :param fp32_perf: float. Unused. Floating-point model performance.
        :return: float. Quantized model performance threshold value.
        """
        return self.abs_value
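
# Minimal sketch of the two threshold styles: a relative threshold scales the measured
# floating-point score, while an absolute threshold ignores it.
_fp32_score = 0.80                                               # hypothetical floating-point accuracy
_rel = RelativePerfThreshold(0.95).set_threshold(_fp32_score)    # 0.95 * 0.80 = 0.76
_abs = AbsolutePerfThreshold(0.70).set_threshold(_fp32_score)    # 0.70, fp32 score ignored
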
@dataclass
class QuantizationAwarePartitioningConfigs:
    """
    Config used for quantization-aware partitioning.

    Attributes
    ----------
    :attribute performance_threshold: PerfThreshold. Value used as a target for quantized model
        performance. Given either as an absolute value or as a value relative to the
        floating-point model performance.
    :attribute max_iterations: int. Maximal number of iterations in the QAP loop. Represents the
        maximal number of layers that are to be fixed to floating point while performing the
        quantization-aware partitioning.
    :attribute graph_analyzer_mode: QuantizedGraphAnalyzerMode. Graph analysis execution mode.
    :attribute graph_analyzer_metric: Metric. Metric used in graph analysis.
    :attribute graph_analyzer_number_of_samples: int. Number of input samples to be used in
        graph analysis.
    """
    performance_threshold: PerfThreshold = RelativePerfThreshold(0.95)
    max_iterations: int = 1
    graph_analyzer_mode: QuantizedGraphAnalyzerMode = QuantizedGraphAnalyzerMode.local_feed
    graph_analyzer_metric: Metric = Metric.mse
    graph_analyzer_number_of_samples: int = 2
@dataclass
class OptimizationConfigs:
    """Class for holding the configuration information used by the OptimizerClass."""
    strategy: str = 'sequential'

    # Calibration
    calibration_configs: CalibrationConfigs = api_calibration_configs()

    # Quantization
    quantization_configs: QuantizationConfigs = create_quantization_configs()

    # Compression
    compression_configs: CompressionConfigs = CompressionConfigs()
def _process_backend_indices_dict(
        backend_indices_dict: Optional[Dict[Backend, List[Union[int, Tuple[int, ...]]]]] = None
) -> Optional[Dict[int, Backend]]:
    """
    Parse the backend_indices_dict into an indices_to_backend_dict.

    :param backend_indices_dict: Optional[Dict[Backend, List[Union[int, Tuple[int, ...]]]]].
        Default is None. Dictionary of targeted Backend to indices. For each Backend key, provides
        the list of indices or index tuples that are mapped to the targeted Backend. If a tuple is
        given in the list, all indices in the range tuple[0]..tuple[1] will be mapped to the
        targeted Backend.
    :return: Optional[Dict[int, Backend]]. Dictionary containing an index: Backend mapping for all
        layer indices that are mapped to a certain Backend.
    """
    if backend_indices_dict is None:
        return {}

    # Handle indices tagged to a Backend by user
    indices_to_backend_dict = {}
    for backend, indices in backend_indices_dict.items():
        parsed_indices = parse_indices(indices)
        for idx in parsed_indices:
            indices_to_backend_dict[idx] = backend
    return indices_to_backend_dict
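
# Minimal sketch of the index parsing described above: single indices map directly to the
# Backend, and an index tuple is expanded to the range it covers (exact bounds are defined by
# parse_indices). Backend.APU is reused from the TransformerConfigs docstring example below.
_idx_map = _process_backend_indices_dict({Backend.APU: [1, (5, 7)]})
# _idx_map maps index 1 and each index in the 5..7 range to Backend.APU.
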
@dataclass
class RunConfigs:
    """
    Configuration parameters for how to execute networks in software emulation.

    :attribute fast_mode: If True, use a fast implementation of an operator. Results may not
        match the result of executing on the MLA. If False, use an implementation that exactly
        matches execution on the MLA.
    """
    fast_mode: bool = False
class ConvertLayoutMethod(Enum):
    """Enumeration specifying the layout conversion algorithm.

    The common processing flow requires the model to be converted to the MLA's native layout,
    that is 'NHWC'. This enumeration specifies which algorithm shall be used for layout
    conversion. Currently, the 'legacy' algorithm is tested and proven on most CNN models.
    The 'automated' algorithm is being developed and is aimed mostly at ViT models.
    The 'none' option is used in internal test cases, and it specifies the processing flow
    where layout conversion is skipped. It should not be used in the general model processing
    pipeline.
    """
    NONE = 'none'
    """No layout conversion. This is a backward-compatible value designating the case where the
    layout conversion transform is disabled in some test cases. Since we would want to run the
    layout conversion algorithm for all of the test cases, this value will be deprecated in the
    future.
    """

    LEGACY = 'legacy'
    """Legacy algorithm using TVM's ConvertLayout pass"""

    AUTOMATED = 'automated'
    """Algorithm doing automatic rewrite for MLA supported layouts"""

    @staticmethod
    def from_str(method: str) -> "ConvertLayoutMethod":
        """Helper method for constructing an enumeration instance from string.

        Args:
            method: String parameter defining the layout conversion method.

        Returns:
            The ConvertLayoutMethod Enum instance.
        """
        match method:
            case 'legacy':
                return ConvertLayoutMethod.LEGACY
            case 'automated':
                return ConvertLayoutMethod.AUTOMATED
            case 'none' | _:
                return ConvertLayoutMethod.NONE
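
# Minimal sketch of the string lookup above: known names map to their members and anything
# unrecognized falls back to NONE.
_layout_method = ConvertLayoutMethod.from_str("automated")   # ConvertLayoutMethod.AUTOMATED
_fallback_method = ConvertLayoutMethod.from_str("bogus")     # ConvertLayoutMethod.NONE
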
@dataclass
class TransformerConfigs:
    """
    Class holding the configuration information used by GraphTransformer and Partitioner.

    Attributes
    ----------
    :attribute convert_layout_method: Specifies the algorithm used for converting the model layout.
    :attribute enable_graph_partition: bool. Whether to apply graph partitioning on the model.
    :attribute indices_to_backend_dict: Optional[Dict[int, afe.backends.Backend]]. Dictionary
        containing a mapping of layer indices to their targeted backend, if any. If a layer
        {index: target_backend} pair is present in the dictionary, the layer with the given index
        will be executed on the target_backend Backend. If the index is absent from the
        dictionary, the layer with that index will be executed on the highest-priority backend
        that is supported for that layer.
    :attribute enable_quantization_based_partitioning: bool. Flag containing information whether
        to apply quantization-based partitioning.
    :attribute requantization_mode: How to convert TVM quantized operators to SiMa IR quantized
        operators. Only quantized TVM operators that are assigned to the MLA are affected.
    :attribute enabled_backends: Which set of backends to assign nodes to in graph partitioning.
        Any assignment in backend_indices_dict overrides this parameter.

    Example
    -------
    The example shows how to create a TransformerConfigs that will convert the layout to NHWC,
    enable graph partitioning, and assign the nodes with indices [1, 13, 22] to the APU:

        backend_indices_dict = {Backend.APU: [1, 13, 22]}
        transformer_configs = TransformerConfigs(convert_layout_method=ConvertLayoutMethod.LEGACY,
                                                 enable_graph_partition=True,
                                                 backend_indices_dict=backend_indices_dict)
    """
    convert_layout_method: ConvertLayoutMethod
    enable_graph_partition: bool
    indices_to_backend_dict: dict[int, Backend]
    enable_quantization_based_partitioning: bool
    requantization_mode: RequantizationMode
    enabled_backends: CompileMode
    def __init__(
            self,
            convert_layout_method: ConvertLayoutMethod = ConvertLayoutMethod.LEGACY,
            enable_graph_partition: bool = True,
            backend_indices_dict: dict[Backend, list[int | tuple[int, ...]]] | None = None,
            enable_quantization_based_partitioning: bool = False,
            *,
            requantization_mode: RequantizationMode = RequantizationMode.sima,
            enabled_backends: CompileMode = CompileMode.MLA_EV74_CPU
    ) -> None:
        """
        Constructor for the TransformerConfigs object.

        Parameters
        ----------
        :param convert_layout_method: Specifies the algorithm used for converting the model layout.
        :param enable_graph_partition: Whether to apply graph partitioning on the model.
        :param backend_indices_dict: Default is None. Dictionary of targeted Backend to indices.
            For each Backend key, provides the list of indices or index tuples that are mapped to
            the targeted Backend. If a tuple is given in the list, all indices in the range
            tuple[0]..tuple[1] will be mapped to the targeted Backend. For layers with indices not
            covered by the dictionary, the algorithm will select the highest-priority Backend
            supported for that layer.
        :param enable_quantization_based_partitioning: Default value is False. Set to True to
            apply quantization-based partitioning.
        :param requantization_mode: How to convert TVM quantized operators to SiMa IR quantized
            operators. Only quantized operators are affected.
        :param enabled_backends: Which set of backends to assign nodes to in graph partitioning.
            Any assignment in backend_indices_dict overrides this parameter.

        Example
        -------
        The example shows how to create a TransformerConfigs that will convert the layout to NHWC,
        enable graph partitioning, and assign the nodes with indices [1, 13, 22] to the APU:

            backend_indices_dict = {Backend.APU: [1, 13, 22]}
            transformer_configs = TransformerConfigs(convert_layout_method=ConvertLayoutMethod.LEGACY,
                                                     enable_graph_partition=True,
                                                     backend_indices_dict=backend_indices_dict)
        """
        self.convert_layout_method = convert_layout_method
        self.enable_graph_partition = enable_graph_partition
        self.indices_to_backend_dict = _process_backend_indices_dict(backend_indices_dict)
        self.enable_quantization_based_partitioning = enable_quantization_based_partitioning
        self.requantization_mode = requantization_mode
        self.enabled_backends = enabled_backends

    @property
    def convert_layout(self) -> bool:
        """Property defining whether the layout conversion algorithm is enabled.

        This property is defined due to backward portability issues, and should be used only in
        some helper test functions (i.e. determining whether model inputs and/or outputs should be
        transposed during the test run). This property should not be used to define any aspect of
        TVM transformations, as it will be deprecated in the near future.

        Returns:
            Boolean flag determining whether the layout conversion algorithm is run during TVM
            transformations.
        """
        return self.convert_layout_method != ConvertLayoutMethod.NONE
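
# Minimal sketch mirroring the docstring example: partition the graph, convert the layout with
# the legacy algorithm, and pin three layer indices to the APU backend.
_transformer_configs = TransformerConfigs(
    convert_layout_method=ConvertLayoutMethod.LEGACY,
    enable_graph_partition=True,
    backend_indices_dict={Backend.APU: [1, 13, 22]},
)
# _transformer_configs.indices_to_backend_dict -> {1: Backend.APU, 13: Backend.APU, 22: Backend.APU}
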
@dataclass
class AfeProcessingConfigs:
    """
    Dataclass holding all the configuration information used in end-to-end processing.

    Attributes
    ----------
    :attribute model_configs: ModelConfigs. Configuration information on the model that is being
        processed.
    :attribute transformer_configs: TransformerConfigs. Configuration information on
        transformations being used in model processing.
    :attribute optimization_configs: OptimizationConfigs. Configuration information on
        optimizations being used in processing.
    :attribute qap_configs: QuantizationAwarePartitioningConfigs. Configuration information being
        used in the quantization-aware partitioning algorithm.
    :attribute target: A target platform that a model is compiled for.
    """
    model_configs: ModelConfigs
    transformer_configs: TransformerConfigs = TransformerConfigs()
    optimization_configs: OptimizationConfigs = OptimizationConfigs()
    qap_configs: QuantizationAwarePartitioningConfigs = QuantizationAwarePartitioningConfigs()
    target: Platform = gen1_target
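
# Minimal sketch of assembling the end-to-end processing configuration. The model values are
# hypothetical (and ScalarType.float32 is assumed to exist); unspecified fields fall back to
# their defaults from this module (legacy layout conversion, min-max calibration, int8
# quantization, gen1 target), with the QAP threshold overridden here for illustration.
_afe_processing_configs = AfeProcessingConfigs(
    model_configs=ModelConfigs(name="resnet50", framework="onnx",        # hypothetical values
                               input_names=["input"], input_shapes=[(1, 3, 224, 224)],
                               input_dtypes=[ScalarType.float32],        # assumed member
                               layout="NCHW", model_path="resnet50.onnx"),
    qap_configs=QuantizationAwarePartitioningConfigs(performance_threshold=AbsolutePerfThreshold(0.70)),
)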