.. _ev74_graph_201_sima_detess_dequant:

|graph|
=======================

Description
-----------

The detess_dequant graph accepts an array of tensors in NCHW format along with
their corresponding detessellation tile dimensions and dequantization
parameters. The input tensors are expected to be placed sequentially, one after
the other, in memory. The graph detessellates the input tensors and dequantizes
them to FP16 or FP32, depending on the output data type configured by the user.

The detessellation tile/slice size is calculated as follows:

``tile_sz = tile_width x tile_height x (tile_channel rounded to nearest 16 byte boundary) x data_type_size_bytes``

The supported input/output combinations are listed in the table below:

.. code-block::

   +--------------+---------------+------------+-----------+-----------+
   | in_data_type | out_data_type | out_format | tile_size | supported |
   +--------------+---------------+------------+-----------+-----------+
   | INT8         | FP16          | NHWC       | small     | Yes       |
   |              | FP16          | NHWC       | large     | No        |
   |              | FP16          | NCHW       | small     | No        |
   |              | FP16          | NCHW       | large     | No        |
   |              | FP32          | NHWC       | small     | Yes       |
   |              | FP32          | NHWC       | large     | Yes       |
   |              | FP32          | NCHW       | small     | Yes       |
   |              | FP32          | NCHW       | large     | Yes       |
   +--------------+---------------+------------+-----------+-----------+
   | INT16        | FP16          | NHWC       | small     | No        |
   |              | FP16          | NHWC       | large     | No        |
   |              | FP16          | NCHW       | small     | No        |
   |              | FP16          | NCHW       | large     | No        |
   |              | FP32          | NHWC       | small     | Yes       |
   |              | FP32          | NHWC       | large     | No        |
   |              | FP32          | NCHW       | small     | No        |
   |              | FP32          | NCHW       | large     | No        |
   +--------------+---------------+------------+-----------+-----------+
   | INT32        | FP16          | NHWC       | small     | No        |
   |              | FP16          | NHWC       | large     | No        |
   |              | FP16          | NCHW       | small     | No        |
   |              | FP16          | NCHW       | large     | No        |
   |              | FP32          | NHWC       | small     | Yes       |
   |              | FP32          | NHWC       | large     | No        |
   |              | FP32          | NCHW       | small     | No        |
   |              | FP32          | NCHW       | large     | No        |
   +--------------+---------------+------------+-----------+-----------+

Graph Info
----------

Overview
********

..
.. list-table:: |graph|
   :widths: 12 20
   :stub-columns: 1

   * - Graph Name
     - |graph|
   * - Graph ID
     - 201
   * - Operations Supported
     - Dequantize, Detessellate
   * - Available Since Yocto Build
     - B684

Example Config
--------------

|ev74_example_config_text|

.. code-block:: json

   {
     "version": 0.1,
     "node_name": "detess-dequant",
     "simaai__params": {
       "params": 15,
       "cpu": 1,
       "next_cpu": 0,
       "no_of_outbuf": 1,
       "ibufname": "",
       "graph_id": 201,
       "num_tensors": 1,
       "input_width": [ 1 ],
       "input_height": [ 1 ],
       "input_depth": [ 1000 ],
       "slice_width": [ 1 ],
       "slice_height": [ 1 ],
       "slice_depth": [ 1000 ],
       "dq_scale": [ 255.02200010497842 ],
       "dq_zp": [ -128 ],
       "data_type": [ 0 ],
       "fp16_out_en": [ 0 ],
       "output_format": [ 0 ],
       "debug": 0,
       "out_sz": 4000,
       "dump_data": 1
     }
   }

Parameters
**********

.. list-table:: |graph| Params
   :widths: 10 50 10 10 10 10
   :header-rows: 1

   * - Parameter Name
     - Parameter Description
     - Data Type
     - Default
     - Min
     - Max
   * - num_tensors
     - Number of input tensors in the input buffer
     - int32_t
     - 13
     - 1
     - 32
   * - input_width
     - Width of the input tensor
     - int32_t
     - [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - input_height
     - Height of the input tensor
     - int32_t
     - [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - input_depth
     - Depth of the input tensor
     - int32_t
     - [32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - slice_width
     - Slice/Tile width to be used for detessellation
     - int32_t
     - [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - slice_height
     - Slice/Tile height to be used for detessellation
     - int32_t
     - [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - slice_depth
     - Slice/Tile depth/channels to be used for detessellation
     - int32_t
     - [32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]
     - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
     - [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
   * - dq_scale
     - Dequantization scale
     - float32
     - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
     - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
     - [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0]
   * - dq_zp
     - Dequantization zero point
     - int32_t
     - ['INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8', 'INT8']
     - [-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128]
     - [127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127]
   * - data_type
     - Specifies the input tessellated data type: 0 => 'INT8', 1 => 'INT16', 2 => 'INT32'.
     - int32_t
     - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
     - N/A
     - N/A
   * - fp16_out_en
     - Enables FP16 output if set to 1; otherwise, gives FP32 output.
     - bool
     - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
     - 0
     - 1
   * - output_format
     - Output tensor format. 0 => NHWC, 1 => NCHW.
     - int32_t
     - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
     - 0
     - 1
   * - debug
     - Enable more debug logs: 0 => disable, 1 => additional logs, 2 => profile runtime of individual input tensors, 3 => profile overall graph runtime.
     - int32_t
     - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
     - 0
     - 3
   * - dump_data
     - Enable (1) or disable (0) dumping of output tensor to ``/tmp`` directory on device with the name ``{node_name}-###.out``. The sequence number ``###`` will increment with each output dump (e.g., -001.out, -002.out, ...).
     - int32_t
     - 0
     - 0
     - 1

|dependent_app|
---------------

.. note::

   * The need to write, build and execute a dependent application for the CVU
     will be removed in an upcoming release.

|ev74_dependent_app_brief|

.. _cvu_detess_dequant_cvu_cfg_graph_cpp:

How to compile using the files below
************************************

|ev74_dependent_app_footer|

Directory structure
*******************

.. code:: shell

   .
   ├── CMakeLists.txt
   ├── cvu_cfg_graph.cpp
   └── cvu_cfg_main.cpp

Code files
**********

..
.. code-block:: cpp
   :caption: cvu_cfg_graph.cpp
   :linenos:

   #include
   #include
   #include
   #include

   #define SIMA_IPC_GRAPH_NAME "SIMA_DETESS_DEQUANT"
   #define SIMA_IPC_GRAPH_CODE (201)

   /* Parameter IDs sent to the graph; one per JSON key handled below. */
   #define NUM_IN_TENSORS (1)
   #define INPUT_WIDTH (2)
   #define INPUT_HEIGHT (3)
   #define INPUT_DEPTH (4)
   #define SLICE_WIDTH (5)
   #define SLICE_HEIGHT (6)
   #define SLICE_DEPTH (7)
   #define DEQUANT_SCALE (8)
   #define DEQUANT_ZEROPOINT (9)
   #define INPUT_TYPE (10)
   #define FP16_OUT_ENABLED (11)
   #define OUTPUT_FORMAT (12)
   #define DEBUG (13)

   /**
    * Reads the array parameter `param_name` from the parsed JSON and sends
    * each element to the graph as parameter `param_id`.
    *
    * @param params      Parsed parameter set to read from.
    * @param param_name  JSON key of the array parameter.
    * @param array_len   Number of elements the array must contain.
    * @param graph_id    ID of the graph being configured.
    * @param param_id    ID of the graph parameter the values are sent to.
    * @param is_float    true: send each value with send_float_param();
    *                    false: cast it to int and use send_i32_param().
    * @return 0 on success; -1 if the array is missing, has the wrong length,
    *         or the scratch buffer cannot be allocated.
    */
   int parse_and_send_array_param(simaai_params_t *params, const char *param_name, int array_len,
                                  int graph_id, int param_id, bool is_float) {
       simaai_double_array_t *arr =
           (simaai_double_array_t *)parser_get_double_array(params, param_name);
       if (arr == NULL) {
           std::cout << "\n Param list empty for " << /*static_cast */ (param_name);
           return -1;
       }
       if (arr->size != array_len) {
           std::cout << "\n Param list incomplete for " << /*static_cast */ (param_name);
           return -1;
       }

       /* Scratch buffer handed to the send helpers. Allocated only after the
        * input has been validated and released on every path below. */
       uint8_t *buf = (uint8_t *)calloc(1, sizeof(uint32_t) * array_len);
       if (buf == NULL && array_len > 0) {
           std::cout << "\n Unable to allocate buffer for " << /*static_cast */ (param_name);
           return -1;
       }

       /* The leading argument 2 is passed through unchanged from the original
        * listing; its meaning is defined by the send helpers. */
       for (int i = 0; i < arr->size; i++) {
           if (is_float) {
               send_float_param(2, graph_id, param_id, buf, (double)arr->values[i]);
           } else {
               send_i32_param(2, graph_id, param_id, buf, (int)arr->values[i]);
           }
       }

       free(buf);
       return 0;
   }

   /* One per-tensor array parameter: JSON key, graph parameter ID, value kind. */
   struct array_param_desc {
       const char *name;
       int id;
       bool is_float;
   };

   /* Per-tensor array parameters, in the order they are sent to the graph. */
   static const array_param_desc ARRAY_PARAMS[] = {
       {"input_width", INPUT_WIDTH, false},
       {"input_height", INPUT_HEIGHT, false},
       {"input_depth", INPUT_DEPTH, false},
       {"slice_width", SLICE_WIDTH, false},
       {"slice_height", SLICE_HEIGHT, false},
       {"slice_depth", SLICE_DEPTH, false},
       {"dq_scale", DEQUANT_SCALE, true},
       {"dq_zp", DEQUANT_ZEROPOINT, false},
       {"data_type", INPUT_TYPE, false},
       {"fp16_out_en", FP16_OUT_ENABLED, false},
       {"output_format", OUTPUT_FORMAT, false},
   };

   /* Sends num_tensors, every per-tensor array and debug; false on first failure. */
   static bool send_graph_params(simaai_params_t *params, uint8_t *buf) {
       int *num_tensors = (int *)parser_get_int(params, "num_tensors");
       if (num_tensors == NULL) {
           std::cout << "Missing parameter num_tensors \n";
           return false;
       }
       int num_input_tensors_val = *num_tensors;
       send_i32_param(2, SIMA_IPC_GRAPH_CODE, NUM_IN_TENSORS, buf, num_input_tensors_val);

       const int array_param_count = (int)(sizeof(ARRAY_PARAMS) / sizeof(ARRAY_PARAMS[0]));
       for (int i = 0; i < array_param_count; i++) {
           /* The helper already reports which list was empty or incomplete. */
           if (parse_and_send_array_param(params, ARRAY_PARAMS[i].name, num_input_tensors_val,
                                          SIMA_IPC_GRAPH_CODE, ARRAY_PARAMS[i].id,
                                          ARRAY_PARAMS[i].is_float) != 0) {
               std::cout << "\n";
               return false;
           }
       }

       int *debug = (int *)parser_get_int(params, "debug");
       if (debug == NULL) {
           std::cout << "Missing parameter debug \n";
           return false;
       }
       send_i32_param(2, SIMA_IPC_GRAPH_CODE, DEBUG, buf, *debug);
       return true;
   }

   /**
    * Parses the JSON configuration file `json_in` and sends the graph
    * parameters it contains: num_tensors, the per-tensor arrays, then debug.
    *
    * Stops at the first failure (parser setup, missing key, wrong array
    * length, allocation) and reports it instead of continuing with missing
    * data. The parser state and the scratch buffer are released on every path.
    *
    * @param json_in  Path of the JSON configuration file.
    */
   void configure_graph(const char *json_in) {
       simaai_params_t *params = parser_node_struct_init();
       if (params == NULL) {
           std::cout << "Unable to create params \n";
           return;
       }

       uint8_t *buf = NULL;
       bool ok = false;

       if ((parse_json_file(json_in, params) != PARSER_SUCCESS)) {
           std::cout << "Unable to start parser \n";
       } else if ((buf = (uint8_t *)calloc(1, sizeof(uint8_t) * 16)) == NULL) {
           std::cout << "Unable to allocate parameter buffer \n";
       } else {
           ok = send_graph_params(params, buf);
       }

       parser_finalize(params);
       free(buf); /* free(NULL) is a no-op */

       if (ok) {
           std::cout << "Completed " << SIMA_IPC_GRAPH_NAME << " graph configure \n";
       } else {
           std::cout << "Failed to configure " << SIMA_IPC_GRAPH_NAME << " graph \n";
       }
   }

..
   NOTE(review): the header names on the ``#include`` lines of both C++
   listings are missing in this source (only the bare directive remains).
   Restore them from the original ``cvu_cfg_graph.cpp`` and
   ``cvu_cfg_main.cpp`` before publishing.

.. _cvu_detess_dequant_cvu_cfg_main_cpp:

.. code-block:: cpp
   :caption: cvu_cfg_main.cpp
   :linenos:

   #include
   #include
   #include
   #include
   #include

   extern void configure_graph(const char *json_fpath);

   /* Returns true when stat() succeeds on `path`. */
   bool is_valid_path(const char *path) {
       struct stat buffer;
       return (stat(path, &buffer) == 0);
   }

   /**
    * Entry point: expects the path of the JSON configuration file as the
    * first command-line argument and passes it to configure_graph().
    *
    * @return 0 after configure_graph() has run, 1 if the argument is missing
    *         or the path fails the stat() check.
    */
   int main(int argc, char **argv) {
       /* argv[1] is only valid when an argument was actually supplied. */
       if (argc < 2) {
           std::cerr << "Usage: " << (argc > 0 ? argv[0] : "cvu_cfg_app")
                     << " <config.json>" << std::endl;
           return 1;
       }

       const char *json_path = argv[1];
       if (!is_valid_path(json_path)) {
           std::cerr << "Invalid path: " << json_path << std::endl;
           return 1;
       }

       configure_graph(json_path);
       return 0;
   }

.. _cvu_detess_dequant_cmakelists:

.. code-block:: cmake
   :caption: CMakeLists.txt
   :linenos:

   cmake_minimum_required(VERSION 3.16)

   # set the project name
   set(GRAPH_NAME "detessdequant_201")
   set(PROJECT_NAME "CVU Graph Cfg. App.")

   project("${PROJECT_NAME}"
     VERSION 0.1
     DESCRIPTION "CVU Graph Configuration Application"
     LANGUAGES C CXX)

   set(PIPELINE_SOURCES cvu_cfg_graph.cpp)

   # Get the name of the current working branch
   execute_process(
     COMMAND git rev-parse --abbrev-ref HEAD
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_BRANCH
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )

   # Get the latest abbreviated commit hash of the working branch
   execute_process(
     COMMAND git log -1 --format=%h
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_COMMIT_HASH
     OUTPUT_STRIP_TRAILING_WHITESPACE
   )

   link_directories(${CMAKE_INSTALL_DIR}/core
     ${CMAKE_INSTALL_DIR}/gst
   )

   include(GNUInstallDirs)

   # ev-configuration generation executable
   set(EV_EXEC_NAME "${GRAPH_NAME}_cvu_cfg_app")
   add_executable(${EV_EXEC_NAME} cvu_cfg_main.cpp ${PIPELINE_SOURCES})

   target_link_libraries(${EV_EXEC_NAME} PUBLIC simaaiparser evhelpers)

   install(TARGETS "${EV_EXEC_NAME}")

.. |graph| replace:: SIMA_DETESS_DEQUANT