SIMA_DETESS_DEQUANT

Description

The detess_dequant graph accepts an array of tensors in NCHW format together with their corresponding detessellation tile dimensions and dequantization parameters. The input tensors are expected to be placed sequentially, one after another, in memory. The graph detessellates the input tensors and dequantizes them to FP16 or FP32, depending on the output data type configured by the user.

The detessellation tile/slice size is calculated as follows:

tile_sz = tile_width x tile_height x (tile_channel rounded to nearest 16 byte boundary) x data_type_size_bytes

The various supported input output combinations are as per the below table:

+--------------+---------------+------------+-----------+-----------+
| in_data_type | out_data_type | out_format | tile_size | supported |
+--------------+---------------+------------+-----------+-----------+
| INT8         | FP16          | NHWC       | small     |   Yes     |
|              | FP16          | NHWC       | large     |   No      |
|              | FP16          | NCHW       | small     |   No      |
|              | FP16          | NCHW       | large     |   No      |
|              | FP32          | NHWC       | small     |   Yes     |
|              | FP32          | NHWC       | large     |   Yes     |
|              | FP32          | NCHW       | small     |   Yes     |
|              | FP32          | NCHW       | large     |   Yes     |
+--------------+---------------+------------+-----------+-----------+
| INT16        | FP16          | NHWC       | small     |   No      |
|              | FP16          | NHWC       | large     |   No      |
|              | FP16          | NCHW       | small     |   No      |
|              | FP16          | NCHW       | large     |   No      |
|              | FP32          | NHWC       | small     |   Yes     |
|              | FP32          | NHWC       | large     |   No      |
|              | FP32          | NCHW       | small     |   No      |
|              | FP32          | NCHW       | large     |   No      |
+--------------+---------------+------------+-----------+-----------+
| INT32        | FP16          | NHWC       | small     |   No      |
|              | FP16          | NHWC       | large     |   No      |
|              | FP16          | NCHW       | small     |   No      |
|              | FP16          | NCHW       | large     |   No      |
|              | FP32          | NHWC       | small     |   Yes     |
|              | FP32          | NHWC       | large     |   No      |
|              | FP32          | NCHW       | small     |   No      |
|              | FP32          | NCHW       | large     |   No      |
+--------------+---------------+------------+-----------+-----------+

Graph Info

Overview

SIMA_DETESS_DEQUANT

Graph Name

SIMA_DETESS_DEQUANT

Graph ID

201

Operations Supported

Dequantize, Detessellate

Available Since Yocto Build

B684

Example Config

Below is an example JSON config for this graph. The EV74 graph must first be configured with such a config; this is done with a CVU Configuration Application written in C++.

{
    "version": 0.1,
    "node_name": "detess-dequant",
    "simaai__params": {
        "params": 15,
        "cpu": 1,
        "next_cpu": 0,
        "no_of_outbuf": 1,
        "ibufname": "",
        "graph_id": 201,
        "num_tensors": 1,
        "input_width": [
            1
        ],
        "input_height": [
            1
        ],
        "input_depth": [
            1000
        ],
        "slice_width": [
            1
        ],
        "slice_height": [
            1
        ],
        "slice_depth": [
            1000
        ],
        "dq_scale": [
            255.02200010497842
        ],
        "dq_zp": [
            -128
        ],
        "data_type": [
            0
        ],
        "fp16_out_en": [
            0
        ],
        "output_format": [
            0
        ],
        "debug": 0,
        "out_sz": 4000,
        "dump_data": 1
    }
}

Parameters

SIMA_DETESS_DEQUANT Params

Parameter Name

Parameter Description

Data Type

Default

Min

Max

num_tensors

Number of input tensors in the input buffer

int32_t

13

1

32

input_width

Width of the input tensor

int32_t

[64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

input_height

Height of the input tensor

int32_t

[64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

input_depth

Depth of the input tensor

int32_t

[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

slice_width

Slice/Tile width to be used for detessellation

int32_t

[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

slice_height

Slice/Tile height to be used for detessellation

int32_t

[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

slice_depth

Slice/Tile depth/channels to be used for detessellation

int32_t

[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]

dq_scale

Dequantization scale

float32

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0]

dq_zp

Dequantization zero point

int32_t

[‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’]

[-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128]

[127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127]

data_type

Specifies the data type of the tessellated input: 0 => ‘INT8’, 1 => ‘INT16’, 2 => ‘INT32’.

int32_t

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

N/A

N/A

fp16_out_en

Enables FP16 output if set to 1; otherwise the output is FP32.

bool

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

0

1

output_format

Output tensor format. 0 => NHWC, 1 => NCHW.

int32_t

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

0

1

debug

Enables more debug logs: 0 => disabled, 1 => additional logs, 2 => profile the runtime of individual input tensors, 3 => profile the overall graph runtime.

int32_t

0

0

3

dump_data

Enable (1) or disable (0) dumping of output tensor to /tmp directory on device with the name {node_name}-###.out. The sequence number ### will increment with each output dump (e.g., -001.out, -002.out, …).

int32_t

0

0

1

CVU Configuration Application

Note

  • The need to write, build and execute a dependent application for the CVU will be removed in an upcoming release.

To configure any CVU graph, a C++ CVU Configuration Application must be cross-compiled and executed on the board before using the CVU. Multiple graphs can be pre-programmed into the CVU before running any application. For each graph, this guide provides a pre-written C++ application for download that can be cross-compiled on Palette and executed on the board prior to running the simaaiprocesscvu GStreamer plugin. A pre-compiled version is also included for direct use. If you encounter issues, please re-compile the application from the sources provided.

How to compile using the files below

Please refer to How to compile CVU Configuration Application? for more info.

Directory structure

.
├── CMakeLists.txt
├── cvu_cfg_graph.cpp
└── cvu_cfg_main.cpp

Code files

cvu_cfg_graph.cpp
 1 #include <simaai/ev_cfg_helper.h>
 2 #include <simaai/parser.h>
 3 #include <simaai/platform/simaevxxipc.h>
 4 #include <string.h>
 5
 6 #define SIMA_IPC_GRAPH_NAME "SIMA_DETESS_DEQUANT"
 7 #define SIMA_IPC_GRAPH_CODE (201)
 8
 9 #define NUM_IN_TENSORS (1)
10 #define INPUT_WIDTH (2)
11 #define INPUT_HEIGHT (3)
12 #define INPUT_DEPTH (4)
13 #define SLICE_WIDTH (5)
14 #define SLICE_HEIGHT (6)
15 #define SLICE_DEPTH (7)
16 #define DEQUANT_SCALE (8)
17 #define DEQUANT_ZEROPOINT (9)
18 #define INPUT_TYPE (10)
19 #define FP16_OUT_ENABLED (11)
20 #define OUTPUT_FORMAT (12)
21 #define DEBUG (13)
22
23 int parse_and_send_array_param(simaai_params_t *params, const char *param_name, int array_len, int graph_id, int param_id, bool is_float) {
24   simaai_double_array_t *arr;
25   uint8_t *buf = (uint8_t *)calloc(1, sizeof(uint32_t) * array_len);
26   arr = (simaai_double_array_t *)parser_get_double_array(params, param_name);
27   if (arr != NULL) {
28     if (arr->size != array_len) {
29       std::cout << "\n Param list incomplete for " << /*static_cast <const void *>*/ (param_name);
30       return -1;
31     }
32
33     for (int i = 0; i < arr->size; i++) {
34       if(is_float) {
35         send_float_param(2, graph_id, param_id, buf, (double)arr->values[i]);
36       } else {
37         send_i32_param(2, graph_id, param_id, buf, (int)arr->values[i]);
38       }
39     }
40   }
41   else {
42     std::cout << "\n Param list empty for " << /*static_cast <const void *>*/ (param_name);
43     return -1;
44   }
45   return 0;
46 }
47
48 void configure_graph(const char *json_in) {
49   simaai_params_t *params = parser_node_struct_init();
50   if (params == NULL) {
51     std::cout << "Unable to create params \n";
52   }
53   if ((parse_json_file(json_in, params) != PARSER_SUCCESS)) {
54     std::cout << "Unable to start parser \n";
55   }
56
57   uint8_t *buf = (uint8_t *)calloc(1, sizeof(uint8_t) * 16);
58
59   int num_input_tensors_val = *((int *)parser_get_int(params, "num_tensors"));
60   send_i32_param(2, SIMA_IPC_GRAPH_CODE, NUM_IN_TENSORS, buf, num_input_tensors_val);
61
62   parse_and_send_array_param(params, "input_width", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_WIDTH, false);
63   parse_and_send_array_param(params, "input_height", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_HEIGHT, false);
64   parse_and_send_array_param(params, "input_depth", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_DEPTH, false);
65   parse_and_send_array_param(params, "slice_width", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_WIDTH, false);
66   parse_and_send_array_param(params, "slice_height", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_HEIGHT, false);
67   parse_and_send_array_param(params, "slice_depth", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_DEPTH, false);
68   parse_and_send_array_param(params, "dq_scale", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, DEQUANT_SCALE, true);
69   parse_and_send_array_param(params, "dq_zp", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, DEQUANT_ZEROPOINT, false);
70   parse_and_send_array_param(params, "data_type", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_TYPE, false);
71   parse_and_send_array_param(params, "fp16_out_en", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, FP16_OUT_ENABLED, false);
72   parse_and_send_array_param(params, "output_format", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, OUTPUT_FORMAT, false);
73
74   int debug_val = *((int *)parser_get_int(params, "debug"));
75   send_i32_param(2, SIMA_IPC_GRAPH_CODE, DEBUG, buf, debug_val);
76
77   parser_finalize(params);
78   free(buf);
79
80   std::cout << "Completed " << SIMA_IPC_GRAPH_NAME << " graph configure \n";
81 }
cvu_cfg_main.cpp
 1 #include <getopt.h>
 2 #include <sys/stat.h>
 3 #include <unistd.h>
 4
 5 #include <cstring>
 6 #include <iostream>
 7
 8 extern void configure_graph(const char *json_fpath);
 9
/**
 * Reports whether `path` names an existing filesystem entry.
 *
 * @param path NUL-terminated path, or nullptr.
 * @return true when stat() succeeds on `path`; false for nullptr or when
 *         stat() fails (e.g. the path does not exist or is not accessible).
 */
bool is_valid_path(const char *path) {
  // main() can hand over a null pointer when no argument was supplied;
  // never forward that to stat().
  if (path == nullptr) {
    return false;
  }
  struct stat info;
  return stat(path, &info) == 0;
}
14
15 int main(int argc, char **argv) {
16   const char *json_path = argv[1];
17
18   if(is_valid_path(json_path)) {
19     configure_graph(json_path);
20   } else {
21     std::cerr << "Invalid path: " << json_path << std::endl;
22     return 1;
23   }
24
25   return 0;
26 }
CMakeLists.txt
 1 cmake_minimum_required(VERSION 3.16)
 2
 3 # set the project name
 4 set(GRAPH_NAME "detessdequant_201")
 5 set(PROJECT_NAME "CVU Graph Cfg. App.")
 6
 7 project("${PROJECT_NAME}"
 8     VERSION 0.1
 9     DESCRIPTION "CVU Graph Configuration Application"
10     LANGUAGES C CXX)
11
12 set(PIPELINE_SOURCES
13     cvu_cfg_graph.cpp)
14
15 execute_process(
16     COMMAND git rev-parse --abbrev-ref HEAD
17     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
18     OUTPUT_VARIABLE GIT_BRANCH
19     OUTPUT_STRIP_TRAILING_WHITESPACE
20 )
21
22 # Get the latest abbreviated commit hash of the working branch
23 execute_process(
24     COMMAND git log -1 --format=%h
25     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
26     OUTPUT_VARIABLE GIT_COMMIT_HASH
27     OUTPUT_STRIP_TRAILING_WHITESPACE
28 )
29
30 link_directories(${CMAKE_INSTALL_DIR}/core
31     ${CMAKE_INSTALL_DIR}/gst
32 )
33
34 include(GNUInstallDirs)
35
 36 # ev-configuration generation executable
37 set(EV_EXEC_NAME "${GRAPH_NAME}_cvu_cfg_app")
38
39 add_executable(${EV_EXEC_NAME}
40     cvu_cfg_main.cpp
41     cvu_cfg_graph.cpp)
42
43 target_link_libraries(${EV_EXEC_NAME}
44     PUBLIC
45     simaaiparser
46     evhelpers)
47
48 INSTALL(TARGETS "${EV_EXEC_NAME}")