SIMA_DETESS_DEQUANT
Description
The detess_dequant graph accepts an array of tensors in NCHW and their corresponding detessellate tile dimension and dequantization parameters. This input tensor array is expected to be placed sequentially one after the other in the memory. The graph detessellates the input tensors and dequantizes them to FP16 or FP32 depending on the output data type configured by the user.
Detessellation tile/slice size is calculated as below:
tile_sz = tile_width x tile_height x (tile_channel rounded to nearest 16 byte boundary) x data_type_size_bytes
The various supported input output combinations are as per the below table:
+--------------+---------------+------------+-----------+-----------+
| in_data_type | out_data_type | out_format | tile_size | supported |
+--------------+---------------+------------+-----------+-----------+
| INT8 | FP16 | NHWC | small | Yes |
| | FP16 | NHWC | large | No |
| | FP16 | NCHW | small | No |
| | FP16 | NCHW | large | No |
| | FP32 | NHWC | small | Yes |
| | FP32 | NHWC | large | Yes |
| | FP32 | NCHW | small | Yes |
| | FP32 | NCHW | large | Yes |
+--------------+---------------+------------+-----------+-----------+
| INT16 | FP16 | NHWC | small | No |
| | FP16 | NHWC | large | No |
| | FP16 | NCHW | small | No |
| | FP16 | NCHW | large | No |
| | FP32 | NHWC | small | Yes |
| | FP32 | NHWC | large | No |
| | FP32 | NCHW | small | No |
| | FP32 | NCHW | large | No |
+--------------+---------------+------------+-----------+-----------+
| INT32 | FP16 | NHWC | small | No |
| | FP16 | NHWC | large | No |
| | FP16 | NCHW | small | No |
| | FP16 | NCHW | large | No |
| | FP32 | NHWC | small | Yes |
| | FP32 | NHWC | large | No |
| | FP32 | NCHW | small | No |
| | FP32 | NCHW | large | No |
+--------------+---------------+------------+-----------+-----------+
Graph Info
Overview
Graph Name |
SIMA_DETESS_DEQUANT |
---|---|
Graph ID |
201 |
Operations Supported |
Dequantize, Detessellate |
Available Since Yocto Build |
B684 |
Example Config
Below is an example JSON config for this graph. The EV74 graph must first be configured with such a config; this requires a CVU Configuration Application developed in C++.
{
"version": 0.1,
"node_name": "detess-dequant",
"simaai__params": {
"params": 15,
"cpu": 1,
"next_cpu": 0,
"no_of_outbuf": 1,
"ibufname": "",
"graph_id": 201,
"num_tensors": 1,
"input_width": [
1
],
"input_height": [
1
],
"input_depth": [
1000
],
"slice_width": [
1
],
"slice_height": [
1
],
"slice_depth": [
1000
],
"dq_scale": [
255.02200010497842
],
"dq_zp": [
-128
],
"data_type": [
0
],
"fp16_out_en": [
0
],
"output_format": [
0
],
"debug": 0,
"out_sz": 4000,
"dump_data": 1
}
}
Parameters
Parameter Name |
Parameter Description |
Data Type |
Default |
Min |
Max |
---|---|---|---|---|---|
num_tensors |
Number of input tensors in the input buffer |
int32_t |
13 |
1 |
32 |
input_width |
Width of the input tensor |
int32_t |
[64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
input_height |
Height of the input tensor |
int32_t |
[64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
input_depth |
Depth of the input tensor |
int32_t |
[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
slice_width |
Slice/Tile width to be used for detessellation |
int32_t |
[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
slice_height |
Slice/Tile height to be used for detessellation |
int32_t |
[16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
slice_depth |
Slice/Tile depth/channels to be used for detessellation |
int32_t |
[32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32] |
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] |
[4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096] |
dq_scale |
Dequantization scale |
float32 |
[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] |
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] |
[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0] |
dq_zp |
Dequantization zero point |
int32_t |
[‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’, ‘INT8’] |
[-128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128, -128] |
[127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127] |
data_type |
Specifies the input tessellated datatype, 0=> ‘INT8’, 1=> ‘INT16’, 2=> ‘INT32’. |
int32_t |
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
N/A |
N/A |
fp16_out_en |
Enables FP16 output if set to 1; otherwise gives FP32 output. |
bool |
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
0 |
1 |
output_format |
Output tensor format. 0 => NHWC, 1 => NCHW. |
int32_t |
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
0 |
1 |
debug |
Enable more debug logs, 0 => disable, 1 => additional logs, 2 => profile runtime of individual input tensors, 3 => profile overall graph runtime. |
int32_t |
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] |
0 |
3 |
dump_data |
Enable (1) or disable (0) dumping of output tensor to |
int32_t |
0 |
0 |
1 |
CVU Configuration Application
Note
The need to write, build and execute a dependent application for the CVU will be removed in an upcoming release.
To configure any CVU graph, a C++ CVU Configuration Application must be cross-compiled and executed on the board before using the CVU. Multiple graphs can be pre-programmed into the CVU before running any application. This guide provides a pre-written C++ application for download for each graph that can be cross-compiled on Palette, and executed on the board prior to running the simaaiprocesscvu GStreamer plugin. A pre-compiled version is also included for direct use. If you encounter issues, please re-compile the application from the sources provided.
How to compile using the files below
Please refer to How to compile CVU Configuration Application? for more info.
Directory structure
.
├── CMakeLists.txt
├── cvu_cfg_graph.cpp
└── cvu_cfg_main.cpp
Code files
#include <simaai/ev_cfg_helper.h>
#include <simaai/parser.h>
#include <simaai/platform/simaevxxipc.h>
#include <string.h>

#include <cstdlib>
#include <iostream>
5
6 #define SIMA_IPC_GRAPH_NAME "SIMA_DETESS_DEQUANT" // graph name; printed in configure_graph's completion message
7 #define SIMA_IPC_GRAPH_CODE (201) // graph id passed as graph_id on every send_*_param call
8
9 #define NUM_IN_TENSORS (1) // param_id used when sending "num_tensors"
10 #define INPUT_WIDTH (2) // param_id used when sending the "input_width" array
11 #define INPUT_HEIGHT (3) // param_id used when sending the "input_height" array
12 #define INPUT_DEPTH (4) // param_id used when sending the "input_depth" array
13 #define SLICE_WIDTH (5) // param_id used when sending the "slice_width" array
14 #define SLICE_HEIGHT (6) // param_id used when sending the "slice_height" array
15 #define SLICE_DEPTH (7) // param_id used when sending the "slice_depth" array
16 #define DEQUANT_SCALE (8) // param_id used when sending the "dq_scale" array (sent as float)
17 #define DEQUANT_ZEROPOINT (9) // param_id used when sending the "dq_zp" array
18 #define INPUT_TYPE (10) // param_id used when sending the "data_type" array
19 #define FP16_OUT_ENABLED (11) // param_id used when sending the "fp16_out_en" array
20 #define OUTPUT_FORMAT (12) // param_id used when sending the "output_format" array
21 #define DEBUG (13) // param_id used when sending "debug"
22
23 int parse_and_send_array_param(simaai_params_t *params, const char *param_name, int array_len, int graph_id, int param_id, bool is_float) {
24 simaai_double_array_t *arr;
25 uint8_t *buf = (uint8_t *)calloc(1, sizeof(uint32_t) * array_len);
26 arr = (simaai_double_array_t *)parser_get_double_array(params, param_name);
27 if (arr != NULL) {
28 if (arr->size != array_len) {
29 std::cout << "\n Param list incomplete for " << /*static_cast <const void *>*/ (param_name);
30 return -1;
31 }
32
33 for (int i = 0; i < arr->size; i++) {
34 if(is_float) {
35 send_float_param(2, graph_id, param_id, buf, (double)arr->values[i]);
36 } else {
37 send_i32_param(2, graph_id, param_id, buf, (int)arr->values[i]);
38 }
39 }
40 }
41 else {
42 std::cout << "\n Param list empty for " << /*static_cast <const void *>*/ (param_name);
43 return -1;
44 }
45 return 0;
46 }
47
48 void configure_graph(const char *json_in) {
49 simaai_params_t *params = parser_node_struct_init();
50 if (params == NULL) {
51 std::cout << "Unable to create params \n";
52 }
53 if ((parse_json_file(json_in, params) != PARSER_SUCCESS)) {
54 std::cout << "Unable to start parser \n";
55 }
56
57 uint8_t *buf = (uint8_t *)calloc(1, sizeof(uint8_t) * 16);
58
59 int num_input_tensors_val = *((int *)parser_get_int(params, "num_tensors"));
60 send_i32_param(2, SIMA_IPC_GRAPH_CODE, NUM_IN_TENSORS, buf, num_input_tensors_val);
61
62 parse_and_send_array_param(params, "input_width", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_WIDTH, false);
63 parse_and_send_array_param(params, "input_height", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_HEIGHT, false);
64 parse_and_send_array_param(params, "input_depth", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_DEPTH, false);
65 parse_and_send_array_param(params, "slice_width", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_WIDTH, false);
66 parse_and_send_array_param(params, "slice_height", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_HEIGHT, false);
67 parse_and_send_array_param(params, "slice_depth", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, SLICE_DEPTH, false);
68 parse_and_send_array_param(params, "dq_scale", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, DEQUANT_SCALE, true);
69 parse_and_send_array_param(params, "dq_zp", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, DEQUANT_ZEROPOINT, false);
70 parse_and_send_array_param(params, "data_type", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, INPUT_TYPE, false);
71 parse_and_send_array_param(params, "fp16_out_en", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, FP16_OUT_ENABLED, false);
72 parse_and_send_array_param(params, "output_format", num_input_tensors_val, SIMA_IPC_GRAPH_CODE, OUTPUT_FORMAT, false);
73
74 int debug_val = *((int *)parser_get_int(params, "debug"));
75 send_i32_param(2, SIMA_IPC_GRAPH_CODE, DEBUG, buf, debug_val);
76
77 parser_finalize(params);
78 free(buf);
79
80 std::cout << "Completed " << SIMA_IPC_GRAPH_NAME << " graph configure \n";
81 }
1 #include <getopt.h>
2 #include <sys/stat.h>
3 #include <unistd.h>
4
5 #include <cstring>
6 #include <iostream>
7
8 extern void configure_graph(const char *json_fpath);
9
/**
 * Report whether `path` names an existing filesystem entry.
 *
 * @param path Path to probe; may be null.
 * @return true when stat() succeeds on `path`; false when it fails or when
 *         `path` is null.
 */
bool is_valid_path(const char *path) {
    if (path == nullptr) {
        return false;  // never hand a null pointer to stat()
    }
    struct stat buffer;
    return stat(path, &buffer) == 0;
}
14
15 int main(int argc, char **argv) {
16 const char *json_path = argv[1];
17
18 if(is_valid_path(json_path)) {
19 configure_graph(json_path);
20 } else {
21 std::cerr << "Invalid path: " << json_path << std::endl;
22 return 1;
23 }
24
25 return 0;
26 }
cmake_minimum_required(VERSION 3.16)

# set the project name
set(GRAPH_NAME "detessdequant_201")
set(PROJECT_NAME "CVU Graph Cfg. App.")

project("${PROJECT_NAME}"
  VERSION 0.1
  DESCRIPTION "CVU Graph Configuration Application"
  LANGUAGES C CXX)

# Sources of the configuration application (single list, reused below)
set(PIPELINE_SOURCES
  cvu_cfg_main.cpp
  cvu_cfg_graph.cpp)

# Get the name of the working branch
# (GIT_BRANCH is not referenced elsewhere in this listing)
execute_process(
  COMMAND git rev-parse --abbrev-ref HEAD
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GIT_BRANCH
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Get the latest abbreviated commit hash of the working branch
# (GIT_COMMIT_HASH is not referenced elsewhere in this listing)
execute_process(
  COMMAND git log -1 --format=%h
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GIT_COMMIT_HASH
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# NOTE(review): CMAKE_INSTALL_DIR is not a built-in CMake variable; presumably
# it is provided by the cross-compilation environment — confirm.
link_directories(${CMAKE_INSTALL_DIR}/core
  ${CMAKE_INSTALL_DIR}/gst
)

include(GNUInstallDirs)

# ev-configuration generation executable
set(EV_EXEC_NAME "${GRAPH_NAME}_cvu_cfg_app")

add_executable(${EV_EXEC_NAME} ${PIPELINE_SOURCES})

target_link_libraries(${EV_EXEC_NAME}
  PUBLIC
  simaaiparser
  evhelpers)

install(TARGETS "${EV_EXEC_NAME}")