From a1c9b45d1ac1f3a7b830df2fa17b962d88a128cf Mon Sep 17 00:00:00 2001 From: Dongju Chae Date: Thu, 4 Jun 2020 20:25:54 +0900 Subject: [PATCH] [Decoder/Neon] Support neon accl for SNPE deeplab/depth seg decoder This patch supports neon accl for SNPE deeplab/depth seg decoder Signed-off-by: Dongju Chae --- .../tensor_decoder/tensordec-imagesegment.c | 136 +++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c b/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c index 8eeed9e..58a4e41 100644 --- a/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c +++ b/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c @@ -83,7 +83,11 @@ #include #if defined(__aarch64__) +#include + #define NEON64_ENABLED +#define GRAYSCALE_HEX (0x00010101) +#define ALPHA_HEX (0xFF000000) #endif #define DEFAULT_LABELS (20) @@ -131,6 +135,7 @@ typedef struct guint height; /**< Input video height */ GRand *rand; /**< random value generator */ + guint rgb_modifier; /**< rgb modifier according to # labels */ } image_segments; /** @brief tensordec-plugin's GstTensorDecoderDef callback */ @@ -152,6 +157,7 @@ is_init (void **pdata) idata->max_labels = DEFAULT_LABELS; idata->segment_map = NULL; idata->color_map = NULL; + idata->rgb_modifier = 0; return TRUE; } @@ -188,11 +194,21 @@ _fill_color_map (image_segments * idata) guint i; idata->color_map[0] = 0; /* background */ + +#if defined (NEON64_ENABLED) + idata->rgb_modifier = 0xFFFFFF / (idata->max_labels + 1); + for (i = 1; i <= idata->max_labels; i++) { + /* colors should be the same with neon calculations */ + idata->color_map[i] = idata->rgb_modifier * i; + ((guint8 *)&idata->color_map[i])[3] = '\xff'; /* alpha */ + } +#else for (i = 1; i <= idata->max_labels; i++) { /* any color value would be acceptable */ idata->color_map[i] = g_rand_int_range (idata->rand, 0x101010, 0xFFFFFF); ((guint8 *)&idata->color_map[i])[3] = '\xff'; /* alpha */ } +#endif } /** @brief tensordec-plugin's GstTensorDecoderDef callback */ @@ -313,6 +329,52 @@ set_color_according_to_label (image_segments * idata, GstMapInfo * out_info) guint num_pixels = idata->height * idata->width; guint label_idx, idx = 0; +#if defined (NEON64_ENABLED) + float32x4_t v_src_float; + + uint32x4_t v_src_uint; + uint32x4_t v_magic; + uint32x4_t v_mask; + uint32x4_t v_alpha; + uint32x4_t v_zero; + + guint num_lanes = 4; + + v_magic = vdupq_n_u32 (idata->rgb_modifier); + v_alpha = vdupq_n_u32 (ALPHA_HEX); + v_zero = vdupq_n_u32 (0); + + for (idx = 0; idx < num_pixels; idx += num_lanes) { + /* load float32 vector */ + v_src_float = vld1q_f32 (input); + input += num_lanes; + + /* convert float32 vector to uint32 vector */ + v_src_uint = vcvtq_u32_f32 (v_src_float); + + /* multiply by magic number to fill RGB values */ + v_src_uint = vmulq_u32 (v_src_uint, v_magic); + + /* check whether the label is zero (i.e., background) */ + v_mask = vceqq_u32 (v_src_uint, v_zero); + v_mask = vbslq_u32 (v_mask, v_zero, v_alpha); + + /* set the alpha value unless it's background */ + v_src_uint = vorrq_u32 (v_src_uint, v_mask); + + /* store uint32 vector */ + vst1q_u32 (output, v_src_uint); + output += num_lanes; + } + + if (num_pixels == idx) + return; + + /* handle remaining data */ + input = (float *) idata->segment_map; + output = (uint32_t *) out_info->data; + idx -= num_lanes; +#endif for (; idx < num_pixels; idx++) { label_idx = (guint) input[idx]; @@ -333,6 +395,33 @@ find_max_grayscale (image_segments * idata) guint num_pixels = idata->height * idata->width; guint idx = 0; +#if defined (NEON64_ENABLED) + float32x4_t v_src, v_max; + guint num_lanes = 4; + + v_max = vdupq_n_f32 (0); + + /* find the maximum value per lane */ + for (idx = 0; idx < num_pixels; idx += num_lanes) { + v_src = vld1q_f32 (input); + input += num_lanes; + + v_max = vmaxq_f32 (v_src, v_max); + } + + /* find the maximum value among all lanes */ + gray_max = MAX (gray_max, vgetq_lane_f32 (v_max, 0)); + gray_max = MAX (gray_max, vgetq_lane_f32 (v_max, 1)); + gray_max = MAX (gray_max, vgetq_lane_f32 (v_max, 2)); + gray_max = MAX (gray_max, vgetq_lane_f32 (v_max, 3)); + + if (num_pixels == idx) + return gray_max; + + /* handle remaining data */ + input = idata->segment_map; + idx -= num_lanes; +#endif for (; idx < num_pixels; idx++) gray_max = MAX (gray_max, input [idx]); @@ -355,6 +444,53 @@ set_color_grayscale (image_segments * idata, GstMapInfo * out_info) if (G_UNLIKELY (max_grayscale == 0.0)) return; +#if defined (NEON64_ENABLED) + { + float32x4_t v_src_float; + float32x4_t v_max_gray; + float32x4_t v_max_rgb; + + uint32x4_t v_src_uint; + uint32x4_t v_magic; + uint32x4_t v_alpha; + + guint num_lanes = 4; + + v_max_gray = vdupq_n_f32 (max_grayscale); + v_max_rgb = vdupq_n_f32 (MAX_RGB); + v_magic = vdupq_n_u32 (GRAYSCALE_HEX); + v_alpha = vdupq_n_u32 (ALPHA_HEX); + + for (idx = 0; idx < num_pixels; idx += num_lanes) { + /* load float32 vector */ + v_src_float = vld1q_f32 (input); + input += num_lanes; + + /* normalized_gray = (gray / max_gray) x max_rgb */ + v_src_float = vdivq_f32 (v_src_float, v_max_gray); + v_src_float = vmulq_f32 (v_src_float, v_max_rgb); + + /* convert float32 vector to uint32 vector */ + v_src_uint = vcvtq_u32_f32 (v_src_float); + + /* multiply by magic number to fill the same RGB values */ + v_src_uint = vmulq_u32 (v_src_uint, v_magic); + v_src_uint = vaddq_u32 (v_src_uint, v_alpha); + + /* store uint32 vector */ + vst1q_u32 (output, v_src_uint); + output += num_lanes; + } + + if (num_pixels == idx) + return; + + /* handle remaining data */ + input = idata->segment_map; + output = (uint32_t *) out_info->data; + idx -= num_lanes; + } +#endif for (; idx < num_pixels; idx++) { /* normalize grayscale values to RGB_MAX */ grayscale = (guint) ((input[idx] / max_grayscale) * MAX_RGB); -- 2.7.4