From 7647fe83ddd1ddc021fef60479d3bcf8f5cca0df Mon Sep 17 00:00:00 2001 From: Dongju Chae Date: Mon, 1 Jun 2020 12:31:08 +0000 Subject: [PATCH] [Decoder] Extend tensordec-imagesegment to support snpe-deeplab/depth This patch extends tensordec-imagesegment to support snpe-deeplab/depth. Also, it includes minor code refactoring. Signed-off-by: Dongju Chae --- .../tensor_decoder/tensordec-imagesegment.c | 289 +++++++++++++++------ 1 file changed, 214 insertions(+), 75 deletions(-) diff --git a/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c b/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c index 82d2e19..fe08880 100644 --- a/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c +++ b/ext/nnstreamer/tensor_decoder/tensordec-imagesegment.c @@ -2,6 +2,7 @@ * GStreamer / NNStreamer tensor_decoder subplugin, "image segment" * Copyright (C) 2019 Jihoon Lee * Copyright (C) 2019 niklasjang + * Copyright (C) 2020 Dongju Chae * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -23,10 +24,28 @@ * @see https://github.com/nnstreamer/nnstreamer * @author Jihoon Lee * niklasjang - * @bug No known bugs except for NYI items + * Dongju Chae + * @bug No known bugs except for NYI items * * option1: Decoder mode of image segmentation * Available : tflite-deeplab + * Available : snpe-deeplab + * Available : snpe-depth + * + * option2: Maximum number of class labels (except background), default is 20 (Pascal) + * + * expected models + * - tflite-deeplab : deeplabv3_257_mv_gpu.tflite (designed for embedded devices) + * - snpe-deeplab : deeplabv3_mnv2_pascal_train_aug.dlc (converted from a TF model) + * - snpe-depth : any snpe models (.dlc) producing grayscale images + * + * expected input dims + * - tflite-deeplab : #labels x width x height (float32, label probability) + * (e.g., 21 x 257 x 257) + * - snpe-deeplab : width x height x 1 (float32, label index) + * (e.g., 513 x 513 x 1) + * - snpe-depth : 
1 x width x height (float32, grayscale) + * (e.g., 1 x 320 x 240) * * pipeline: * filesrc @@ -63,12 +82,17 @@ #include #include +#if defined(__aarch64__) +#define NEON64_ENABLED +#endif + +#define DEFAULT_LABELS (20) +#define RGBA_CHANNEL (4) +#define MAX_RGB (255) + void init_is (void) __attribute__ ((constructor)); void fini_is (void) __attribute__ ((destructor)); -#define RGBA_CHANNEL 4 -#define TFLITE_DEEPLAB_TOTAL_LABELS 21 - const static float DETECTION_THRESHOLD = 0.5f; /** @@ -77,6 +101,8 @@ const static float DETECTION_THRESHOLD = 0.5f; typedef enum { MODE_TFLITE_DEEPLAB = 0, + MODE_SNPE_DEEPLAB = 1, + MODE_SNPE_DEPTH = 2, MODE_UNKNOWN, } image_segment_modes; @@ -85,6 +111,8 @@ typedef enum */ static const char *is_modes[] = { [MODE_TFLITE_DEEPLAB] = "tflite-deeplab", + [MODE_SNPE_DEEPLAB] = "snpe-deeplab", + [MODE_SNPE_DEPTH] = "snpe-depth", NULL, }; @@ -94,10 +122,15 @@ static const char *is_modes[] = { typedef struct { image_segment_modes mode; /**< The image segmentation decoding mode */ - guint **segment_map; /**< The image segmentated map */ + float *segment_map; /**< The image segmentated map */ + + guint max_labels; /**< Maximum number of labels */ + guint *color_map; /**< The RGBA color map (up to max labels) */ - guint width; /**< Input video width */ - guint height; /**< Input video height */ + guint width; /**< Input video width */ + guint height; /**< Input video height */ + + GRand *rand; /**< random value generator */ } image_segments; /** @brief tensordec-plugin's GstTensorDecoderDef callback */ @@ -112,28 +145,28 @@ is_init (void **pdata) return FALSE; } + idata->rand = g_rand_new (); idata->mode = MODE_UNKNOWN; idata->width = 0; idata->height = 0; + idata->max_labels = DEFAULT_LABELS; idata->segment_map = NULL; + idata->color_map = NULL; return TRUE; } -/** @brief Free the allocated segment_map */ +/** @brief Free the allocated resources */ static void -_free_segment_map (image_segments * idata) +_free_resources (image_segments * idata) { 
- int i; - - if (idata->segment_map) { - for (i = 0; i < idata->height; i++) { - g_free (idata->segment_map[i]); - } - g_free (idata->segment_map); - } + g_free (idata->segment_map); + g_free (idata->color_map); + g_rand_free (idata->rand); idata->segment_map = NULL; + idata->color_map = NULL; + idata->rand = NULL; } /** @brief tensordec-plugin's GstTensorDecoderDef callback */ @@ -142,12 +175,26 @@ is_exit (void **pdata) { image_segments *idata = *pdata; - _free_segment_map (idata); + _free_resources (idata); g_free (*pdata); *pdata = NULL; } +/** @brief fill rgba color map */ +static void +_fill_color_map (image_segments * idata) +{ + guint i; + + idata->color_map[0] = 0; /* background */ + for (i = 1; i <= idata->max_labels; i++) { + /* any color value would be acceptable */ + idata->color_map[i] = g_rand_int_range (idata->rand, 0x101010, 0xFFFFFF); + ((guint8 *)&idata->color_map[i])[3] = '\xff'; /* alpha */ + } +} + /** @brief tensordec-plugin's GstTensorDecoderDef callback */ static int is_setOption (void **pdata, int op_num, const char *param) @@ -168,6 +215,10 @@ is_setOption (void **pdata, int op_num, const char *param) return TRUE; } return TRUE; + } else if (op_num == 1) { + guint64 max_labels_64 = g_ascii_strtoll (param, NULL, 10); + if (max_labels_64 != 0 && max_labels_64 <= UINT_MAX) + idata->max_labels = (guint) max_labels_64; } GST_WARNING ("mode-option-\"%d\" is not definded.", op_num); @@ -179,16 +230,24 @@ static gboolean _init_modes (image_segments * idata) { if (idata->mode == MODE_TFLITE_DEEPLAB) { - int i; + /* init image segments if seg map is null */ + if (idata->segment_map == NULL) + idata->segment_map = g_new0 (float, idata->height * idata->width); - idata->segment_map = g_new0 (guint *, idata->height); - g_assert (idata->segment_map != NULL); - for (i = 0; i < idata->height; i++) { - idata->segment_map[i] = g_new0 (guint, idata->width); - g_assert (idata->segment_map[i] != NULL); + if (idata->color_map == NULL) { + idata->color_map = 
g_new (guint, idata->max_labels + 1); + _fill_color_map (idata); } return TRUE; + } else if (idata->mode == MODE_SNPE_DEEPLAB) { + if (idata->color_map == NULL) { + idata->color_map = g_new (guint, idata->max_labels + 1); + _fill_color_map (idata); + } + return TRUE; + } else if (idata->mode == MODE_SNPE_DEPTH) { + return TRUE; } GST_ERROR ("Failed to initialize, unknown mode %d.", idata->mode); @@ -210,15 +269,16 @@ is_getOutCaps (void **pdata, const GstTensorsConfig * config) GstCaps *caps; char *str; - if (idata->mode == MODE_TFLITE_DEEPLAB) { - g_return_val_if_fail (config != NULL, NULL); - GST_INFO ("Num Tensors = %d", config->info.num_tensors); - g_return_val_if_fail (config->info.num_tensors >= 1, NULL); + g_return_val_if_fail (config != NULL, NULL); + GST_INFO ("Num Tensors = %d", config->info.num_tensors); + g_return_val_if_fail (config->info.num_tensors >= 1, NULL); - if (idata->width == 0 || idata->height == 0) { - idata->width = config->info.info[0].dimension[1]; - idata->height = config->info.info[0].dimension[2]; - } + if (idata->mode == MODE_SNPE_DEEPLAB) { + idata->width = config->info.info[0].dimension[0]; + idata->height = config->info.info[0].dimension[1]; + } else { + idata->width = config->info.info[0].dimension[1]; + idata->height = config->info.info[0].dimension[2]; } str = g_strdup_printf ("video/x-raw, format = RGBA, " @@ -244,35 +304,70 @@ is_getTransformSize (void **pdata, const GstTensorsConfig * config, /** @todo Use appropriate values */ } - -/** @brief Set color according to each pixel's max probability */ +/** @brief Set color according to each pixel's label (RGBA) */ static void set_color_according_to_label (image_segments * idata, GstMapInfo * out_info) { - uint32_t *frame = (uint32_t *) out_info->data; - uint32_t *pos; - guint i, j, label_idx; - const uint32_t label_color[21] = { - 0xFF000040, 0xFF800000, 0xFFFFEFD5, 0xFF40E0D0, 0xFFFFA500, - 0xFF00FF00, 0xFFDC143C, 0xFFF0F8FF, 0xFF008000, 0xFFEE82EE, - 0xFF808080, 0xFF4169E1, 
0xFF008080, 0xFFFF6347, 0xFF000000, - 0xFFFF4500, 0xFFDA70D6, 0xFFEEE8AA, 0xFF98FB98, 0xFFAFEEEE, - 0xFFFFF5EE - }; + float *input = idata->segment_map; + uint32_t *output = (uint32_t *) out_info->data; + guint data_idx, label_idx; + guint i, j; for (i = 0; i < idata->height; i++) { for (j = 0; j < idata->width; j++) { - label_idx = idata->segment_map[i][j]; - - /* If background or out-of-range, don't draw it */ - if (label_idx == 0 || label_idx > 20) + data_idx = i * idata->width + j; + if (idata->mode == MODE_TFLITE_DEEPLAB) + label_idx = ((guint *) input)[data_idx]; + else + label_idx = (guint) input[data_idx]; + + /* If out-of-range, don't draw it */ + if (G_UNLIKELY (label_idx > idata->max_labels)) continue; - pos = &frame[i * idata->width + j]; - *pos = label_color[label_idx]; + + output[data_idx] = idata->color_map[label_idx]; } } } +/** @brief Set color with grayscale value */ +static void +set_color_grayscale (image_segments * idata, GstMapInfo * out_info) +{ + float *input = idata->segment_map; + uint32_t *output = (uint32_t *) out_info->data; + guint data_idx, grayscale; + guint i, j; + float max_grayscale = 0.0; + + /* find the maximum value */ + for (i = 0; i < idata->height; i++) { + for (j = 0; j < idata->width; j++) { + data_idx = i * idata->width + j; + if (max_grayscale < input[data_idx]) + max_grayscale = input[data_idx]; + } + } + + if (G_UNLIKELY (max_grayscale == 0.0)) + return; + + /* normalize the values */ + for (i = 0; i < idata->height; i++) { + for (j = 0; j < idata->width; j++) { + data_idx = i * idata->width + j; + grayscale = (guint) ((((float) input[data_idx]) / max_grayscale) * MAX_RGB); + + /* Should be less than 256 */ + if (G_UNLIKELY (grayscale > MAX_RGB)) + continue; + + grayscale = grayscale | (grayscale << 8) | (grayscale << 16) | (grayscale << 24); + + output[data_idx] = grayscale; + } + } +} /** @brief Set label index according to each pixel's label probabilities */ static void @@ -282,31 +377,70 @@ set_label_index 
(image_segments * idata, void *data) int idx, i, j; int max_idx; float max_prob; + guint total_labels = idata->max_labels + 1; - for (i = 0; i < idata->height; i++) { - memset (idata->segment_map[i], 0, idata->width * sizeof (guint)); - } + memset (idata->segment_map, '\x00', + idata->width * idata->height * sizeof (float)); for (i = 0; i < idata->height; i++) { for (j = 0; j < idata->width; j++) { max_idx = 0; - max_prob = prob_map[i * idata->width * TFLITE_DEEPLAB_TOTAL_LABELS - + j * TFLITE_DEEPLAB_TOTAL_LABELS]; - for (idx = 1; idx < TFLITE_DEEPLAB_TOTAL_LABELS; idx++) { - float prob = prob_map[i * idata->width * TFLITE_DEEPLAB_TOTAL_LABELS - + j * TFLITE_DEEPLAB_TOTAL_LABELS + idx]; + max_prob = prob_map[i * idata->width * total_labels + + j * total_labels]; + for (idx = 1; idx < total_labels; idx++) { + float prob = prob_map[i * idata->width * total_labels + + j * total_labels + idx]; if (prob > max_prob) { max_prob = prob; max_idx = idx; } } if (max_prob > DETECTION_THRESHOLD) { - idata->segment_map[i][j] = max_idx; - } + idata->segment_map[i * idata->width + j] = (float) max_idx; + } /* otherwise, regarded as background */ } } } +/** @brief set color to output buffer depending on each mode */ +static void +set_color (image_segments * idata, void *data, GstMapInfo * out_info) +{ + /* tflite-deeplab needs to perform extra post-processing to set labels */ + if (idata->mode == MODE_TFLITE_DEEPLAB) { + set_label_index (idata, data); + set_color_according_to_label (idata, out_info); + return; + } + + /* snpe-deeplab already has labeled data as input */ + idata->segment_map = data; + + if (idata->mode == MODE_SNPE_DEEPLAB) + set_color_according_to_label (idata, out_info); + else if (idata->mode == MODE_SNPE_DEPTH) + set_color_grayscale (idata, out_info); + + idata->segment_map = NULL; +} + +/** @brief sanity check for each mode */ +static gboolean +check_sanity (image_segments * idata, const GstTensorsConfig * config) +{ + if (idata->mode == MODE_TFLITE_DEEPLAB) { 
+ return (config->info.info[0].type == _NNS_FLOAT32) && + (config->info.info[0].dimension[0] == idata->max_labels + 1); + } else if (idata->mode == MODE_SNPE_DEEPLAB) { + return (config->info.info[0].type == _NNS_FLOAT32); + } else if (idata->mode == MODE_SNPE_DEPTH) { + return (config->info.info[0].type == _NNS_FLOAT32) && + (config->info.info[0].dimension[0] == 1); + } + + return FALSE; +} + /** @brief tensordec-plugin's GstTensorDecoderDef callback */ static GstFlowReturn is_decode (void **pdata, const GstTensorsConfig * config, @@ -314,17 +448,15 @@ is_decode (void **pdata, const GstTensorsConfig * config, { image_segments *idata = *pdata; const size_t size = idata->width * idata->height * RGBA_CHANNEL; + gboolean need_output_alloc; GstMapInfo out_info; GstMemory *out_mem; - /* init image segments if seg map is null */ - if (idata->segment_map == NULL) { - if (!_init_modes (idata)) - return GST_FLOW_ERROR; - } + if (FALSE == _init_modes (idata) || outbuf == NULL) + return GST_FLOW_ERROR; - g_assert (outbuf); - if (gst_buffer_get_size (outbuf) == 0) { + need_output_alloc = (gst_buffer_get_size (outbuf) == 0); + if (TRUE == need_output_alloc) { out_mem = gst_allocator_alloc (NULL, size, NULL); } else { if (gst_buffer_get_size (outbuf) < size) { @@ -334,25 +466,32 @@ is_decode (void **pdata, const GstTensorsConfig * config, } if (FALSE == gst_memory_map (out_mem, &out_info, GST_MAP_WRITE)) { ml_loge ("Cannot map output memory / tensordec-imagesegment.\n"); - return GST_FLOW_ERROR; + goto error_free; } - memset (out_info.data, 0, size); + memset (out_info.data, '\x00', size); - if (idata->mode == MODE_TFLITE_DEEPLAB) { - g_assert (config->info.info[0].type == _NNS_FLOAT32); - g_assert (config->info.info[0].dimension[0] == TFLITE_DEEPLAB_TOTAL_LABELS); - set_label_index (idata, input->data); + if (FALSE == check_sanity (idata, config)) { + ml_loge ("Invalid input data format detected.\n"); + goto error_unmap; } - set_color_according_to_label (idata, &out_info); + 
set_color (idata, input->data, &out_info); gst_memory_unmap (out_mem, &out_info); - if (gst_buffer_get_size (outbuf) == 0) + if (TRUE == need_output_alloc) gst_buffer_append_memory (outbuf, out_mem); return GST_FLOW_OK; + +error_unmap: + gst_memory_unmap (out_mem, &out_info); +error_free: + if (TRUE == need_output_alloc) + gst_allocator_free (NULL, out_mem); + + return GST_FLOW_ERROR; } static gchar decoder_subplugin_image_segment[] = "image_segment"; -- 2.7.4