This patch removes subclasses in the tensordec-boundingbox header.
The properties of each box type are now located inside the box_properties subdir.
Signed-off-by: Yelin Jeong <yelini.jeong@samsung.com>
--- /dev/null
+# Source files appended to decoder_sub_bounding_boxes_sources.
+decoder_sub_bounding_boxes_sources += files(
+'mobilenetssd.cc',
+ 'mobilenetssdpp.cc',
+ 'ovdetection.cc',
+ 'yolo.cc',
+ 'mppalmdetection.cc'
+)
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file mobilenetssd.cc
+ * @date 13 May 2024
+ * @brief NNStreamer tensor-decoder bounding box properties
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Number of input tensors read by this decoder (boxes and detections) */
+#define MAX_TENSORS (2U)
+
+/** @brief Indices into params[] */
+#define THRESHOLD_IDX (0)
+#define Y_SCALE_IDX (1)
+#define X_SCALE_IDX (2)
+#define H_SCALE_IDX (3)
+#define W_SCALE_IDX (4)
+#define IOU_THRESHOLD_IDX (5)
+
+/** @brief Default values stored into params[] by the constructor */
+#define DETECTION_THRESHOLD_DEFAULT (0.5f)
+#define THRESHOLD_IOU_DEFAULT (0.5f)
+#define Y_SCALE_DEFAULT (10.0f)
+#define X_SCALE_DEFAULT (10.0f)
+#define H_SCALE_DEFAULT (5.0f)
+#define W_SCALE_DEFAULT (5.0f)
+
+/**
+ * @brief Logistic sigmoid, 1 / (1 + e^-x), evaluated in single precision.
+ * @note The argument is parenthesized before the cast so that any expression
+ *       (e.g. "a / b") is converted as a whole, not only its first operand.
+ */
+#define _expit(x) (1.f / (1.f + expf (-((float) (x)))))
+
+/**
+ * @brief C++-Template-like box location calculation for box-priors
+ * @bug This is not macro-argument safe. Use parentheses!
+ * @note Must be expanded where params[] and sigmoid_threshold are in scope;
+ *       both are read directly by the macro body.
+ * @note Among all labels whose raw score reaches sigmoid_threshold, the one
+ *       with the highest sigmoid score is reported. Label 0 is skipped.
+ * @param[in] index The index (3rd dimension of BOX_SIZE:1:DETECTION_MAX:1)
+ * @param[in] total_labels The count of total labels. We can get this from input tensor info. (1st dimension of LABEL_SIZE:DETECTION_MAX:1:1)
+ * @param[in] boxprior The box prior data from the box file of SSD.
+ * @param[in] boxinputptr Cursor pointer of input + byte-per-index * index (box)
+ * @param[in] detinputptr Cursor pointer of input + byte-per-index * index (detection)
+ * @param[out] result The object returned. (pointer to object)
+ * @param[in] i_width Factor applied to the normalized x and width values
+ * @param[in] i_height Factor applied to the normalized y and height values
+ */
+#define _get_object_i_mobilenet_ssd(index, total_labels, boxprior, \
+    boxinputptr, detinputptr, result, i_width, i_height) \
+  do { \
+    unsigned int c; \
+    gfloat highscore = -FLT_MAX; \
+    float y_scale = params[Y_SCALE_IDX]; \
+    float x_scale = params[X_SCALE_IDX]; \
+    float h_scale = params[H_SCALE_IDX]; \
+    float w_scale = params[W_SCALE_IDX]; \
+    result->valid = FALSE; \
+    for (c = 1; c < total_labels; c++) { \
+      if (detinputptr[c] >= sigmoid_threshold) { \
+        gfloat score = _expit (detinputptr[c]); \
+        float ycenter \
+            = boxinputptr[0] / y_scale * boxprior[2][index] + boxprior[0][index]; \
+        float xcenter \
+            = boxinputptr[1] / x_scale * boxprior[3][index] + boxprior[1][index]; \
+        float h = (float) expf (boxinputptr[2] / h_scale) * boxprior[2][index]; \
+        float w = (float) expf (boxinputptr[3] / w_scale) * boxprior[3][index]; \
+        float ymin = ycenter - h / 2.f; \
+        float xmin = xcenter - w / 2.f; \
+        int x = xmin * i_width; \
+        int y = ymin * i_height; \
+        int width = w * i_width; \
+        int height = h * i_height; \
+        if (highscore < score) { \
+          /* Remember the best score so a later, weaker label cannot replace it */ \
+          highscore = score; \
+          result->class_id = c; \
+          result->x = MAX (0, x); \
+          result->y = MAX (0, y); \
+          result->width = width; \
+          result->height = height; \
+          result->prob = score; \
+          result->valid = TRUE; \
+        } \
+      } \
+    } \
+  } while (0);
+
+/**
+ * @brief C++-Template-like box location calculation for box-priors for Mobilenet SSD Model
+ * @note Expands to a complete "case typename: { ... } break" arm of a switch statement.
+ *       Every object reported as valid by _get_object_i_mobilenet_ssd is appended to results.
+ * @param[in] _type The tensor type of the input pointers
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] boxprior The box prior data from the box file of MOBILENET_SSD.
+ * @param[in] boxinput Input Tensor Data (Boxes)
+ * @param[in] detinput Input Tensor Data (Detection). Null if not available. (numtensor ==1)
+ * @param[in] config Tensor configs of the input tensors
+ * @param[out] results The object returned. (GArray with detectedObject)
+ * @param[in] i_width Forwarded to _get_object_i_mobilenet_ssd
+ * @param[in] i_height Forwarded to _get_object_i_mobilenet_ssd
+ * @param[in] max_detection Number of boxes to visit; capped at DETECTION_MAX
+ */
+#define _get_objects_mobilenet_ssd(_type, typename, boxprior, boxinput, \
+ detinput, config, results, i_width, i_height, max_detection) \
+ case typename: \
+ { \
+ int d; \
+ _type *boxinput_ = (_type *) boxinput; \
+ size_t boxbpi = config->info.info[0].dimension[0]; \
+ _type *detinput_ = (_type *) detinput; \
+ size_t detbpi = config->info.info[1].dimension[0]; \
+ int num = (DETECTION_MAX > max_detection) ? max_detection : DETECTION_MAX; \
+ detectedObject object = { \
+ .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+ }; \
+ for (d = 0; d < num; d++) { \
+ _get_object_i_mobilenet_ssd (d, detbpi, boxprior, (boxinput_ + (d * boxbpi)), \
+ (detinput_ + (d * detbpi)), (&object), i_width, i_height); \
+ if (object.valid == TRUE) { \
+ g_array_append_val (results, object); \
+ } \
+ } \
+ } \
+ break
+
+
+/**
+ * @brief Macro to simplify calling _get_objects_mobilenet_ssd
+ * @note Uses box_priors, boxes, detections, config, results, i_width, i_height
+ *       and max_detection from the scope where it is expanded.
+ */
+#define _get_objects_mobilenet_ssd_(type, typename) \
+ _get_objects_mobilenet_ssd (type, typename, box_priors, (boxes->data), \
+ (detections->data), config, results, i_width, i_height, max_detection)
+
+/**
+ * @brief Inverse of the sigmoid function (logit)
+ * @param[in] x The probability to convert
+ * @return log (x / (1 - x)); -INFINITY when x <= 0 and INFINITY when x >= 1
+ */
+static float
+logit (float x)
+{
+  const bool at_or_below_zero = (x <= 0.0f);
+  const bool at_or_above_one = (x >= 1.0f);
+
+  /* Values outside the open interval (0, 1) saturate to an infinity */
+  if (at_or_below_zero || at_or_above_one)
+    return at_or_below_zero ? -INFINITY : INFINITY;
+
+  return log (x / (1.0 - x));
+}
+
+/**
+ * @brief Constructor of MobilenetSSD
+ * @details Fills params[] with the default threshold and scale values and
+ *          derives sigmoid_threshold from the default detection threshold.
+ */
+MobilenetSSD::MobilenetSSD ()
+{
+  /* Nothing is known yet about the prior file, the labels or the tensors */
+  box_prior_path = nullptr;
+  total_labels = 0;
+  max_detection = 0;
+
+  /* Box decoding scales */
+  params[W_SCALE_IDX] = W_SCALE_DEFAULT;
+  params[H_SCALE_IDX] = H_SCALE_DEFAULT;
+  params[X_SCALE_IDX] = X_SCALE_DEFAULT;
+  params[Y_SCALE_IDX] = Y_SCALE_DEFAULT;
+
+  /* Score and IoU thresholds */
+  params[IOU_THRESHOLD_IDX] = THRESHOLD_IOU_DEFAULT;
+  params[THRESHOLD_IDX] = DETECTION_THRESHOLD_DEFAULT;
+  sigmoid_threshold = logit (DETECTION_THRESHOLD_DEFAULT);
+}
+
+/**
+ * @brief Load box-prior data from the file at box_prior_path into box_priors
+ * @details The first BOX_SIZE lines of the file are parsed. Values on a line
+ *          are separated by spaces, tabs or commas, and every parsed line has
+ *          to hold the same number of values.
+ * @return TRUE if loaded and configured. FALSE if failed to do so.
+ */
+int
+MobilenetSSD::mobilenet_ssd_loadBoxPrior ()
+{
+  GError *read_error = NULL;
+  gchar *file_text = NULL;
+  gchar **rows;
+  gboolean ok = TRUE;
+  gint expected_count = -1;
+  guint r;
+
+  /* Read file contents */
+  if (!g_file_get_contents (box_prior_path, &file_text, NULL, &read_error)) {
+    GST_ERROR ("Decoder/Bound-Box/SSD's box prior file %s cannot be read: %s",
+        box_prior_path, read_error->message);
+    g_clear_error (&read_error);
+    return FALSE;
+  }
+
+  rows = g_strsplit (file_text, "\n", -1);
+
+  /* If given prior file is inappropriate, report back to tensor-decoder */
+  if (g_strv_length (rows) < BOX_SIZE) {
+    ml_loge ("The given prior file, %s, should have at least %d lines.\n",
+        box_prior_path, BOX_SIZE);
+    ok = FALSE;
+  }
+
+  for (r = 0; ok && r < BOX_SIZE; r++) {
+    gint stored = 0;
+
+    if (rows[r] != NULL) {
+      gchar **tokens = g_strsplit_set (rows[r], " \t,", -1);
+      gchar **cursor;
+
+      for (cursor = tokens; *cursor != NULL; cursor++) {
+        /* Consecutive delimiters produce empty tokens; ignore them */
+        if (**cursor == '\0')
+          continue;
+
+        if (stored > DETECTION_MAX) {
+          GST_WARNING ("Decoder/Bound-Box/SSD's box prior data file has too many priors. %d >= %d",
+              stored, DETECTION_MAX);
+          break;
+        }
+        box_priors[r][stored] = (gfloat) g_ascii_strtod (*cursor, NULL);
+        stored++;
+      }
+
+      g_strfreev (tokens);
+    }
+
+    /* Each row has to carry as many values as the previous one */
+    if (expected_count != -1 && expected_count != stored) {
+      GST_ERROR ("Decoder/Bound-Box/SSD's box prior data file is not consistent.");
+      ok = FALSE;
+    } else {
+      expected_count = stored;
+    }
+  }
+
+  g_strfreev (rows);
+  g_free (file_text);
+  return ok;
+}
+
+/**
+ * @brief Set internal option of MobilenetSSD
+ * @param[in] param The option string. Fields are separated by ':'. The first
+ *            field is the box-prior file path; the following fields, when not
+ *            empty, overwrite params[] in order (threshold, y scale, x scale,
+ *            h scale, w scale, IOU threshold).
+ * @return TRUE on success, 0 if the box-prior file could not be loaded.
+ */
+int
+MobilenetSSD::setOptionInternal (const char *param)
+{
+  gchar **options;
+  int noptions, idx;
+  int ret = TRUE;
+
+  options = g_strsplit (param, ":", -1);
+  noptions = g_strv_length (options);
+
+  /* Ignore fields beyond the path plus PARAMS_MAX numeric values */
+  if (noptions > (PARAMS_MAX + 1))
+    noptions = PARAMS_MAX + 1;
+
+  /* Replace any previously configured path (g_free accepts NULL) */
+  g_free (box_prior_path);
+  box_prior_path = g_strdup (options[0]);
+
+  if (NULL != box_prior_path) {
+    ret = mobilenet_ssd_loadBoxPrior ();
+    if (ret == 0)
+      goto exit_mobilenet_ssd;
+  }
+
+  for (idx = 1; idx < noptions; idx++) {
+    /* An empty field keeps the current value */
+    if (strlen (options[idx]) == 0)
+      continue;
+    params[idx - 1] = strtod (options[idx], NULL);
+  }
+
+  sigmoid_threshold = logit (params[THRESHOLD_IDX]);
+  ret = TRUE;
+
+exit_mobilenet_ssd:
+  /* Release the split option strings on both the success and failure paths */
+  g_strfreev (options);
+  return ret;
+}
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @details The first tensor has to be BOX_SIZE:1:N, the second LABELS:N with
+ *          LABELS not larger than total_labels. N is remembered in
+ *          this->max_detection on the first call and has to stay the same on
+ *          later calls; it must not exceed DETECTION_MAX.
+ * @param[in] config The tensors config to check compatibility
+ * @return TRUE if the config is accepted, FALSE otherwise.
+ */
+int
+MobilenetSSD::checkCompatible (const GstTensorsConfig *config)
+{
+ const uint32_t *dim1, *dim2;
+ int i;
+ guint max_detection, max_label;
+
+ if (!check_tensors (config, MAX_TENSORS))
+ return FALSE;
+
+ /* Check if the first tensor is compatible */
+ dim1 = config->info.info[0].dimension;
+ g_return_val_if_fail (dim1[0] == BOX_SIZE, FALSE);
+ g_return_val_if_fail (dim1[1] == 1, FALSE);
+ max_detection = dim1[2];
+ g_return_val_if_fail (max_detection > 0, FALSE);
+
+ /** @todo unused dimension value should be 0 */
+ for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
+ g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+ /* Check if the second tensor is compatible */
+ dim2 = config->info.info[1].dimension;
+
+ max_label = dim2[0];
+ g_return_val_if_fail (max_label <= total_labels, FALSE);
+ if (max_label < total_labels)
+ GST_WARNING ("The given tensor (2nd) has max_label (first dimension: %u) smaller than the number of labels in labels file (%u).",
+ max_label, total_labels);
+ g_return_val_if_fail (max_detection == dim2[1], FALSE);
+ for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
+ g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+
+ /* Check consistency with max_detection */
+ /* The local max_detection shadows the member; the member is set only once */
+ if (this->max_detection == 0)
+ this->max_detection = max_detection;
+ else
+ g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+ if (this->max_detection > DETECTION_MAX) {
+ GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details Dispatches on the type of the first tensor, collects the detected
+ *          objects and passes them through nms() with params[IOU_THRESHOLD_IDX].
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return The GArray of detectedObject produced for this input.
+ */
+GArray *
+MobilenetSSD::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+ const GstTensorMemory *boxes, *detections = NULL;
+ GArray *results;
+ const guint num_tensors = config->info.num_tensors;
+
+ /**
+ * @todo 100 is a heuristic number of objects in a picture frame
+ * We may have better "heuristics" than this.
+ * For the sake of performance, don't make it too small.
+ */
+
+ /* Already checked with getOutCaps. Thus, this is an internal bug */
+ g_assert (num_tensors >= MAX_TENSORS);
+ results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), 100);
+
+ boxes = &input[0];
+ if (num_tensors >= MAX_TENSORS) /* lgtm[cpp/constant-comparison] */
+ detections = &input[1];
+
+ /* Each macro below expands to one "case" arm that fills results */
+ switch (config->info.info[0].type) {
+ _get_objects_mobilenet_ssd_ (uint8_t, _NNS_UINT8);
+ _get_objects_mobilenet_ssd_ (int8_t, _NNS_INT8);
+ _get_objects_mobilenet_ssd_ (uint16_t, _NNS_UINT16);
+ _get_objects_mobilenet_ssd_ (int16_t, _NNS_INT16);
+ _get_objects_mobilenet_ssd_ (uint32_t, _NNS_UINT32);
+ _get_objects_mobilenet_ssd_ (int32_t, _NNS_INT32);
+ _get_objects_mobilenet_ssd_ (uint64_t, _NNS_UINT64);
+ _get_objects_mobilenet_ssd_ (int64_t, _NNS_INT64);
+ _get_objects_mobilenet_ssd_ (float, _NNS_FLOAT32);
+ _get_objects_mobilenet_ssd_ (double, _NNS_FLOAT64);
+ default:
+ g_assert (0);
+ }
+ nms (results, params[IOU_THRESHOLD_IDX]);
+ return results;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file mobilenetssdpp.cc
+ * @date 13 May 2024
+ * @brief NNStreamer tensor-decoder bounding box properties
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include <stdio.h>
+#include "../tensordec-boundingbox.h"
+
+/** @brief Expected first dimension of the locations tensor */
+#define BOX_SIZE (4)
+/** @brief Largest accepted number of detections */
+#define DETECTION_MAX (100)
+/** @brief Indices into tensor_mapping[] */
+#define LOCATIONS_IDX (0)
+#define CLASSES_IDX (1)
+#define SCORES_IDX (2)
+#define NUM_IDX (3)
+
+/** @brief Default tensor indices stored into tensor_mapping[] by the constructor */
+#define LOCATIONS_DEFAULT (3)
+#define CLASSES_DEFAULT (1)
+#define SCORES_DEFAULT (2)
+#define NUM_DEFAULT (0)
+/** @brief Default detection threshold set by the constructor */
+#define THRESHOLD_DEFAULT (G_MINFLOAT)
+
+/**
+ * @brief C++-Template-like box location calculation for Tensorflow SSD model
+ * @note Expands to a complete "case typename: { ... } break" arm of a switch
+ *       statement. It allocates results itself and reads threshold and
+ *       get_mobilenet_ssd_pp_tensor_idx() from the scope where it is expanded.
+ * @note The detection count reported by the model is clamped to
+ *       [0, second dimension of the locations tensor] so that a bogus count
+ *       can neither request a huge allocation nor read past the tensors.
+ * @param[in] _type The tensor type of the input pointers
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] numinput Input Tensor Data (The number of detections)
+ * @param[in] classinput Input Tensor Data (Detected classes)
+ * @param[in] scoreinput Input Tensor Data (Detection scores)
+ * @param[in] boxesinput Input Tensor Data (Boxes)
+ * @param[in] config Tensor configs of the input tensors
+ * @param[out] results The object returned. (GArray with detectedObject)
+ * @param[in] i_width Factor applied to the clamped [0, 1] x coordinates
+ * @param[in] i_height Factor applied to the clamped [0, 1] y coordinates
+ */
+#define _get_objects_mobilenet_ssd_pp(_type, typename, numinput, classinput, \
+    scoreinput, boxesinput, config, results, i_width, i_height) \
+  case typename: \
+    { \
+      int d, num, max_num; \
+      size_t boxbpi; \
+      _type *num_detection_ = (_type *) numinput; \
+      _type *classes_ = (_type *) classinput; \
+      _type *scores_ = (_type *) scoreinput; \
+      _type *boxes_ = (_type *) boxesinput; \
+      int locations_idx \
+          = get_mobilenet_ssd_pp_tensor_idx (MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS); \
+      boxbpi = config->info.info[locations_idx].dimension[0]; \
+      max_num = (int) config->info.info[locations_idx].dimension[1]; \
+      num = (int) num_detection_[0]; \
+      if (num < 0) \
+        num = 0; \
+      if (num > max_num) \
+        num = max_num; \
+      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num); \
+      for (d = 0; d < num; d++) { \
+        _type x1, x2, y1, y2; \
+        detectedObject object = { \
+          .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+        }; \
+        if (scores_[d] < threshold) \
+          continue; \
+        object.valid = TRUE; \
+        object.class_id = (int) classes_[d]; \
+        /* Box layout per detection: ymin, xmin, ymax, xmax */ \
+        x1 = MIN (MAX (boxes_[d * boxbpi + 1], 0), 1); \
+        y1 = MIN (MAX (boxes_[d * boxbpi], 0), 1); \
+        x2 = MIN (MAX (boxes_[d * boxbpi + 3], 0), 1); \
+        y2 = MIN (MAX (boxes_[d * boxbpi + 2], 0), 1); \
+        object.x = (int) (x1 * i_width); \
+        object.y = (int) (y1 * i_height); \
+        object.width = (int) ((x2 - x1) * i_width); \
+        object.height = (int) ((y2 - y1) * i_height); \
+        object.prob = scores_[d]; \
+        g_array_append_val (results, object); \
+      } \
+    } \
+    break
+
+/**
+ * @brief Macro to simplify calling _get_objects_mobilenet_ssd_pp
+ * @note Uses mem_num, mem_classes, mem_scores, mem_boxes, config, results,
+ *       i_width and i_height from the scope where it is expanded.
+ */
+#define _get_objects_mobilenet_ssd_pp_(type, typename) \
+ _get_objects_mobilenet_ssd_pp (type, typename, (mem_num->data), (mem_classes->data), \
+ (mem_scores->data), (mem_boxes->data), config, results, i_width, i_height)
+
+/**
+ * @brief MOBILENET SSD PostProcess Output tensor feature mapping.
+ * @note The values equal LOCATIONS_IDX, CLASSES_IDX, SCORES_IDX and NUM_IDX;
+ *       they are used as arguments of get_mobilenet_ssd_pp_tensor_idx().
+ */
+typedef enum {
+ MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS = 0,
+ MOBILENET_SSD_PP_BBOX_IDX_CLASSES = 1,
+ MOBILENET_SSD_PP_BBOX_IDX_SCORES = 2,
+ MOBILENET_SSD_PP_BBOX_IDX_NUM = 3,
+ MOBILENET_SSD_PP_BBOX_IDX_UNKNOWN
+} mobilenet_ssd_pp_bbox_idx_t;
+
+/**
+ * @brief Constructor of MobilenetSSDPP
+ * @details Installs the default tensor mapping and the default threshold.
+ */
+MobilenetSSDPP::MobilenetSSDPP ()
+{
+  threshold = THRESHOLD_DEFAULT;
+
+  /* Which input tensor carries which feature, until options override it */
+  tensor_mapping[NUM_IDX] = NUM_DEFAULT;
+  tensor_mapping[SCORES_IDX] = SCORES_DEFAULT;
+  tensor_mapping[CLASSES_IDX] = CLASSES_DEFAULT;
+  tensor_mapping[LOCATIONS_IDX] = LOCATIONS_DEFAULT;
+}
+
+/**
+ * @brief Helper to retrieve tensor index by feature
+ * @param[in] idx The feature index (position in tensor_mapping)
+ * @return The tensor index mapped to the given feature
+ */
+int
+MobilenetSSDPP::get_mobilenet_ssd_pp_tensor_idx (int idx)
+{
+  const int mapped_tensor = tensor_mapping[idx];
+
+  return mapped_tensor;
+}
+
+/**
+ * @brief Set internal option of MobilenetSSDPP
+ * @details The tensor mapping is committed only after the whole option string
+ *          has been parsed and every index has been found to lie in
+ *          [0, MAX_TENSORS); a rejected option string leaves the current
+ *          mapping untouched. An out-of-range threshold is reported and the
+ *          previous threshold is kept.
+ * @param[in] param The option string,
+ *            "locations idx:classes idx:scores idx:num idx,threshold"
+ *            where threshold is a percentage in [0 100].
+ * @return TRUE if the options were applied, FALSE if they are invalid.
+ */
+int
+MobilenetSSDPP::setOptionInternal (const char *param)
+{
+  int threshold_percent;
+  int locations, classes, scores, num;
+  int ret = sscanf (param, "%i:%i:%i:%i,%i", &locations, &classes, &scores,
+      &num, &threshold_percent);
+
+  if ((ret == EOF) || (ret < 5)) {
+    GST_ERROR ("Invalid options, must be \"locations idx:classes idx:scores idx:num idx,threshold\"");
+    return FALSE;
+  }
+
+  /* The indices select entries of the input tensor arrays; reject bad ones */
+  const int parsed[] = { locations, classes, scores, num };
+  for (int tensor_idx : parsed) {
+    if (tensor_idx < 0 || (guint) tensor_idx >= (guint) MAX_TENSORS) {
+      GST_ERROR ("Invalid MOBILENET SSD POST PROCESS tensor index (%d), must be in range [0 %u)",
+          tensor_idx, (guint) MAX_TENSORS);
+      return FALSE;
+    }
+  }
+
+  tensor_mapping[LOCATIONS_IDX] = locations;
+  tensor_mapping[CLASSES_IDX] = classes;
+  tensor_mapping[SCORES_IDX] = scores;
+  tensor_mapping[NUM_IDX] = num;
+
+  GST_INFO ("MOBILENET SSD POST PROCESS output tensors mapping: "
+            "locations idx (%d), classes idx (%d), scores idx (%d), num detections idx (%d)",
+      tensor_mapping[LOCATIONS_IDX], tensor_mapping[CLASSES_IDX],
+      tensor_mapping[SCORES_IDX], tensor_mapping[NUM_IDX]);
+
+  if ((threshold_percent > 100) || (threshold_percent < 0)) {
+    GST_ERROR ("Invalid MOBILENET SSD POST PROCESS threshold detection (%i), must be in range [0 100]",
+        threshold_percent);
+  } else {
+    threshold = threshold_percent / 100.0;
+  }
+
+  GST_INFO ("MOBILENET SSD POST PROCESS object detection threshold: %.2f", threshold);
+
+  return TRUE;
+}
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @details The tensors are looked up through tensor_mapping. The number
+ *          tensor has to hold a single value, classes and scores have to be
+ *          one-dimensional with equal length N, and locations has to be
+ *          BOX_SIZE:N. N must not exceed DETECTION_MAX.
+ * @param[in] config The tensors config to check compatibility
+ * @return TRUE if the config is accepted, FALSE otherwise.
+ */
+int
+MobilenetSSDPP::checkCompatible (const GstTensorsConfig *config)
+{
+ const uint32_t *dim1, *dim2, *dim3, *dim4;
+ int locations_idx, classes_idx, scores_idx, num_idx, i;
+
+ if (!check_tensors (config, MAX_TENSORS))
+ return FALSE;
+
+ locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
+ classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
+ scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
+ num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
+
+ /* Check if the number of detections tensor is compatible */
+ dim1 = config->info.info[num_idx].dimension;
+ g_return_val_if_fail (dim1[0] == 1, FALSE);
+ for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i)
+ g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+ /* Check if the classes & scores tensors are compatible */
+ dim2 = config->info.info[classes_idx].dimension;
+ dim3 = config->info.info[scores_idx].dimension;
+ g_return_val_if_fail (dim3[0] == dim2[0], FALSE);
+ /* NOTE(review): max_detection is not declared in this function. If it
+ * resolves to the class member, this assignment overwrites the member and
+ * the consistency check further below compares the member with itself.
+ * Confirm against the class declaration. */
+ max_detection = dim2[0];
+ for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i) {
+ g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+ g_return_val_if_fail (dim3[i] == 0 || dim3[i] == 1, FALSE);
+ }
+
+ /* Check if the bbox locations tensor is compatible */
+ dim4 = config->info.info[locations_idx].dimension;
+ g_return_val_if_fail (BOX_SIZE == dim4[0], FALSE);
+ g_return_val_if_fail (max_detection == dim4[1], FALSE);
+ for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+ g_return_val_if_fail (dim4[i] == 0 || dim4[i] == 1, FALSE);
+
+ /* Check consistency with max_detection */
+ if (this->max_detection == 0)
+ this->max_detection = max_detection;
+ else
+ g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+ if (this->max_detection > DETECTION_MAX) {
+ GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details The four input tensors are selected through tensor_mapping. The
+ *          switch dispatches on the type of the number-of-detections tensor,
+ *          and the selected macro arm reads all four tensors with that type.
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return The GArray of detectedObject allocated by the macro arm; it stays
+ *         NULL when no arm matches.
+ */
+GArray *
+MobilenetSSDPP::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+
+ const GstTensorMemory *mem_num, *mem_classes, *mem_scores, *mem_boxes;
+ int locations_idx, classes_idx, scores_idx, num_idx;
+ GArray *results = NULL;
+ const guint num_tensors = config->info.num_tensors;
+
+ /* Already checked with getOutCaps. Thus, this is an internal bug */
+ g_assert (num_tensors >= MAX_TENSORS);
+
+ locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
+ classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
+ scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
+ num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
+
+ mem_num = &input[num_idx];
+ mem_classes = &input[classes_idx];
+ mem_scores = &input[scores_idx];
+ mem_boxes = &input[locations_idx];
+
+ /* Each macro below expands to one "case" arm that allocates and fills results */
+ switch (config->info.info[num_idx].type) {
+ _get_objects_mobilenet_ssd_pp_ (uint8_t, _NNS_UINT8);
+ _get_objects_mobilenet_ssd_pp_ (int8_t, _NNS_INT8);
+ _get_objects_mobilenet_ssd_pp_ (uint16_t, _NNS_UINT16);
+ _get_objects_mobilenet_ssd_pp_ (int16_t, _NNS_INT16);
+ _get_objects_mobilenet_ssd_pp_ (uint32_t, _NNS_UINT32);
+ _get_objects_mobilenet_ssd_pp_ (int32_t, _NNS_INT32);
+ _get_objects_mobilenet_ssd_pp_ (uint64_t, _NNS_UINT64);
+ _get_objects_mobilenet_ssd_pp_ (int64_t, _NNS_INT64);
+ _get_objects_mobilenet_ssd_pp_ (float, _NNS_FLOAT32);
+ _get_objects_mobilenet_ssd_pp_ (double, _NNS_FLOAT64);
+ default:
+ g_assert (0);
+ }
+ return results;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file mppalmdetection.cc
+ * @date 13 May 2024
+ * @brief NNStreamer tensor-decoder bounding box properties
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Expected first dimension of the first input tensor */
+#define INFO_SIZE (18)
+/** @brief Number of input tensors read by this decoder */
+#define MAX_TENSORS (2U)
+/** @brief Largest accepted number of detections */
+#define MAX_DETECTION (2016)
+
+/** @brief Default option values installed by the constructor */
+#define NUM_LAYERS_DEFAULT (4)
+#define MIN_SCALE_DEFAULT (1.0)
+#define MAX_SCALE_DEFAULT (1.0)
+#define OFFSET_X_DEFAULT (0.5)
+#define OFFSET_Y_DEFAULT (0.5)
+#define STRIDE_0_DEFAULT (8)
+#define STRIDE_1_DEFAULT (16)
+#define STRIDE_2_DEFAULT (16)
+#define STRIDE_3_DEFAULT (16)
+#define MIN_SCORE_THRESHOLD_DEFAULT (0.5)
+
+/* NOTE(review): presumably the capacity of strides[]; confirm against the class declaration */
+#define PARAMS_STRIDE_SIZE (8)
+
+/**
+ * @brief C++-Template-like box location calculation for Tensorflow model
+ * @note Expands to a complete "case typename: { ... } break" arm of a switch
+ *       statement. It allocates 'results' and reads max_detection,
+ *       min_score_threshold, i_width, i_height and this->anchors from the
+ *       scope where it is expanded.
+ * @note Detection d_ is decoded with anchor d_, so the number of visited
+ *       detections is limited to the number of generated anchors.
+ * @param[in] _type The tensor type of the input pointers
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] scoreinput Input Tensor Data (Detection scores)
+ * @param[in] boxesinput Input Tensor Data (Boxes)
+ * @param[in] config Tensor configs of the input tensors
+ */
+#define _get_objects_mp_palm_detection(_type, typename, scoreinput, boxesinput, config) \
+  case typename: \
+    { \
+      int d_; \
+      _type *scores_ = (_type *) scoreinput; \
+      _type *boxes_ = (_type *) boxesinput; \
+      int num_ = max_detection; \
+      size_t boxbpi_ = config->info.info[0].dimension[0]; \
+      /* Never index past the anchors that were actually generated */ \
+      if (num_ > (int) this->anchors->len) \
+        num_ = (int) this->anchors->len; \
+      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num_); \
+      for (d_ = 0; d_ < num_; d_++) { \
+        gfloat y_center, x_center, h, w; \
+        gfloat ymin, xmin; \
+        int y, x, width, height; \
+        detectedObject object = { \
+          .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+        }; \
+        gfloat score = (gfloat) scores_[d_]; \
+        _type *box = boxes_ + boxbpi_ * d_; \
+        anchor *a = &g_array_index (this->anchors, anchor, d_); \
+        /* Clamp the raw score before applying the sigmoid */ \
+        score = MAX (score, -100.0f); \
+        score = MIN (score, 100.0f); \
+        score = 1.0f / (1.0f + exp (-score)); \
+        if (score < min_score_threshold) \
+          continue; \
+        y_center = (box[0] * 1.f) / i_height * a->h + a->y_center; \
+        x_center = (box[1] * 1.f) / i_width * a->w + a->x_center; \
+        h = (box[2] * 1.f) / i_height * a->h; \
+        w = (box[3] * 1.f) / i_width * a->w; \
+        ymin = y_center - h / 2.f; \
+        xmin = x_center - w / 2.f; \
+        y = ymin * i_height; \
+        x = xmin * i_width; \
+        width = w * i_width; \
+        height = h * i_height; \
+        object.class_id = 0; \
+        object.x = MAX (0, x); \
+        object.y = MAX (0, y); \
+        object.width = width; \
+        object.height = height; \
+        object.prob = score; \
+        object.valid = TRUE; \
+        g_array_append_val (results, object); \
+      } \
+    } \
+    break
+
+/**
+ * @brief Macro to simplify calling _get_objects_mp_palm_detection
+ * @note Uses detections, boxes and config from the scope where it is expanded.
+ */
+#define _get_objects_mp_palm_detection_(type, typename) \
+ _get_objects_mp_palm_detection (type, typename, (detections->data), (boxes->data), config)
+
+#define mp_palm_detection_option(option, type, idx) \
+ if (noptions > idx) \
+ option = (type) g_strtod (options[idx], NULL)
+
+/**
+ * @brief Calculate anchor scale
+ * @param[in] min_scale The scale returned for stride_index 0
+ * @param[in] max_scale The scale returned for stride_index num_strides - 1
+ * @param[in] stride_index The position to interpolate at
+ * @param[in] num_strides The number of strides
+ * @return The linearly interpolated scale; the mean of both scales when
+ *         there is only one stride.
+ */
+static gfloat
+_calculate_scale (float min_scale, float max_scale, int stride_index, int num_strides)
+{
+  if (num_strides != 1)
+    return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+
+  /* A single stride has no range to interpolate over */
+  return (min_scale + max_scale) * 0.5f;
+}
+
+/**
+ * @brief Generate anchor information
+ * @details Rebuilds 'anchors' from num_layers, strides, min_scale, max_scale,
+ *          offset_x and offset_y. Consecutive layers sharing the same stride
+ *          are merged and contribute two anchors each per feature-map cell.
+ *          Anchors left over from an earlier call are discarded first, so the
+ *          array always reflects the current options only.
+ */
+void
+MpPalmDetection::mp_palm_detection_generate_anchors ()
+{
+  int layer_id = 0;
+  guint i;
+
+  /* Start from scratch; decode() pairs detection i with anchor i */
+  g_array_set_size (anchors, 0);
+
+  while (layer_id < num_layers) {
+    const int stride = strides[layer_id];
+
+    /* A non-positive stride would divide by zero below */
+    if (stride <= 0) {
+      GST_ERROR ("Invalid MP PALM DETECTION stride (%d) at layer %d", stride, layer_id);
+      break;
+    }
+
+    GArray *aspect_ratios = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *scales = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *anchor_height = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *anchor_width = g_array_new (FALSE, TRUE, sizeof (gfloat));
+
+    int last_same_stride_layer = layer_id;
+
+    /* Collect two (ratio, scale) pairs for every layer sharing this stride */
+    while (last_same_stride_layer < num_layers
+           && strides[last_same_stride_layer] == strides[layer_id]) {
+      gfloat scale;
+      gfloat ratio = 1.0f;
+      g_array_append_val (aspect_ratios, ratio);
+      g_array_append_val (aspect_ratios, ratio);
+      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer, num_layers);
+      g_array_append_val (scales, scale);
+      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer + 1, num_layers);
+      g_array_append_val (scales, scale);
+      last_same_stride_layer++;
+    }
+
+    for (i = 0; i < aspect_ratios->len; ++i) {
+      const float ratio_sqrts = sqrt (g_array_index (aspect_ratios, gfloat, i));
+      const gfloat sc = g_array_index (scales, gfloat, i);
+      gfloat anchor_height_ = sc / ratio_sqrts;
+      gfloat anchor_width_ = sc * ratio_sqrts;
+      g_array_append_val (anchor_height, anchor_height_);
+      g_array_append_val (anchor_width, anchor_width_);
+    }
+
+    {
+      int x, y;
+      int anchor_id;
+
+      /* Feature-map size for an input edge of 192 */
+      const int feature_map_height = ceil (1.0f * 192 / stride);
+      const int feature_map_width = ceil (1.0f * 192 / stride);
+
+      for (y = 0; y < feature_map_height; ++y) {
+        for (x = 0; x < feature_map_width; ++x) {
+          for (anchor_id = 0; anchor_id < (int) aspect_ratios->len; ++anchor_id) {
+            const float x_center = (x + offset_x) * 1.0f / feature_map_width;
+            const float y_center = (y + offset_y) * 1.0f / feature_map_height;
+
+            const anchor a = { .x_center = x_center,
+              .y_center = y_center,
+              .w = g_array_index (anchor_width, gfloat, anchor_id),
+              .h = g_array_index (anchor_height, gfloat, anchor_id) };
+            g_array_append_val (anchors, a);
+          }
+        }
+      }
+      layer_id = last_same_stride_layer;
+    }
+
+    g_array_free (anchor_height, TRUE);
+    g_array_free (anchor_width, TRUE);
+    g_array_free (aspect_ratios, TRUE);
+    g_array_free (scales, TRUE);
+  }
+}
+
+/**
+ * @brief Constructor of MpPalmDetection
+ * @details Installs the default option values and creates an empty anchor array.
+ */
+MpPalmDetection::MpPalmDetection ()
+{
+  const int default_strides[NUM_LAYERS_DEFAULT] = { STRIDE_0_DEFAULT,
+    STRIDE_1_DEFAULT, STRIDE_2_DEFAULT, STRIDE_3_DEFAULT };
+
+  anchors = g_array_new (FALSE, TRUE, sizeof (anchor));
+
+  min_score_threshold = MIN_SCORE_THRESHOLD_DEFAULT;
+  offset_y = OFFSET_Y_DEFAULT;
+  offset_x = OFFSET_X_DEFAULT;
+  max_scale = MAX_SCALE_DEFAULT;
+  min_scale = MIN_SCALE_DEFAULT;
+  num_layers = NUM_LAYERS_DEFAULT;
+
+  /* One default stride per default layer */
+  for (int layer = 0; layer < NUM_LAYERS_DEFAULT; layer++)
+    strides[layer] = default_strides[layer];
+}
+
+/**
+ * @brief Destructor of MpPalmDetection
+ * @details Frees the anchor array, if any, and clears the pointer.
+ */
+MpPalmDetection::~MpPalmDetection ()
+{
+  GArray *owned_anchors = anchors;
+
+  anchors = NULL;
+  if (owned_anchors != NULL)
+    g_array_free (owned_anchors, TRUE);
+}
+
+/**
+ * @brief Set internal option of MpPalmDetection
+ * @details Fields are separated by ':' and are, in order: min score
+ *          threshold, number of layers, min scale, max scale, offset x,
+ *          offset y, followed by one stride per layer. Fields that are absent
+ *          keep their current value. The anchors are generated afterwards.
+ * @param[in] param The option string.
+ * @return TRUE on success, FALSE if more than PARAMS_MAX fields are given.
+ */
+int
+MpPalmDetection::setOptionInternal (const char *param)
+{
+ /* Load palm detection info from option3 */
+ gchar **options;
+ int noptions, idx;
+ int ret = TRUE;
+
+ options = g_strsplit (param, ":", -1);
+ noptions = g_strv_length (options);
+
+ if (noptions > PARAMS_MAX) {
+ GST_ERROR ("Invalid MP PALM DETECTION PARAM length: %d", noptions);
+ ret = FALSE;
+ goto exit_mp_palm_detection;
+ }
+
+ /* Each call assigns only when the token at that index exists */
+ mp_palm_detection_option (min_score_threshold, gfloat, 0);
+ mp_palm_detection_option (num_layers, gint, 1);
+ mp_palm_detection_option (min_scale, gfloat, 2);
+ mp_palm_detection_option (max_scale, gfloat, 3);
+ mp_palm_detection_option (offset_x, gfloat, 4);
+ mp_palm_detection_option (offset_y, gfloat, 5);
+
+ /* NOTE(review): num_layers comes from the option string and is not
+ * range-checked here before it is used to index strides[] during anchor
+ * generation; confirm it cannot exceed the capacity of strides[]. */
+ for (idx = 6; idx < num_layers + 6; idx++) {
+ mp_palm_detection_option (strides[idx - 6], gint, idx);
+ }
+ mp_palm_detection_generate_anchors ();
+
+exit_mp_palm_detection:
+ g_strfreev (options);
+ return ret;
+}
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @details The first tensor has to be INFO_SIZE:N:1 and the second 1:N, with
+ *          N greater than 0 and not larger than MAX_DETECTION.
+ * @param[in] config The tensors config to check compatibility
+ * @return TRUE if the config is accepted, FALSE otherwise.
+ */
+int
+MpPalmDetection::checkCompatible (const GstTensorsConfig *config)
+{
+ const uint32_t *dim1, *dim2;
+ int i;
+ if (!check_tensors (config, MAX_TENSORS))
+ return FALSE;
+
+ /* Check if the first tensor is compatible */
+ dim1 = config->info.info[0].dimension;
+
+ g_return_val_if_fail (dim1[0] == INFO_SIZE, FALSE);
+ /* NOTE(review): max_detection is not declared in this function. If it
+ * resolves to the class member, this assignment overwrites the member and
+ * the consistency check further below compares the member with itself.
+ * Confirm against the class declaration. */
+ max_detection = dim1[1];
+ g_return_val_if_fail (max_detection > 0, FALSE);
+ g_return_val_if_fail (dim1[2] == 1, FALSE);
+ for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
+ g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+ /* Check if the second tensor is compatible */
+ dim2 = config->info.info[1].dimension;
+ g_return_val_if_fail (dim2[0] == 1, FALSE);
+ g_return_val_if_fail (max_detection == dim2[1], FALSE);
+ for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
+ g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+
+ /* Check consistency with max_detection */
+ if (this->max_detection == 0)
+ this->max_detection = max_detection;
+ else
+ g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+ if (this->max_detection > MAX_DETECTION) {
+ GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details Dispatches on the type of the first tensor; the selected macro arm
+ *          allocates and fills the result array, which is then passed through
+ *          nms() with a fixed threshold of 0.05.
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return The GArray of detectedObject produced for this input.
+ */
+GArray *
+MpPalmDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+ GArray *results = NULL;
+ const GstTensorMemory *boxes = NULL;
+ const GstTensorMemory *detections = NULL;
+ const guint num_tensors = config->info.num_tensors;
+
+ /* Already checked with getOutCaps. Thus, this is an internal bug */
+ g_assert (num_tensors >= MAX_TENSORS);
+
+ /* results will be allocated by _get_objects_mp_palm_detection_ */
+ boxes = &input[0];
+ detections = &input[1];
+ switch (config->info.info[0].type) {
+ _get_objects_mp_palm_detection_ (uint8_t, _NNS_UINT8);
+ _get_objects_mp_palm_detection_ (int8_t, _NNS_INT8);
+ _get_objects_mp_palm_detection_ (uint16_t, _NNS_UINT16);
+ _get_objects_mp_palm_detection_ (int16_t, _NNS_INT16);
+ _get_objects_mp_palm_detection_ (uint32_t, _NNS_UINT32);
+ _get_objects_mp_palm_detection_ (int32_t, _NNS_INT32);
+ _get_objects_mp_palm_detection_ (uint64_t, _NNS_UINT64);
+ _get_objects_mp_palm_detection_ (int64_t, _NNS_INT64);
+ _get_objects_mp_palm_detection_ (float, _NNS_FLOAT32);
+ _get_objects_mp_palm_detection_ (double, _NNS_FLOAT64);
+
+ default:
+ g_assert (0);
+ }
+ nms (results, 0.05f);
+ return results;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file ovdetection.cc
+ * @date 13 May 2024
+ * @brief NNStreamer tensor-decoder bounding box properties
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Detections whose confidence is below this value are not reported */
+#define OV_PERSON_DETECTION_CONF_THRESHOLD (0.8)
+/**
+ * @brief C++-Template-like box location calculation for OpenVino Person Detection Model
+ * @note Expands to a complete "case typename: { ... } break" arm of a switch
+ *       statement. It reads i_width and i_height and writes max_detection in
+ *       the scope where it is expanded.
+ * @note Each detection is read as seven consecutive values: image_id, label,
+ *       conf, x_min, y_min, x_max, y_max. Reading stops at the first entry
+ *       with a negative image_id.
+ * @param[in] type The tensor type of inputptr
+ * @param[in] inputptr Input tensor Data
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[out] results The object returned. (GArray with detectedObject)
+ */
+#define _get_persons_ov(type, inputptr, typename, results) \
+ case typename: \
+ { \
+ detectedObject object = { \
+ .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+ }; \
+ type *typed_inputptr = (type *) inputptr; \
+ guint d; \
+ \
+ for (d = 1; d <= DETECTION_MAX; ++d) { \
+ struct { \
+ type image_id; \
+ type label; \
+ type conf; \
+ type x_min; \
+ type y_min; \
+ type x_max; \
+ type y_max; \
+ } desc; \
+ \
+ memcpy (&desc, typed_inputptr, sizeof (desc)); \
+ typed_inputptr += (sizeof (desc) / sizeof (type)); \
+ object.valid = FALSE; \
+ \
+ if ((int) desc.image_id < 0) { \
+ max_detection = (d - 1); \
+ break; \
+ } \
+ object.class_id = -1; \
+ object.x = (int) (desc.x_min * (type) i_width); \
+ object.y = (int) (desc.y_min * (type) i_height); \
+ object.width = (int) ((desc.x_max - desc.x_min) * (type) i_width); \
+ object.height = (int) ((desc.y_max - desc.y_min) * (type) i_height); \
+ if (desc.conf < OV_PERSON_DETECTION_CONF_THRESHOLD) \
+ continue; \
+ object.prob = 1; \
+ object.valid = TRUE; \
+ g_array_append_val (results, object); \
+ } \
+ } \
+ break
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @param[in] config The tensors configuration to validate.
+ * @return TRUE when check_tensors() accepts the config and the first tensor has
+ *         dimension DEFAULT_SIZE_DETECTION_DESC : DETECTION_MAX with every later
+ *         dimension being 0 or 1. FALSE otherwise.
+ */
+int
+OVDetection::checkCompatible (const GstTensorsConfig *config)
+{
+ const guint *dim;
+ int i;
+ /* total_labels is not consulted by this check; UNUSED() marks it as such. */
+ UNUSED (total_labels);
+
+ if (!check_tensors (config, DEFAULT_MAX_TENSORS))
+ return FALSE;
+
+ /**
+ * The shape of the output tensor is [7, N, 1, 1], where N is the maximum
+ * number (i.e., 200) of detected bounding boxes.
+ */
+ dim = config->info.info[0].dimension;
+ g_return_val_if_fail (dim[0] == DEFAULT_SIZE_DETECTION_DESC, FALSE);
+ g_return_val_if_fail (dim[1] == DETECTION_MAX, FALSE);
+ /* Remaining dimensions must be unset (0) or 1. */
+ for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+ g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
+
+ return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details Dispatches on the element type of the first tensor; each _get_persons_ov
+ *          line expands to one `case` arm that fills results from input[0].data.
+ *          Unlike the other decoders in this patch, no nms() pass is applied here.
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return Newly allocated GArray of detectedObject, sized for DETECTION_MAX entries.
+ */
+GArray *
+OVDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+ GArray *results = NULL;
+ const guint num_tensors = config->info.num_tensors;
+
+ /* Already checked with getOutCaps. Thus, this is an internal bug */
+ g_assert (num_tensors >= DEFAULT_MAX_TENSORS);
+
+ results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), DETECTION_MAX);
+ switch (config->info.info[0].type) {
+ _get_persons_ov (uint8_t, input[0].data, _NNS_UINT8, results);
+ _get_persons_ov (int8_t, input[0].data, _NNS_INT8, results);
+ _get_persons_ov (uint16_t, input[0].data, _NNS_UINT16, results);
+ _get_persons_ov (int16_t, input[0].data, _NNS_INT16, results);
+ _get_persons_ov (uint32_t, input[0].data, _NNS_UINT32, results);
+ _get_persons_ov (int32_t, input[0].data, _NNS_INT32, results);
+ _get_persons_ov (uint64_t, input[0].data, _NNS_UINT64, results);
+ _get_persons_ov (int64_t, input[0].data, _NNS_INT64, results);
+ _get_persons_ov (float, input[0].data, _NNS_FLOAT32, results);
+ _get_persons_ov (double, input[0].data, _NNS_FLOAT64, results);
+ /* Any other tensor type is treated as an internal error. */
+ default:
+ g_assert (0);
+ }
+ return results;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file yolo.cc
+ * @date 13 May 2024
+ * @brief NNStreamer tensor-decoder bounding box properties
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include <nnstreamer_plugin_api_util.h>
+#include "../tensordec-boundingbox.h"
+
+#define YOLO_DETECTION_CONF_THRESHOLD (0.25)
+#define YOLO_DETECTION_IOU_THRESHOLD (0.45)
+#define DEFAULT_DETECTION_NUM_INFO_YOLO5 (5)
+#define DEFAULT_DETECTION_NUM_INFO_YOLO8 (4)
+
+/**
+ * @brief Constructor of YoloV5
+ * @details Starts with scaled_output set to 0 and the confidence / IOU thresholds
+ *          at YOLO_DETECTION_CONF_THRESHOLD / YOLO_DETECTION_IOU_THRESHOLD.
+ *          setOptionInternal() may overwrite all three.
+ */
+YoloV5::YoloV5 ()
+{
+ scaled_output = 0;
+ conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
+ iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
+}
+
+/**
+ * @brief Set internal option of YoloV5
+ * @details The string is split on ':' into, in order, scaled_output (integer),
+ *          conf_threshold and iou_threshold. A field that is missing from the
+ *          string leaves the corresponding member untouched.
+ * @param[in] param The option string, e.g. "0:0.65:0.6".
+ * @return Always TRUE.
+ */
+int
+YoloV5::setOptionInternal (const char *param)
+{
+  gchar **fields = g_strsplit (param, ":", -1);
+  const int num_fields = g_strv_length (fields);
+
+  /* Trailing fields are optional, so parse only the ones that are present. */
+  if (num_fields >= 1)
+    scaled_output = (int) g_ascii_strtoll (fields[0], NULL, 10);
+  if (num_fields >= 2)
+    conf_threshold = (gfloat) g_ascii_strtod (fields[1], NULL);
+  if (num_fields >= 3)
+    iou_threshold = (gfloat) g_ascii_strtod (fields[2], NULL);
+
+  g_strfreev (fields);
+
+  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
+      scaled_output, conf_threshold, iou_threshold);
+  return TRUE;
+}
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @details Accepts a config only when check_tensors() passes, the first tensor is
+ *          float32 (the only type decode() supports), and its shape is
+ *          (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO5) : max_detection with
+ *          every later dimension being 0 or 1. max_detection is recomputed here
+ *          from i_width and i_height.
+ * @param[in] config The tensors configuration to validate.
+ * @return TRUE if the config is compatible, FALSE otherwise.
+ */
+int
+YoloV5::checkCompatible (const GstTensorsConfig *config)
+{
+  const guint *dim;
+  int i;
+
+  if (!check_tensors (config, 1U))
+    return FALSE;
+
+  /** Only support for float type model; reject here instead of asserting in decode() */
+  if (config->info.info[0].type != _NNS_FLOAT32) {
+    gchar *typestr = gst_tensors_info_to_string (&config->info);
+    nns_loge ("Yolov5 bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
+        typestr);
+    g_free (typestr);
+    return FALSE;
+  }
+
+  /* config is dereferenced only after check_tensors() has accepted it. */
+  dim = config->info.info[0].dimension;
+
+  /* Sum of the /32, /16 and /8 grid cell counts, times 3. */
+  max_detection = ((i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
+                      + (i_width / 8) * (i_height / 8))
+                  * 3;
+
+  g_return_val_if_fail (dim[0] == (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO5), FALSE);
+  g_return_val_if_fail (dim[1] == max_detection, FALSE);
+  /* Remaining dimensions must be unset (0) or 1. */
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details input[0].data is read as max_detection rows of
+ *          (DEFAULT_DETECTION_NUM_INFO_YOLO5 + total_labels) floats: cx, cy, w, h,
+ *          a score at index 4, then one score per class. A row is kept when its
+ *          best class score multiplied by the index-4 score exceeds conf_threshold.
+ *          Kept boxes are finally filtered by nms() with iou_threshold.
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return Newly allocated GArray of detectedObject.
+ */
+GArray *
+YoloV5::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  const int box_count = max_detection;
+  const int class_count = total_labels;
+  const int first_class = DEFAULT_DETECTION_NUM_INFO_YOLO5;
+  const int row_len = class_count + first_class;
+  const int already_scaled = scaled_output;
+  /* rows[box_count][row_len] */
+  float *rows = (float *) input[0].data;
+  GArray *results;
+  int box;
+
+  /** Only support for float type model */
+  g_assert (config->info.info[0].type == _NNS_FLOAT32);
+
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), box_count);
+
+  for (box = 0; box < box_count; ++box) {
+    float *row = rows + box * row_len;
+    float best_conf = -INFINITY;
+    int best_col = -1;
+    int col;
+
+    /* Pick the class column with the highest score. */
+    for (col = first_class; col < row_len; ++col) {
+      if (row[col] > best_conf) {
+        best_conf = row[col];
+        best_col = col;
+      }
+    }
+
+    if (!(best_conf * row[4] > conf_threshold))
+      continue;
+
+    detectedObject object;
+    float cx = row[0];
+    float cy = row[1];
+    float w = row[2];
+    float h = row[3];
+
+    /* Values not yet scaled by the model are multiplied by the input size. */
+    if (!already_scaled) {
+      cx *= (float) i_width;
+      cy *= (float) i_height;
+      w *= (float) i_width;
+      h *= (float) i_height;
+    }
+
+    /* Centre/size to top-left/size, clamped to the input frame. */
+    object.x = (int) (MAX (0.f, (cx - w / 2.f)));
+    object.y = (int) (MAX (0.f, (cy - h / 2.f)));
+    object.width = (int) (MIN ((float) i_width, w));
+    object.height = (int) (MIN ((float) i_height, h));
+
+    object.prob = best_conf * row[4];
+    object.class_id = best_col - DEFAULT_DETECTION_NUM_INFO_YOLO5;
+    object.tracking_id = 0;
+    object.valid = TRUE;
+    g_array_append_val (results, object);
+  }
+
+  nms (results, iou_threshold);
+  return results;
+}
+
+/**
+ * @brief Constructor of YoloV8
+ * @details Starts with scaled_output set to 0 and the confidence / IOU thresholds
+ *          at YOLO_DETECTION_CONF_THRESHOLD / YOLO_DETECTION_IOU_THRESHOLD.
+ *          setOptionInternal() may overwrite all three.
+ */
+YoloV8::YoloV8 ()
+{
+ scaled_output = 0;
+ conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
+ iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
+}
+
+/**
+ * @brief Set internal option of YoloV8
+ * @details The string is split on ':' into, in order, scaled_output (integer),
+ *          conf_threshold and iou_threshold. A field that is missing from the
+ *          string leaves the corresponding member untouched.
+ * @param[in] param The option string, e.g. "0:0.65:0.6".
+ * @return Always TRUE.
+ */
+int
+YoloV8::setOptionInternal (const char *param)
+{
+  gchar **fields = g_strsplit (param, ":", -1);
+  const int num_fields = g_strv_length (fields);
+
+  /* Trailing fields are optional, so parse only the ones that are present. */
+  if (num_fields >= 1)
+    scaled_output = (int) g_ascii_strtoll (fields[0], NULL, 10);
+  if (num_fields >= 2)
+    conf_threshold = (gfloat) g_ascii_strtod (fields[1], NULL);
+  if (num_fields >= 3)
+    iou_threshold = (gfloat) g_ascii_strtod (fields[2], NULL);
+
+  g_strfreev (fields);
+
+  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
+      scaled_output, conf_threshold, iou_threshold);
+  return TRUE;
+}
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @details Accepts a config only when check_tensors() passes, the first tensor is
+ *          float32, and its shape is
+ *          (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8) : max_detection with
+ *          every later dimension being 0 or 1. max_detection is recomputed here
+ *          from i_width and i_height. Each rejection is reported through nns_loge().
+ * @param[in] config The tensors configuration to validate.
+ * @return TRUE if the config is compatible, FALSE otherwise.
+ */
+int
+YoloV8::checkCompatible (const GstTensorsConfig *config)
+{
+  const guint *dim;
+  int i;
+
+  if (!check_tensors (config, 1U)) {
+    /* check_tensors() also fails on a NULL config, so do not dereference it blindly. */
+    gchar *typestr = config ? gst_tensors_info_to_string (&config->info) : NULL;
+    nns_loge ("Yolov8 bounding-box decoder needs at least 1 valid tensor. The given input tensor is: %s.",
+        typestr ? typestr : "(null)");
+    g_free (typestr);
+    return FALSE;
+  }
+
+  /** Only support for float type model */
+  if (config->info.info[0].type != _NNS_FLOAT32) {
+    gchar *typestr = gst_tensors_info_to_string (&config->info);
+    nns_loge ("Yolov8 bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
+        typestr);
+    g_free (typestr);
+    return FALSE;
+  }
+
+  /* config is dereferenced only after check_tensors() has accepted it. */
+  dim = config->info.info[0].dimension;
+
+  /* Sum of the /32, /16 and /8 grid cell counts. */
+  max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
+                  + (i_width / 8) * (i_height / 8);
+
+  if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8) || dim[1] != max_detection) {
+    nns_loge ("yolov8 boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
+        total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8, max_detection, dim[0], dim[1]);
+    return FALSE;
+  }
+
+  /* Remaining dimensions must be unset (0) or 1. */
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i) {
+    if (dim[i] != 0 && dim[i] != 1) {
+      gchar *typestr = gst_tensors_info_to_string (&config->info);
+      nns_loge ("Yolov8 bounding-box decoder accepts RANK=2 tensors (3rd and later dimensions should be 1 or 0). The given input tensor is: %s.",
+          typestr);
+      g_free (typestr);
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @details input[0].data is read as max_detection rows of
+ *          (DEFAULT_DETECTION_NUM_INFO_YOLO8 + total_labels) floats: cx, cy, w, h,
+ *          then one score per class. A row is kept when its best class score
+ *          exceeds conf_threshold. Kept boxes are finally filtered by nms() with
+ *          iou_threshold.
+ * @param[in] config The structure of input tensor info. Not used by this decoder.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return Newly allocated GArray of detectedObject.
+ */
+GArray *
+YoloV8::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  const int box_count = max_detection;
+  const int class_count = total_labels;
+  const int first_class = DEFAULT_DETECTION_NUM_INFO_YOLO8;
+  const int row_len = class_count + first_class;
+  const int already_scaled = scaled_output;
+  /* rows[box_count][row_len] */
+  float *rows = (float *) input[0].data;
+  GArray *results;
+  int box;
+  UNUSED (config);
+
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), box_count);
+
+  for (box = 0; box < box_count; ++box) {
+    float *row = rows + box * row_len;
+    float best_conf = -INFINITY;
+    int best_col = -1;
+    int col;
+
+    /* Pick the class column with the highest score. */
+    for (col = first_class; col < row_len; ++col) {
+      if (row[col] > best_conf) {
+        best_conf = row[col];
+        best_col = col;
+      }
+    }
+
+    if (!(best_conf > conf_threshold))
+      continue;
+
+    detectedObject object;
+    float cx = row[0];
+    float cy = row[1];
+    float w = row[2];
+    float h = row[3];
+
+    /* Values not yet scaled by the model are multiplied by the input size. */
+    if (!already_scaled) {
+      cx *= (float) i_width;
+      cy *= (float) i_height;
+      w *= (float) i_width;
+      h *= (float) i_height;
+    }
+
+    /* Centre/size to top-left/size, clamped to the input frame. */
+    object.x = (int) (MAX (0.f, (cx - w / 2.f)));
+    object.y = (int) (MAX (0.f, (cy - h / 2.f)));
+    object.width = (int) (MIN ((float) i_width, w));
+    object.height = (int) (MIN ((float) i_height, h));
+
+    object.prob = best_conf;
+    object.class_id = best_col - DEFAULT_DETECTION_NUM_INFO_YOLO8;
+    object.tracking_id = 0;
+    object.valid = TRUE;
+    g_array_append_val (results, object);
+  }
+
+  nms (results, iou_threshold);
+  return results;
+}
)
# bounding boxes
-decoder_sub_bounding_boxes_sources = [
+decoder_sub_bounding_boxes_sources = files(
'tensordec-boundingbox.cc',
'tensordecutil.c',
'tensordec-font.c'
-]
+)
+subdir('box_properties')
shared_library('nnstreamer_decoder_bounding_boxes',
decoder_sub_bounding_boxes_sources,
dependencies: [nnstreamer_dep, glib_dep, gst_dep, libm_dep],
* @see https://github.com/nnstreamer/nnstreamer
* @author MyungJoo Ham <myungjoo.ham@samsung.com>
* @bug No known bugs except for NYI items
- *
- * option1: Decoder mode of bounding box.
- * Available: yolov5
- * mobilenet-ssd (single shot multibox detector with priors.)
- * mobilenet-ssd-postprocess
- * ov-person-detection
- * tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
- * tflite-ssd (deprecated, recommend to use mobilenet-ssd)
- * option2: Location of label file
- * This is independent from option1
- * option3: Any option1-dependent values
- * !!This depends on option1 values!!
- * for yolov5 and yolov8 mode:
- * The option3 requires up to 3 numbers, which tell
- * - whether the output values are scaled or not
- * 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
- * - the threshold of confidence (optional, default set to 0.25)
- * - the threshold of IOU (optional, default set to 0.45)
- * An example of option3 is "option3=0:0.65:0.6"
- * for mobilenet-ssd mode:
- * The option3 definition scheme is, in order, the following:
- * - box priors location file (mandatory)
- * - Detection threshold (optional, default set to 0.5)
- * - Y box scale (optional, default set to 10.0)
- * - X box scale (optional, default set to 10.0)
- * - h box scale (optional, default set to 5.0)
- * - w box scale (optional, default set to 5.0)
- * - IOU box valid threshold (optional, default set to 0.5)
- * The default parameters value could be set in the following ways:
- * option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
- * option3=box-priors.txt
- * option3=box-priors.txt::::::
- *
- * It's possible to set only few values, using the default values for
- * those not specified through the command line.
- * You could specify respectively the detection and IOU thresholds to 0.65
- * and 0.6 with the option3 parameter as follow:
- * option3=box-priors.txt:0.65:::::0.6
- * for mobilenet-ssd-postprocess mode:
- * The option3 is required to have 5 integer numbers, which tell
- * the tensor-dec how to interpret the given tensor inputs.
- * The first 4 numbers separated by colon, ':', designate which
- * are location:class:score:number of the tensors.
- * The last number separated by comma, ',' from the first 4 numbers
- * designate the threshold in percent.
- * In other words, "option3=%i:%i:%i:%i,%i".
- * for mp-palm-detection mode:
- * The option3 is required to have 5 float numbers, as following
- * - box score threshold (mandatory)
- * - number of layers for anchor generation (optional, default set to 4)
- * - minimum scale factor for anchor generation (optional, default set to 1.0)
- * - maximum scale factor for anchor generation (optional, default set to 1.0)
- * - X offset (optional, default set to 0.5)
- * - Y offset (optional, default set to 0.5)
- * - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
- * The default parameter value could be set in the following ways:
- * option3=0.5
- * option3=0.5:4:0.2:0.8
- * option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
- *
- * option4: Video Output Dimension (WIDTH:HEIGHT)
- * This is independent from option1
- * option5: Input Dimension (WIDTH:HEIGHT)
- * This is independent from option1
- * option6: Whether to track result bounding boxes or not
- * 0 (default, do not track)
- * 1 (track result bounding boxes, with naive centroid based algorithm)
- * option7: Whether to log the result bounding boxes or not
- * 0 (default, do not log)
- * 1 (log result bounding boxes)
- * option8: Box Style (NYI)
- *
- * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
- * (e.g., BGRA, ARGB, ...)
- *
*/
/** @todo _GNU_SOURCE fix build warning expf (nested-externs). remove this later. */
#define _GNU_SOURCE
#endif
#include <glib.h>
-#include <gst/gst.h>
-#include <nnstreamer_log.h>
+
#include <nnstreamer_plugin_api.h>
#include <nnstreamer_plugin_api_decoder.h>
-#include <nnstreamer_util.h>
+
#include <stdint.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tensordec-boundingbox.h"
*/
static singleLineSprite_t singleLineSprite;
+/**
+ * @brief List of bounding-box decoding schemes in string
+ * @details Each entry is placed at the index given by its bounding-box mode
+ *          enumerator, so the mode value can be used directly as the array index.
+ *          The final positional NULL lands in the slot right after
+ *          YOLOV8_BOUNDING_BOX; presumably it serves as the end-of-list sentinel
+ *          (confirm against the lookup code).
+ */
+static const char *bb_modes[] = {
+ [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
+ [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
+ [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
+ [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
+ [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd",
+ [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd",
+ [YOLOV5_BOUNDING_BOX] = "yolov5",
+ [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
+ [YOLOV8_BOUNDING_BOX] = "yolov8",
+ NULL,
+};
+
/** @brief tensordec-plugin's GstTensorDecoderDef callback */
static int
bb_init (void **pdata)
nnstreamer_decoder_exit (boundingBox.modename);
}
+/**
+ * @brief Compare function for sorting distances.
+ * @param[in] a Pointer to the first distanceArrayData
+ * @param[in] b Pointer to the second distanceArrayData
+ * @return -1 if a's distance is smaller, 1 if it is larger, 0 otherwise
+ *         (ascending order).
+ */
+static int
+distance_compare (const void *a, const void *b)
+{
+  const distanceArrayData *lhs = (const distanceArrayData *) a;
+  const distanceArrayData *rhs = (const distanceArrayData *) b;
+
+  /* (greater) - (less) yields exactly 1, -1 or 0. */
+  return (lhs->distance > rhs->distance) - (lhs->distance < rhs->distance);
+}
+
+/**
+ * @brief Compare Function for g_array_sort with detectedObject.
+ * @param[in] _a Pointer to the first detectedObject
+ * @param[in] _b Pointer to the second detectedObject
+ * @return -1 if _a has the higher prob, 0 if both probs are equal, 1 otherwise
+ *         (descending order of prob).
+ */
+static gint
+compare_detection (gconstpointer _a, gconstpointer _b)
+{
+  const detectedObject *lhs = static_cast<const detectedObject *> (_a);
+  const detectedObject *rhs = static_cast<const detectedObject *> (_b);
+
+  /* Larger comes first */
+  if (lhs->prob > rhs->prob)
+    return -1;
+  if (lhs->prob == rhs->prob)
+    return 0;
+  return 1;
+}
+
+/**
+ * @brief Calculate the intersection-over-union ratio of two detected boxes
+ * @param[in] a The first box
+ * @param[in] b The second box
+ * @return intersection / (areaA + areaB - intersection), or 0 when that value
+ *         is not >= 0.
+ * @note NOTE(review): the overlap extents are computed inclusively (+1) while the
+ *       box areas are plain width * height, so the ratio can exceed 1 for
+ *       heavily overlapping boxes. Confirm whether this is intended.
+ */
+static gfloat
+iou (detectedObject *a, detectedObject *b)
+{
+  const int left = MAX (a->x, b->x);
+  const int top = MAX (a->y, b->y);
+  const int right = MIN (a->x + a->width, b->x + b->width);
+  const int bottom = MIN (a->y + a->height, b->y + b->height);
+  const int overlap_w = MAX (0, (right - left + 1));
+  const int overlap_h = MAX (0, (bottom - top + 1));
+  const float overlap = overlap_w * overlap_h;
+  const float area_a = a->width * a->height;
+  const float area_b = b->width * b->height;
+  const float ratio = overlap / (area_a + area_b - overlap);
+
+  if (ratio >= 0)
+    return ratio;
+  return 0;
+}
+
+/**
+ * @brief Apply NMS to the given results (objects[DETECTION_MAX])
+ * @details Sorts results with compare_detection, then lets every still-valid box
+ *          invalidate each later valid box whose iou() with it exceeds threshold.
+ *          Entries left invalid are removed from the array afterwards.
+ * @param[in/out] results The results to be filtered with nms
+ * @param[in] threshold The IOU value above which the later box is dropped
+ */
+void
+nms (GArray *results, gfloat threshold)
+{
+  const guint total = results->len;
+  guint keep, other, pos;
+
+  if (total == 0U)
+    return;
+
+  g_array_sort (results, compare_detection);
+
+  for (keep = 0; keep < total; keep++) {
+    detectedObject *winner = &g_array_index (results, detectedObject, keep);
+
+    if (winner->valid != TRUE)
+      continue;
+
+    for (other = keep + 1; other < total; other++) {
+      detectedObject *candidate = &g_array_index (results, detectedObject, other);
+
+      if (candidate->valid == TRUE && iou (winner, candidate) > threshold)
+        candidate->valid = FALSE;
+    }
+  }
+
+  /* The array holds at least one entry here, so the first check always passes. */
+  pos = 0;
+  while (pos < results->len) {
+    detectedObject *entry = &g_array_index (results, detectedObject, pos);
+
+    /* Removal shifts later entries down, so pos only advances on a keep. */
+    if (entry->valid == FALSE)
+      g_array_remove_index (results, pos);
+    else
+      pos++;
+  }
+}
+
/**
* @brief check the num_tensors is valid
*/
-static int
-_check_tensors (const GstTensorsConfig *config, const unsigned int limit)
+int
+check_tensors (const GstTensorsConfig *config, const unsigned int limit)
{
unsigned int i;
g_return_val_if_fail (config != NULL, FALSE);
return TRUE;
}
-/** @brief Compare function for sorting distances. */
-static int
-distance_compare (const void *a, const void *b)
-{
- const distanceArrayData *da = (const distanceArrayData *) a;
- const distanceArrayData *db = (const distanceArrayData *) b;
-
- if (da->distance < db->distance)
- return -1;
- if (da->distance > db->distance)
- return 1;
- return 0;
-}
-
+/** @brief Constructor of BoundingBox */
BoundingBox::BoundingBox ()
{
mode = BOUNDING_BOX_UNKNOWN;
bdata = nullptr;
}
+/** @brief Destructor of BoundingBox */
BoundingBox::~BoundingBox ()
{
_free_labels (&labeldata);
}
/**
- * @brief Compare Function for g_array_sort with detectedObject.
- */
-static gint
-compare_detection (gconstpointer _a, gconstpointer _b)
-{
- const detectedObject *a = static_cast<const detectedObject *> (_a);
- const detectedObject *b = static_cast<const detectedObject *> (_b);
-
- /* Larger comes first */
- return (a->prob > b->prob) ? -1 : ((a->prob == b->prob) ? 0 : 1);
-}
-
-/**
- * @brief Calculate the intersected surface
- */
-static gfloat
-iou (detectedObject *a, detectedObject *b)
-{
- int x1 = MAX (a->x, b->x);
- int y1 = MAX (a->y, b->y);
- int x2 = MIN (a->x + a->width, b->x + b->width);
- int y2 = MIN (a->y + a->height, b->y + b->height);
- int w = MAX (0, (x2 - x1 + 1));
- int h = MAX (0, (y2 - y1 + 1));
- float inter = w * h;
- float areaA = a->width * a->height;
- float areaB = b->width * b->height;
- float o = inter / (areaA + areaB - inter);
- return (o >= 0) ? o : 0;
-}
-
-/**
- * @brief Apply NMS to the given results (objects[DETECTION_MAX])
- * @param[in/out] results The results to be filtered with nms
- */
-static void
-nms (GArray *results, gfloat threshold)
-{
- guint boxes_size;
- guint i, j;
-
- boxes_size = results->len;
- if (boxes_size == 0U)
- return;
-
- g_array_sort (results, compare_detection);
-
- for (i = 0; i < boxes_size; i++) {
- detectedObject *a = &g_array_index (results, detectedObject, i);
- if (a->valid == TRUE) {
- for (j = i + 1; j < boxes_size; j++) {
- detectedObject *b = &g_array_index (results, detectedObject, j);
- if (b->valid == TRUE) {
- if (iou (a, b) > threshold) {
- b->valid = FALSE;
- }
- }
- }
- }
- }
-
- i = 0;
- do {
- detectedObject *a = &g_array_index (results, detectedObject, i);
- if (a->valid == FALSE)
- g_array_remove_index (results, i);
- else
- i++;
- } while (i < results->len);
-}
-
-/**
* @brief Draw with the given results (objects[DETECTION_MAX]) to the output buffer
* @param[out] out_info The output buffer (RGBA plain)
* @param[in] bdata The bounding-box internal data.
}
/**
- * @brief check the label relevant properties are valid
+ * @brief Check the label relevant properties are valid
*/
gboolean
BoundingBox::checkLabelProps ()
return TRUE;
}
+/**
+ * @brief Set mode of bounding box
+ */
int
BoundingBox::setBoxDecodingMode (const char *param)
{
return TRUE;
}
+/**
+ * @brief Set label path of bounding box
+ */
int
BoundingBox::setLabelPath (const char *param)
{
/** @todo Do not die for this */
}
+/**
+ * @brief Set video size of bounding box
+ */
int
BoundingBox::setVideoSize (const char *param)
{
return TRUE;
}
+/**
+ * @brief Set input model size of bounding box
+ */
int
BoundingBox::setInputModelSize (const char *param)
{
return TRUE;
}
+/**
+ * @brief Set option of bounding box
+ */
int
BoundingBox::setOption (BoundingBoxOption option, const char *param)
{
return TRUE;
}
+/**
+ * @brief Get out caps of bounding box
+ */
GstCaps *
BoundingBox::getOutCaps (const GstTensorsConfig *config)
{
return caps;
}
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @param[out] outbuf A sub-plugin should update or append proper memory for the negotiated media type.
+ */
GstFlowReturn
BoundingBox::decode (const GstTensorsConfig *config,
const GstTensorMemory *input, GstBuffer *outbuf)
return GST_FLOW_ERROR;
}
-
-MobilenetSSD::MobilenetSSD ()
-{
- params[THRESHOLD_IDX] = DETECTION_THRESHOLD_DEFAULT;
- params[Y_SCALE_IDX] = Y_SCALE_DEFAULT;
- params[X_SCALE_IDX] = X_SCALE_DEFAULT;
- params[H_SCALE_IDX] = H_SCALE_DEFAULT;
- params[W_SCALE_IDX] = W_SCALE_DEFAULT;
- params[IOU_THRESHOLD_IDX] = THRESHOLD_IOU_DEFAULT;
- sigmoid_threshold = logit (DETECTION_THRESHOLD_DEFAULT);
-
- max_detection = 0;
- total_labels = 0;
- box_prior_path = nullptr;
-}
-
-/**
- * @brief Load box-prior data from a file
- * @param[in/out] bdata The internal data.
- * @return TRUE if loaded and configured. FALSE if failed to do so.
- */
-int
-MobilenetSSD::mobilenet_ssd_loadBoxPrior ()
-{
- gboolean failed = FALSE;
- GError *err = NULL;
- gchar **priors;
- gchar *line = NULL;
- gchar *contents = NULL;
- guint row;
- gint prev_reg = -1;
-
- /* Read file contents */
- if (!g_file_get_contents (box_prior_path, &contents, NULL, &err)) {
- GST_ERROR ("Decoder/Bound-Box/SSD's box prior file %s cannot be read: %s",
- box_prior_path, err->message);
- g_clear_error (&err);
- return FALSE;
- }
-
- priors = g_strsplit (contents, "\n", -1);
- /* If given prior file is inappropriate, report back to tensor-decoder */
- if (g_strv_length (priors) < BOX_SIZE) {
- ml_loge ("The given prior file, %s, should have at least %d lines.\n",
- box_prior_path, BOX_SIZE);
- failed = TRUE;
- goto error;
- }
-
- for (row = 0; row < BOX_SIZE; row++) {
- gint column = 0, registered = 0;
-
- line = priors[row];
- if (line) {
- gchar **list = g_strsplit_set (line, " \t,", -1);
- gchar *word;
-
- while ((word = list[column]) != NULL) {
- column++;
-
- if (word && *word) {
- if (registered > DETECTION_MAX) {
- GST_WARNING ("Decoder/Bound-Box/SSD's box prior data file has too many priors. %d >= %d",
- registered, DETECTION_MAX);
- break;
- }
- box_priors[row][registered] = (gfloat) g_ascii_strtod (word, NULL);
- registered++;
- }
- }
-
- g_strfreev (list);
- }
-
- if (prev_reg != -1 && prev_reg != registered) {
- GST_ERROR ("Decoder/Bound-Box/SSD's box prior data file is not consistent.");
- failed = TRUE;
- break;
- }
- prev_reg = registered;
- }
-
-error:
- g_strfreev (priors);
- g_free (contents);
- return !failed;
-}
-
-int
-MobilenetSSD::setOptionInternal (const char *param)
-{
- gchar **options;
- int noptions, idx;
- int ret = 1;
-
- options = g_strsplit (param, ":", -1);
- noptions = g_strv_length (options);
-
- if (noptions > (PARAMS_MAX + 1))
- noptions = PARAMS_MAX + 1;
-
- if (box_prior_path) {
- g_free (box_prior_path);
- box_prior_path = nullptr;
- }
-
- box_prior_path = g_strdup (options[0]);
-
- if (NULL != box_prior_path) {
- ret = mobilenet_ssd_loadBoxPrior ();
- if (ret == 0)
- goto exit_mobilenet_ssd;
- }
-
- for (idx = 1; idx < noptions; idx++) {
- if (strlen (options[idx]) == 0)
- continue;
- params[idx - 1] = strtod (options[idx], NULL);
- }
-
- sigmoid_threshold = logit (params[THRESHOLD_IDX]);
-
- return TRUE;
-
-exit_mobilenet_ssd:
- g_strfreev (options);
- return ret;
-}
-
-int
-MobilenetSSD::checkCompatible (const GstTensorsConfig *config)
-{
- const uint32_t *dim1, *dim2;
- int i;
- guint max_detection, max_label;
-
- if (!_check_tensors (config, MAX_TENSORS))
- return FALSE;
-
- /* Check if the first tensor is compatible */
- dim1 = config->info.info[0].dimension;
- g_return_val_if_fail (dim1[0] == BOX_SIZE, FALSE);
- g_return_val_if_fail (dim1[1] == 1, FALSE);
- max_detection = dim1[2];
- g_return_val_if_fail (max_detection > 0, FALSE);
-
- /** @todo unused dimension value should be 0 */
- for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
- g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
- /* Check if the second tensor is compatible */
- dim2 = config->info.info[1].dimension;
-
- max_label = dim2[0];
- g_return_val_if_fail (max_label <= total_labels, FALSE);
- if (max_label < total_labels)
- GST_WARNING ("The given tensor (2nd) has max_label (first dimension: %u) smaller than the number of labels in labels file (%u).",
- max_label, total_labels);
- g_return_val_if_fail (max_detection == dim2[1], FALSE);
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
- g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
-
- /* Check consistency with max_detection */
- if (this->max_detection == 0)
- this->max_detection = max_detection;
- else
- g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
- if (this->max_detection > DETECTION_MAX) {
- GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
- return FALSE;
- }
-
- return TRUE;
-}
-
-GArray *
-MobilenetSSD::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
- const GstTensorMemory *boxes, *detections = NULL;
- GArray *results;
- const guint num_tensors = config->info.num_tensors;
-
- /**
- * @todo 100 is a heuristic number of objects in a picture frame
- * We may have better "heuristics" than this.
- * For the sake of performance, don't make it too small.
- */
-
- /* Already checked with getOutCaps. Thus, this is an internal bug */
- g_assert (num_tensors >= MAX_TENSORS);
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), 100);
-
- boxes = &input[0];
- if (num_tensors >= MAX_TENSORS) /* lgtm[cpp/constant-comparison] */
- detections = &input[1];
-
- switch (config->info.info[0].type) {
- _get_objects_mobilenet_ssd_ (uint8_t, _NNS_UINT8);
- _get_objects_mobilenet_ssd_ (int8_t, _NNS_INT8);
- _get_objects_mobilenet_ssd_ (uint16_t, _NNS_UINT16);
- _get_objects_mobilenet_ssd_ (int16_t, _NNS_INT16);
- _get_objects_mobilenet_ssd_ (uint32_t, _NNS_UINT32);
- _get_objects_mobilenet_ssd_ (int32_t, _NNS_INT32);
- _get_objects_mobilenet_ssd_ (uint64_t, _NNS_UINT64);
- _get_objects_mobilenet_ssd_ (int64_t, _NNS_INT64);
- _get_objects_mobilenet_ssd_ (float, _NNS_FLOAT32);
- _get_objects_mobilenet_ssd_ (double, _NNS_FLOAT64);
- default:
- g_assert (0);
- }
- nms (results, params[IOU_THRESHOLD_IDX]);
- return results;
-}
-
-MobilenetSSDPP::MobilenetSSDPP ()
-{
- tensor_mapping[LOCATIONS_IDX] = LOCATIONS_DEFAULT;
- tensor_mapping[CLASSES_IDX] = CLASSES_DEFAULT;
- tensor_mapping[SCORES_IDX] = SCORES_DEFAULT;
- tensor_mapping[NUM_IDX] = NUM_DEFAULT;
- threshold = THRESHOLD_DEFAULT;
-}
-
-/** @brief Helper to retrieve tensor index by feature */
-int
-MobilenetSSDPP::get_mobilenet_ssd_pp_tensor_idx (int idx)
-{
- return tensor_mapping[idx];
-}
-
-int
-MobilenetSSDPP::setOptionInternal (const char *param)
-{
- int threshold_percent;
- int ret = sscanf (param, "%i:%i:%i:%i,%i", &tensor_mapping[LOCATIONS_IDX],
- &tensor_mapping[CLASSES_IDX], &tensor_mapping[SCORES_IDX],
- &tensor_mapping[NUM_IDX], &threshold_percent);
-
- if ((ret == EOF) || (ret < 5)) {
- GST_ERROR ("Invalid options, must be \"locations idx:classes idx:scores idx:num idx,threshold\"");
- return FALSE;
- }
-
- GST_INFO ("MOBILENET SSD POST PROCESS output tensors mapping: "
- "locations idx (%d), classes idx (%d), scores idx (%d), num detections idx (%d)",
- tensor_mapping[LOCATIONS_IDX], tensor_mapping[CLASSES_IDX],
- tensor_mapping[SCORES_IDX], tensor_mapping[NUM_IDX]);
-
- if ((threshold_percent > 100) || (threshold_percent < 0)) {
- GST_ERROR ("Invalid MOBILENET SSD POST PROCESS threshold detection (%i), must be in range [0 100]",
- threshold_percent);
- } else {
- threshold = threshold_percent / 100.0;
- }
-
- GST_INFO ("MOBILENET SSD POST PROCESS object detection threshold: %.2f", threshold);
-
- return TRUE;
-}
-
-int
-MobilenetSSDPP::checkCompatible (const GstTensorsConfig *config)
-{
- const uint32_t *dim1, *dim2, *dim3, *dim4;
- int locations_idx, classes_idx, scores_idx, num_idx, i;
-
- if (!_check_tensors (config, MAX_TENSORS))
- return FALSE;
-
- locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
- classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
- scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
- num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
-
- /* Check if the number of detections tensor is compatible */
- dim1 = config->info.info[num_idx].dimension;
- g_return_val_if_fail (dim1[0] == 1, FALSE);
- for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i)
- g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
- /* Check if the classes & scores tensors are compatible */
- dim2 = config->info.info[classes_idx].dimension;
- dim3 = config->info.info[scores_idx].dimension;
- g_return_val_if_fail (dim3[0] == dim2[0], FALSE);
- max_detection = dim2[0];
- for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i) {
- g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
- g_return_val_if_fail (dim3[i] == 0 || dim3[i] == 1, FALSE);
- }
-
- /* Check if the bbox locations tensor is compatible */
- dim4 = config->info.info[locations_idx].dimension;
- g_return_val_if_fail (BOX_SIZE == dim4[0], FALSE);
- g_return_val_if_fail (max_detection == dim4[1], FALSE);
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
- g_return_val_if_fail (dim4[i] == 0 || dim4[i] == 1, FALSE);
-
- /* Check consistency with max_detection */
- if (this->max_detection == 0)
- this->max_detection = max_detection;
- else
- g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
- if (this->max_detection > DETECTION_MAX) {
- GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
- return FALSE;
- }
- return TRUE;
-}
-
-GArray *
-MobilenetSSDPP::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-
- const GstTensorMemory *mem_num, *mem_classes, *mem_scores, *mem_boxes;
- int locations_idx, classes_idx, scores_idx, num_idx;
- GArray *results = NULL;
- const guint num_tensors = config->info.num_tensors;
-
- /* Already checked with getOutCaps. Thus, this is an internal bug */
- g_assert (num_tensors >= MAX_TENSORS);
-
- locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
- classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
- scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
- num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
-
- mem_num = &input[num_idx];
- mem_classes = &input[classes_idx];
- mem_scores = &input[scores_idx];
- mem_boxes = &input[locations_idx];
-
- switch (config->info.info[num_idx].type) {
- _get_objects_mobilenet_ssd_pp_ (uint8_t, _NNS_UINT8);
- _get_objects_mobilenet_ssd_pp_ (int8_t, _NNS_INT8);
- _get_objects_mobilenet_ssd_pp_ (uint16_t, _NNS_UINT16);
- _get_objects_mobilenet_ssd_pp_ (int16_t, _NNS_INT16);
- _get_objects_mobilenet_ssd_pp_ (uint32_t, _NNS_UINT32);
- _get_objects_mobilenet_ssd_pp_ (int32_t, _NNS_INT32);
- _get_objects_mobilenet_ssd_pp_ (uint64_t, _NNS_UINT64);
- _get_objects_mobilenet_ssd_pp_ (int64_t, _NNS_INT64);
- _get_objects_mobilenet_ssd_pp_ (float, _NNS_FLOAT32);
- _get_objects_mobilenet_ssd_pp_ (double, _NNS_FLOAT64);
- default:
- g_assert (0);
- }
- return results;
-}
-
-int
-OVDetection::checkCompatible (const GstTensorsConfig *config)
-{
- const guint *dim;
- int i;
- UNUSED (total_labels);
-
- if (!_check_tensors (config, DEFAULT_MAX_TENSORS))
- return FALSE;
-
- /**
- * The shape of the ouput tensor is [7, N, 1, 1], where N is the maximum
- * number (i.e., 200) of detected bounding boxes.
- */
- dim = config->info.info[0].dimension;
- g_return_val_if_fail (dim[0] == DEFAULT_SIZE_DETECTION_DESC, FALSE);
- g_return_val_if_fail (dim[1] == DETECTION_MAX, FALSE);
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
- g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
-
- return TRUE;
-}
-
-GArray *
-OVDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
- GArray *results = NULL;
- const guint num_tensors = config->info.num_tensors;
-
- /* Already checked with getOutCaps. Thus, this is an internal bug */
- g_assert (num_tensors >= DEFAULT_MAX_TENSORS);
-
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), DETECTION_MAX);
- switch (config->info.info[0].type) {
- _get_persons_ov (uint8_t, input[0].data, _NNS_UINT8, results);
- _get_persons_ov (int8_t, input[0].data, _NNS_INT8, results);
- _get_persons_ov (uint16_t, input[0].data, _NNS_UINT16, results);
- _get_persons_ov (int16_t, input[0].data, _NNS_INT16, results);
- _get_persons_ov (uint32_t, input[0].data, _NNS_UINT32, results);
- _get_persons_ov (int32_t, input[0].data, _NNS_INT32, results);
- _get_persons_ov (uint64_t, input[0].data, _NNS_UINT64, results);
- _get_persons_ov (int64_t, input[0].data, _NNS_INT64, results);
- _get_persons_ov (float, input[0].data, _NNS_FLOAT32, results);
- _get_persons_ov (double, input[0].data, _NNS_FLOAT64, results);
- default:
- g_assert (0);
- }
- return results;
-}
-
-int
-YoloV5::setOptionInternal (const char *param)
-{
- gchar **options;
- int noptions;
-
- options = g_strsplit (param, ":", -1);
- noptions = g_strv_length (options);
- if (noptions > 0)
- scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
- if (noptions > 1)
- conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
- if (noptions > 2)
- iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
-
- nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
- scaled_output, conf_threshold, iou_threshold);
-
- g_strfreev (options);
- return TRUE;
-}
-
-int
-YoloV5::checkCompatible (const GstTensorsConfig *config)
-{
- const guint *dim = config->info.info[0].dimension;
- int i;
-
- if (!_check_tensors (config, 1U))
- return FALSE;
-
- max_detection = ((i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
- + (i_width / 8) * (i_height / 8))
- * 3;
-
- g_return_val_if_fail (dim[0] == (total_labels + DEFAULT_DETECTION_NUM_INFO), FALSE);
- g_return_val_if_fail (dim[1] == max_detection, FALSE);
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
- g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
- return TRUE;
-}
-
-GArray *
-YoloV5::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
- GArray *results = NULL;
-
- int bIdx, numTotalBox;
- int cIdx, numTotalClass, cStartIdx, cIdxMax;
- float *boxinput;
- int is_output_scaled = scaled_output;
-
- numTotalBox = max_detection;
- numTotalClass = total_labels;
- cStartIdx = DEFAULT_DETECTION_NUM_INFO;
- cIdxMax = numTotalClass + cStartIdx;
-
- /* boxinput[numTotalBox][cIdxMax] */
- boxinput = (float *) input[0].data;
-
- /** Only support for float type model */
- g_assert (config->info.info[0].type == _NNS_FLOAT32);
-
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
- for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
- float maxClassConfVal = -INFINITY;
- int maxClassIdx = -1;
- for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
- if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
- maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
- maxClassIdx = cIdx;
- }
- }
-
- if (maxClassConfVal * boxinput[bIdx * cIdxMax + 4] > conf_threshold) {
- detectedObject object;
- float cx, cy, w, h;
- cx = boxinput[bIdx * cIdxMax + 0];
- cy = boxinput[bIdx * cIdxMax + 1];
- w = boxinput[bIdx * cIdxMax + 2];
- h = boxinput[bIdx * cIdxMax + 3];
-
- if (!is_output_scaled) {
- cx *= (float) i_width;
- cy *= (float) i_height;
- w *= (float) i_width;
- h *= (float) i_height;
- }
-
- object.x = (int) (MAX (0.f, (cx - w / 2.f)));
- object.y = (int) (MAX (0.f, (cy - h / 2.f)));
- object.width = (int) (MIN ((float) i_width, w));
- object.height = (int) (MIN ((float) i_height, h));
-
- object.prob = maxClassConfVal * boxinput[bIdx * cIdxMax + 4];
- object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO;
- object.tracking_id = 0;
- object.valid = TRUE;
- g_array_append_val (results, object);
- }
- }
-
- nms (results, iou_threshold);
- return results;
-}
-
-
-int
-YoloV8::setOptionInternal (const char *param)
-{
- gchar **options;
- int noptions;
-
- options = g_strsplit (param, ":", -1);
- noptions = g_strv_length (options);
- if (noptions > 0)
- scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
- if (noptions > 1)
- conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
- if (noptions > 2)
- iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
-
- nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
- scaled_output, conf_threshold, iou_threshold);
-
- g_strfreev (options);
- return TRUE;
-}
-
-int
-YoloV8::checkCompatible (const GstTensorsConfig *config)
-{
- const guint *dim = config->info.info[0].dimension;
- int i;
- if (!_check_tensors (config, 1U))
- return FALSE;
-
- /** Only support for float type model */
- g_return_val_if_fail (config->info.info[0].type == _NNS_FLOAT32, FALSE);
-
- max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
- + (i_width / 8) * (i_height / 8);
-
- if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO) || dim[1] != max_detection) {
- nns_loge ("yolov8 boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
- total_labels + DEFAULT_DETECTION_NUM_INFO, max_detection, dim[0], dim[1]);
- return FALSE;
- }
-
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
- g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
- return TRUE;
-}
-
-GArray *
-YoloV8::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
- GArray *results = NULL;
- int bIdx, numTotalBox;
- int cIdx, numTotalClass, cStartIdx, cIdxMax;
- float *boxinput;
- int is_output_scaled = scaled_output;
- UNUSED (config);
-
- numTotalBox = max_detection;
- numTotalClass = total_labels;
- cStartIdx = DEFAULT_DETECTION_NUM_INFO;
- cIdxMax = numTotalClass + cStartIdx;
-
- /* boxinput[numTotalBox][cIdxMax] */
- boxinput = (float *) input[0].data;
-
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
- for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
- float maxClassConfVal = -INFINITY;
- int maxClassIdx = -1;
- for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
- if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
- maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
- maxClassIdx = cIdx;
- }
- }
-
- if (maxClassConfVal > conf_threshold) {
- detectedObject object;
- float cx, cy, w, h;
- cx = boxinput[bIdx * cIdxMax + 0];
- cy = boxinput[bIdx * cIdxMax + 1];
- w = boxinput[bIdx * cIdxMax + 2];
- h = boxinput[bIdx * cIdxMax + 3];
-
- if (!is_output_scaled) {
- cx *= (float) i_width;
- cy *= (float) i_height;
- w *= (float) i_width;
- h *= (float) i_height;
- }
-
- object.x = (int) (MAX (0.f, (cx - w / 2.f)));
- object.y = (int) (MAX (0.f, (cy - h / 2.f)));
- object.width = (int) (MIN ((float) i_width, w));
- object.height = (int) (MIN ((float) i_height, h));
-
- object.prob = maxClassConfVal;
- object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO;
- object.tracking_id = 0;
- object.valid = TRUE;
- g_array_append_val (results, object);
- }
- }
-
- nms (results, iou_threshold);
- return results;
-}
-
-#define mp_palm_detection_option(option, type, idx) \
- if (noptions > idx) \
- option = (type) g_strtod (options[idx], NULL)
-
-MpPalmDetection::MpPalmDetection ()
-{
- num_layers = NUM_LAYERS_DEFAULT;
- min_scale = MIN_SCALE_DEFAULT;
- max_scale = MAX_SCALE_DEFAULT;
- offset_x = OFFSET_X_DEFAULT;
- offset_y = OFFSET_Y_DEFAULT;
- strides[0] = STRIDE_0_DEFAULT;
- strides[1] = STRIDE_1_DEFAULT;
- strides[2] = STRIDE_2_DEFAULT;
- strides[3] = STRIDE_3_DEFAULT;
- min_score_threshold = MIN_SCORE_THRESHOLD_DEFAULT;
- anchors = g_array_new (FALSE, TRUE, sizeof (anchor));
-}
-
-MpPalmDetection::~MpPalmDetection ()
-{
- if (anchors)
- g_array_free (anchors, TRUE);
- anchors = NULL;
-}
-
-/**
- * @brief Calculate anchor scale
- */
-static gfloat
-_calculate_scale (float min_scale, float max_scale, int stride_index, int num_strides)
-{
- if (num_strides == 1) {
- return (min_scale + max_scale) * 0.5f;
- } else {
- return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
- }
-}
-
-/**
- * @brief Generate anchor information
- */
-void
-MpPalmDetection::mp_palm_detection_generate_anchors ()
-{
- int layer_id = 0;
- guint i;
-
- while (layer_id < num_layers) {
- GArray *aspect_ratios = g_array_new (FALSE, TRUE, sizeof (gfloat));
- GArray *scales = g_array_new (FALSE, TRUE, sizeof (gfloat));
- GArray *anchor_height = g_array_new (FALSE, TRUE, sizeof (gfloat));
- GArray *anchor_width = g_array_new (FALSE, TRUE, sizeof (gfloat));
-
- int last_same_stride_layer = layer_id;
-
- while (last_same_stride_layer < num_layers
- && strides[last_same_stride_layer] == strides[layer_id]) {
- gfloat scale;
- gfloat ratio = 1.0f;
- g_array_append_val (aspect_ratios, ratio);
- g_array_append_val (aspect_ratios, ratio);
- scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer, num_layers);
- g_array_append_val (scales, scale);
- scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer + 1, num_layers);
- g_array_append_val (scales, scale);
- last_same_stride_layer++;
- }
-
- for (i = 0; i < aspect_ratios->len; ++i) {
- const float ratio_sqrts = sqrt (g_array_index (aspect_ratios, gfloat, i));
- const gfloat sc = g_array_index (scales, gfloat, i);
- gfloat anchor_height_ = sc / ratio_sqrts;
- gfloat anchor_width_ = sc * ratio_sqrts;
- g_array_append_val (anchor_height, anchor_height_);
- g_array_append_val (anchor_width, anchor_width_);
- }
-
- {
- int feature_map_height = 0;
- int feature_map_width = 0;
- int x, y;
- int anchor_id;
-
- const int stride = strides[layer_id];
- feature_map_height = ceil (1.0f * 192 / stride);
- feature_map_width = ceil (1.0f * 192 / stride);
- for (y = 0; y < feature_map_height; ++y) {
- for (x = 0; x < feature_map_width; ++x) {
- for (anchor_id = 0; anchor_id < (int) aspect_ratios->len; ++anchor_id) {
- const float x_center = (x + offset_x) * 1.0f / feature_map_width;
- const float y_center = (y + offset_y) * 1.0f / feature_map_height;
-
- const anchor a = { .x_center = x_center,
- .y_center = y_center,
- .w = g_array_index (anchor_width, gfloat, anchor_id),
- .h = g_array_index (anchor_height, gfloat, anchor_id) };
- g_array_append_val (anchors, a);
- }
- }
- }
- layer_id = last_same_stride_layer;
- }
-
- g_array_free (anchor_height, TRUE);
- g_array_free (anchor_width, TRUE);
- g_array_free (aspect_ratios, TRUE);
- g_array_free (scales, TRUE);
- }
-}
-
-int
-MpPalmDetection::setOptionInternal (const char *param)
-{
- /* Load palm detection info from option3 */
- gchar **options;
- int noptions, idx;
- int ret = TRUE;
-
- options = g_strsplit (param, ":", -1);
- noptions = g_strv_length (options);
-
- if (noptions > PARAMS_MAX) {
- GST_ERROR ("Invalid MP PALM DETECTION PARAM length: %d", noptions);
- ret = FALSE;
- goto exit_mp_palm_detection;
- }
-
- mp_palm_detection_option (min_score_threshold, gfloat, 0);
- mp_palm_detection_option (num_layers, gint, 1);
- mp_palm_detection_option (min_scale, gfloat, 2);
- mp_palm_detection_option (max_scale, gfloat, 3);
- mp_palm_detection_option (offset_x, gfloat, 4);
- mp_palm_detection_option (offset_y, gfloat, 5);
-
- for (idx = 6; idx < num_layers + 6; idx++) {
- mp_palm_detection_option (strides[idx - 6], gint, idx);
- }
- mp_palm_detection_generate_anchors ();
-
-exit_mp_palm_detection:
- g_strfreev (options);
- return ret;
-}
-
-int
-MpPalmDetection::checkCompatible (const GstTensorsConfig *config)
-{
- const uint32_t *dim1, *dim2;
- int i;
- if (!_check_tensors (config, MAX_TENSORS))
- return FALSE;
-
- /* Check if the first tensor is compatible */
- dim1 = config->info.info[0].dimension;
-
- g_return_val_if_fail (dim1[0] == INFO_SIZE, FALSE);
- max_detection = dim1[1];
- g_return_val_if_fail (max_detection > 0, FALSE);
- g_return_val_if_fail (dim1[2] == 1, FALSE);
- for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
- g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
- /* Check if the second tensor is compatible */
- dim2 = config->info.info[1].dimension;
- g_return_val_if_fail (dim2[0] == 1, FALSE);
- g_return_val_if_fail (max_detection == dim2[1], FALSE);
- for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
- g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
-
- /* Check consistency with max_detection */
- if (this->max_detection == 0)
- this->max_detection = max_detection;
- else
- g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
- if (this->max_detection > MAX_DETECTION) {
- GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
- return FALSE;
- }
- return TRUE;
-}
-
-GArray *
-MpPalmDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
- GArray *results = NULL;
- const GstTensorMemory *boxes = NULL;
- const GstTensorMemory *detections = NULL;
- const guint num_tensors = config->info.num_tensors;
-
- /* Already checked with getOutCaps. Thus, this is an internal bug */
- g_assert (num_tensors >= MAX_TENSORS);
-
- /* results will be allocated by _get_objects_mp_palm_detection_ */
- boxes = &input[0];
- detections = &input[1];
- switch (config->info.info[0].type) {
- _get_objects_mp_palm_detection_ (uint8_t, _NNS_UINT8);
- _get_objects_mp_palm_detection_ (int8_t, _NNS_INT8);
- _get_objects_mp_palm_detection_ (uint16_t, _NNS_UINT16);
- _get_objects_mp_palm_detection_ (int16_t, _NNS_INT16);
- _get_objects_mp_palm_detection_ (uint32_t, _NNS_UINT32);
- _get_objects_mp_palm_detection_ (int32_t, _NNS_INT32);
- _get_objects_mp_palm_detection_ (uint64_t, _NNS_UINT64);
- _get_objects_mp_palm_detection_ (int64_t, _NNS_INT64);
- _get_objects_mp_palm_detection_ (float, _NNS_FLOAT32);
- _get_objects_mp_palm_detection_ (double, _NNS_FLOAT64);
-
- default:
- g_assert (0);
- }
- nms (results, 0.05f);
- return results;
-}
+/**
+ * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
+ * Copyright (C) 2018 Samsung Electronics Co. Ltd.
+ * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
+ * Copyright 2021 NXP
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ */
+/**
+ * @file tensordec-boundingbox.h
+ * @date 15 Nov 2018
+ * @brief NNStreamer tensor-decoder subplugin, "bounding boxes",
+ * which converts tensors to video stream w/ boxes on
+ * transparent background.
+ * This code is NYI/WIP and not compilable.
+ *
+ * @see https://github.com/nnstreamer/nnstreamer
+ * @author MyungJoo Ham <myungjoo.ham@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ * option1: Decoder mode of bounding box.
+ * Available: yolov5
+ * yolov8
+ * mp-palm-detection
+ * mobilenet-ssd (single shot multibox detector with priors.)
+ * mobilenet-ssd-postprocess
+ * ov-person-detection
+ * tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
+ * tflite-ssd (deprecated, recommend to use mobilenet-ssd)
+ * option2: Location of label file
+ * This is independent from option1
+ * option3: Any option1-dependent values
+ * !!This depends on option1 values!!
+ * for yolov5 and yolov8 mode:
+ * The option3 requires up to 3 numbers, which tell
+ * - whether the output values are scaled or not
+ * 0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
+ * - the threshold of confidence (optional, default set to 0.25)
+ * - the threshold of IOU (optional, default set to 0.45)
+ * An example of option3 is "option3=0:0.65:0.6"
+ * for mobilenet-ssd mode:
+ * The option3 definition scheme is, in order, the following:
+ * - box priors location file (mandatory)
+ * - Detection threshold (optional, default set to 0.5)
+ * - Y box scale (optional, default set to 10.0)
+ * - X box scale (optional, default set to 10.0)
+ * - h box scale (optional, default set to 5.0)
+ * - w box scale (optional, default set to 5.0)
+ * - IOU box valid threshold (optional, default set to 0.5)
+ * The default parameters value could be set in the following ways:
+ * option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
+ * option3=box-priors.txt
+ * option3=box-priors.txt::::::
+ *
+ * It's possible to set only few values, using the default values for
+ * those not specified through the command line.
+ * For example, to set the detection and IOU thresholds to 0.65 and 0.6
+ * respectively, specify the option3 parameter as follows:
+ * option3=box-priors.txt:0.65:::::0.6
+ * for mobilenet-ssd-postprocess mode:
+ * The option3 is required to have 5 integer numbers, which tell
+ * the tensor-dec how to interpret the given tensor inputs.
+ * The first 4 numbers, separated by colons (':'), designate the
+ * indices of the location:class:score:number tensors, in that order.
+ * The last number, separated from the first 4 by a comma (','),
+ * designates the detection threshold in percent (0 to 100).
+ * In other words, "option3=%i:%i:%i:%i,%i".
+ * for mp-palm-detection mode:
+ * The option3 takes the following colon-separated values, in order:
+ * - box score threshold (mandatory)
+ * - number of layers for anchor generation (optional, default set to 4)
+ * - minimum scale factor for anchor generation (optional, default set to 1.0)
+ * - maximum scale factor for anchor generation (optional, default set to 1.0)
+ * - X offset (optional, default set to 0.5)
+ * - Y offset (optional, default set to 0.5)
+ * - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
+ * The default parameter value could be set in the following ways:
+ * option3=0.5
+ * option3=0.5:4:0.2:0.8
+ * option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
+ *
+ * option4: Video Output Dimension (WIDTH:HEIGHT)
+ * This is independent from option1
+ * option5: Input Dimension (WIDTH:HEIGHT)
+ * This is independent from option1
+ * option6: Whether to track result bounding boxes or not
+ * 0 (default, do not track)
+ * 1 (track result bounding boxes, with naive centroid based algorithm)
+ * option7: Whether to log the result bounding boxes or not
+ * 0 (default, do not log)
+ * 1 (log result bounding boxes)
+ * option8: Box Style (NYI)
+ *
+ * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
+ * (e.g., BGRA, ARGB, ...)
+ *
+ */
+
#ifndef _TENSORDECBB_H__
#define _TENSORDECBB_H__
-
+#include <gst/gst.h>
#include <math.h> /* expf */
+#include <nnstreamer_log.h>
+#include <nnstreamer_util.h>
#include "tensordecutil.h"
#define PIXEL_VALUE (0xFF0000FF) /* RED 100% in RGBA */
/**
- * @brief C++-Template-like box location calculation for box-priors for Mobilenet SSD Model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] boxprior The box prior data from the box file of MOBILENET_SSD.
- * @param[in] boxinput Input Tensor Data (Boxes)
- * @param[in] detinput Input Tensor Data (Detection). Null if not available. (numtensor ==1)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Option of bounding box
*/
-#define _get_objects_mobilenet_ssd(_type, typename, boxprior, boxinput, \
- detinput, config, results, i_width, i_height, max_detection) \
- case typename: \
- { \
- int d; \
- _type *boxinput_ = (_type *) boxinput; \
- size_t boxbpi = config->info.info[0].dimension[0]; \
- _type *detinput_ = (_type *) detinput; \
- size_t detbpi = config->info.info[1].dimension[0]; \
- int num = (DETECTION_MAX > max_detection) ? max_detection : DETECTION_MAX; \
- detectedObject object = { \
- .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
- }; \
- for (d = 0; d < num; d++) { \
- _get_object_i_mobilenet_ssd (d, detbpi, boxprior, (boxinput_ + (d * boxbpi)), \
- (detinput_ + (d * detbpi)), (&object), i_width, i_height); \
- if (object.valid == TRUE) { \
- g_array_append_val (results, object); \
- } \
- } \
- } \
- break
-
-
-/** @brief Macro to simplify calling _get_objects_mobilenet_ssd */
-#define _get_objects_mobilenet_ssd_(type, typename) \
- _get_objects_mobilenet_ssd (type, typename, box_priors, (boxes->data), \
- (detections->data), config, results, i_width, i_height, max_detection)
-
-
-#define _expit(x) (1.f / (1.f + expf (-((float) x))))
-
-/**
- * @brief C++-Template-like box location calculation for box-priors
- * @bug This is not macro-argument safe. Use paranthesis!
- * @param[in] bb The configuration, "bounding_boxes"
- * @param[in] index The index (3rd dimension of BOX_SIZE:1:DETECTION_MAX:1)
- * @param[in] total_labels The count of total labels. We can get this from input tensor info. (1st dimension of LABEL_SIZE:DETECTION_MAX:1:1)
- * @param[in] boxprior The box prior data from the box file of SSD.
- * @param[in] boxinputptr Cursor pointer of input + byte-per-index * index (box)
- * @param[in] detinputptr Cursor pointer of input + byte-per-index * index (detection)
- * @param[in] result The object returned. (pointer to object)
- */
-#define _get_object_i_mobilenet_ssd(index, total_labels, boxprior, \
- boxinputptr, detinputptr, result, i_width, i_height) \
- do { \
- unsigned int c; \
- gfloat highscore = -FLT_MAX; \
- float y_scale = params[Y_SCALE_IDX]; \
- float x_scale = params[X_SCALE_IDX]; \
- float h_scale = params[H_SCALE_IDX]; \
- float w_scale = params[W_SCALE_IDX]; \
- result->valid = FALSE; \
- for (c = 1; c < total_labels; c++) { \
- if (detinputptr[c] >= sigmoid_threshold) { \
- gfloat score = _expit (detinputptr[c]); \
- float ycenter \
- = boxinputptr[0] / y_scale * boxprior[2][index] + boxprior[0][index]; \
- float xcenter \
- = boxinputptr[1] / x_scale * boxprior[3][index] + boxprior[1][index]; \
- float h = (float) expf (boxinputptr[2] / h_scale) * boxprior[2][index]; \
- float w = (float) expf (boxinputptr[3] / w_scale) * boxprior[3][index]; \
- float ymin = ycenter - h / 2.f; \
- float xmin = xcenter - w / 2.f; \
- int x = xmin * i_width; \
- int y = ymin * i_height; \
- int width = w * i_width; \
- int height = h * i_height; \
- if (highscore < score) { \
- result->class_id = c; \
- result->x = MAX (0, x); \
- result->y = MAX (0, y); \
- result->width = width; \
- result->height = height; \
- result->prob = score; \
- result->valid = TRUE; \
- } \
- } \
- } \
- } while (0);
-
enum class BoundingBoxOption {
MODE = 0,
LABEL_PATH = 1,
} bounding_box_modes;
/**
- * @brief MOBILENET SSD PostProcess Output tensor feature mapping.
- */
-typedef enum {
- MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS = 0,
- MOBILENET_SSD_PP_BBOX_IDX_CLASSES = 1,
- MOBILENET_SSD_PP_BBOX_IDX_SCORES = 2,
- MOBILENET_SSD_PP_BBOX_IDX_NUM = 3,
- MOBILENET_SSD_PP_BBOX_IDX_UNKNOWN
-} mobilenet_ssd_pp_bbox_idx_t;
-
-/**
- * @brief List of bounding-box decoding schemes in string
- */
-static const char *bb_modes[] = {
- [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
- [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
- [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
- [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
- [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd",
- [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd",
- [YOLOV5_BOUNDING_BOX] = "yolov5",
- [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
- [YOLOV8_BOUNDING_BOX] = "yolov8",
- NULL,
-};
-
-/**
* @brief Structure for object centroid tracking.
*/
typedef struct {
int tracking_id;
} detectedObject;
+
/**
- * @brief C++-Template-like box location calculation for Tensorflow SSD model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] numinput Input Tensor Data (The number of detections)
- * @param[in] classinput Input Tensor Data (Detected classes)
- * @param[in] scoreinput Input Tensor Data (Detection scores)
- * @param[in] boxesinput Input Tensor Data (Boxes)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Apply NMS to the given results (objects[DETECTION_MAX])
+ * @param[in/out] results The results to be filtered with nms
*/
-#define _get_objects_mobilenet_ssd_pp(_type, typename, numinput, classinput, \
- scoreinput, boxesinput, config, results, i_width, i_height) \
- case typename: \
- { \
- int d, num; \
- size_t boxbpi; \
- _type *num_detection_ = (_type *) numinput; \
- _type *classes_ = (_type *) classinput; \
- _type *scores_ = (_type *) scoreinput; \
- _type *boxes_ = (_type *) boxesinput; \
- int locations_idx \
- = get_mobilenet_ssd_pp_tensor_idx (MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS); \
- num = (int) num_detection_[0]; \
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num); \
- boxbpi = config->info.info[locations_idx].dimension[0]; \
- for (d = 0; d < num; d++) { \
- _type x1, x2, y1, y2; \
- detectedObject object; \
- if (scores_[d] < threshold) \
- continue; \
- object.valid = TRUE; \
- object.class_id = (int) classes_[d]; \
- x1 = MIN (MAX (boxes_[d * boxbpi + 1], 0), 1); \
- y1 = MIN (MAX (boxes_[d * boxbpi], 0), 1); \
- x2 = MIN (MAX (boxes_[d * boxbpi + 3], 0), 1); \
- y2 = MIN (MAX (boxes_[d * boxbpi + 2], 0), 1); \
- object.x = (int) (x1 * i_width); \
- object.y = (int) (y1 * i_height); \
- object.width = (int) ((x2 - x1) * i_width); \
- object.height = (int) ((y2 - y1) * i_height); \
- object.prob = scores_[d]; \
- g_array_append_val (results, object); \
- } \
- } \
- break
-
-/** @brief Macro to simplify calling _get_objects_mobilenet_ssd_pp */
-#define _get_objects_mobilenet_ssd_pp_(type, typename) \
- _get_objects_mobilenet_ssd_pp (type, typename, (mem_num->data), (mem_classes->data), \
- (mem_scores->data), (mem_boxes->data), config, results, i_width, i_height)
-
-
-#define OV_PERSON_DETECTION_CONF_THRESHOLD (0.8)
+void nms (GArray *results, gfloat threshold);
+
/**
- * @brief C++-Template-like box location calculation for OpenVino Person Detection Model
- * @param[in] type The tensor type of inputptr
- * @param[in] intputptr Input tensor Data
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Check whether num_tensors is valid
+ * @param[in] config The structure of tensors info to check.
+ * @param[in] limit The limit of tensors number.
+ * @return TRUE if tensors info is valid.
*/
-#define _get_persons_ov(type, inputptr, typename, results) \
- case typename: \
- { \
- detectedObject object = { \
- .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
- }; \
- type *typed_inputptr = (type *) inputptr; \
- guint d; \
- \
- for (d = 1; d <= DETECTION_MAX; ++d) { \
- struct { \
- type image_id; \
- type label; \
- type conf; \
- type x_min; \
- type y_min; \
- type x_max; \
- type y_max; \
- } desc; \
- \
- memcpy (&desc, typed_inputptr, sizeof (desc)); \
- typed_inputptr += (sizeof (desc) / sizeof (type)); \
- object.valid = FALSE; \
- \
- if ((int) desc.image_id < 0) { \
- max_detection = (d - 1); \
- break; \
- } \
- object.class_id = -1; \
- object.x = (int) (desc.x_min * (type) i_width); \
- object.y = (int) (desc.y_min * (type) i_height); \
- object.width = (int) ((desc.x_max - desc.x_min) * (type) i_width); \
- object.height = (int) ((desc.y_max - desc.y_min) * (type) i_height); \
- if (desc.conf < OV_PERSON_DETECTION_CONF_THRESHOLD) \
- continue; \
- object.prob = 1; \
- object.valid = TRUE; \
- g_array_append_val (results, object); \
- } \
- } \
- break
+int check_tensors (const GstTensorsConfig *config, const unsigned int limit);
/**
- * @brief C++-Template-like box location calculation for Tensorflow model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] scoreinput Input Tensor Data (Detection scores)
- * @param[in] boxesinput Input Tensor Data (Boxes)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Interface for Bounding box's properties
*/
-#define _get_objects_mp_palm_detection(_type, typename, scoreinput, boxesinput, config) \
- case typename: \
- { \
- int d_; \
- _type *scores_ = (_type *) scoreinput; \
- _type *boxes_ = (_type *) boxesinput; \
- int num_ = max_detection; \
- size_t boxbpi_ = config->info.info[0].dimension[0]; \
- results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num_); \
- for (d_ = 0; d_ < num_; d_++) { \
- gfloat y_center, x_center, h, w; \
- gfloat ymin, xmin; \
- int y, x, width, height; \
- detectedObject object; \
- gfloat score = (gfloat) scores_[d_]; \
- _type *box = boxes_ + boxbpi_ * d_; \
- anchor *a = &g_array_index (this->anchors, anchor, d_); \
- score = MAX (score, -100.0f); \
- score = MIN (score, 100.0f); \
- score = 1.0f / (1.0f + exp (-score)); \
- if (score < min_score_threshold) \
- continue; \
- y_center = (box[0] * 1.f) / i_height * a->h + a->y_center; \
- x_center = (box[1] * 1.f) / i_width * a->w + a->x_center; \
- h = (box[2] * 1.f) / i_height * a->h; \
- w = (box[3] * 1.f) / i_width * a->w; \
- ymin = y_center - h / 2.f; \
- xmin = x_center - w / 2.f; \
- y = ymin * i_height; \
- x = xmin * i_width; \
- width = w * i_width; \
- height = h * i_height; \
- object.class_id = 0; \
- object.x = MAX (0, x); \
- object.y = MAX (0, y); \
- object.width = width; \
- object.height = height; \
- object.prob = score; \
- object.valid = TRUE; \
- g_array_append_val (results, object); \
- } \
- } \
- break
-
-/** @brief Macro to simplify calling _get_objects_mp_palm_detection */
-#define _get_objects_mp_palm_detection_(type, typename) \
- _get_objects_mp_palm_detection (type, typename, (detections->data), (boxes->data), config)
-
class BoxProperties
{
public:
guint total_labels;
};
-
+/**
+ * @brief Class for the bounding-box tensor decoder
+ */
class BoundingBox
{
public:
gboolean flag_use_label;
};
-/** @brief Mathematic inverse of sigmoid function, aka logit */
-static float
-logit (float x)
-{
- if (x <= 0.0f)
- return -INFINITY;
-
- if (x >= 1.0f)
- return INFINITY;
-
- return log (x / (1.0 - x));
-}
-
+/**
+ * @brief Class for MobilenetSSD box properties
+ * @details Keeps the box-prior file path, the loaded box priors
+ * (BOX_SIZE rows of DETECTION_MAX + 1 values each) and the inverse
+ * value of the valid detection threshold in the sigmoid domain.
+ * @note MAX_TENSORS, the *_IDX option indices and the *_DEFAULT values
+ * are not class members; they are defined as macros in
+ * box_properties/mobilenetssd.cc.
+ */
class MobilenetSSD : public BoxProperties
{
public:
static const int BOX_SIZE = 4;
static const int DETECTION_MAX = 2034; /* add ssd_mobilenet v3 support */
- static const guint MAX_TENSORS = 2U;
-
- static const int THRESHOLD_IDX = 0;
- static const int Y_SCALE_IDX = 1;
- static const int X_SCALE_IDX = 2;
- static const int H_SCALE_IDX = 3;
- static const int W_SCALE_IDX = 4;
- static const int IOU_THRESHOLD_IDX = 5;
static const int PARAMS_MAX = 6;
- static constexpr gfloat DETECTION_THRESHOLD_DEFAULT = 0.5f;
- static constexpr gfloat THRESHOLD_IOU_DEFAULT = 0.5f;
- static constexpr gfloat Y_SCALE_DEFAULT = 10.0f;
- static constexpr gfloat X_SCALE_DEFAULT = 10.0f;
- static constexpr gfloat H_SCALE_DEFAULT = 5.0f;
- static constexpr gfloat W_SCALE_DEFAULT = 5.0f;
-
private:
char *box_prior_path; /**< Box Prior file path */
gfloat box_priors[BOX_SIZE][DETECTION_MAX + 1]; /** loaded box prior */
gfloat sigmoid_threshold; /** Inverse value of valid detection threshold in sigmoid domain */
};
+/**
+ * @brief Class for MobilenetSSDPP box properties
+ * @details Keeps the output tensor index mapping (MAX_TENSORS entries)
+ * and the detection threshold.
+ * @note The box size, detection limit, tensor index names and default
+ * values are not declared in this class; presumably they are defined
+ * in box_properties/mobilenetssdpp.cc (TODO: confirm).
+ */
class MobilenetSSDPP : public BoxProperties
{
public:
int checkCompatible (const GstTensorsConfig *config);
GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
- static const int BOX_SIZE = 4;
- static const guint DETECTION_MAX = 100;
static const guint MAX_TENSORS = 4U;
- static const int LOCATIONS_IDX = 0;
- static const int CLASSES_IDX = 1;
- static const int SCORES_IDX = 2;
- static const int NUM_IDX = 3;
-
- static const gint LOCATIONS_DEFAULT = 3;
- static const gint CLASSES_DEFAULT = 1;
- static const gint SCORES_DEFAULT = 2;
- static const gint NUM_DEFAULT = 0;
- static constexpr gfloat THRESHOLD_DEFAULT = G_MINFLOAT;
private:
gint tensor_mapping[MAX_TENSORS]; /* Output tensor index mapping */
gfloat threshold; /* Detection threshold */
};
+/**
+ * @brief Class for OVDetection box properties
+ * @note DEFAULT_SIZE_DETECTION_DESC is presumably the number of values
+ * in one detection descriptor (TODO: confirm against ovdetection.cc).
+ */
class OVDetection : public BoxProperties
{
public:
static const guint DEFAULT_SIZE_DETECTION_DESC = 7;
};
-#define YOLO_DETECTION_CONF_THRESHOLD (0.25)
-#define YOLO_DETECTION_IOU_THRESHOLD (0.45)
-
+/**
+ * @brief Class for YoloV5 box properties
+ * @details Keeps whether the output values are scaled (taken from
+ * option3) and the IoU threshold.
+ * @note The constructor is only declared here; its definition, and the
+ * default threshold values it applies, presumably live in
+ * box_properties/yolo.cc (TODO: confirm).
+ */
class YoloV5 : public BoxProperties
{
public:
- YoloV5 ()
- : scaled_output (0), conf_threshold (YOLO_DETECTION_CONF_THRESHOLD),
- iou_threshold (YOLO_DETECTION_IOU_THRESHOLD)
- {
- }
+ YoloV5 ();
int setOptionInternal (const char *param);
int checkCompatible (const GstTensorsConfig *config);
GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
- static const int DEFAULT_DETECTION_NUM_INFO = 5;
-
private:
/* From option3, whether the output values are scaled or not */
int scaled_output;
gfloat iou_threshold;
};
+/**
+ * @brief Class for YoloV8 box properties
+ * @details Keeps whether the output values are scaled (taken from
+ * option3) and the IoU threshold.
+ * @note The constructor is only declared here; its definition, and the
+ * default threshold values it applies, presumably live in
+ * box_properties/yolo.cc (TODO: confirm).
+ */
class YoloV8 : public BoxProperties
{
public:
- YoloV8 ()
- : scaled_output (0), conf_threshold (YOLO_DETECTION_CONF_THRESHOLD),
- iou_threshold (YOLO_DETECTION_IOU_THRESHOLD)
- {
- }
+ YoloV8 ();
int setOptionInternal (const char *param);
int checkCompatible (const GstTensorsConfig *config);
GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
- static const int DEFAULT_DETECTION_NUM_INFO = 4;
-
private:
/* From option3, whether the output values are scaled or not */
int scaled_output;
gfloat iou_threshold;
};
+/**
+ * @brief Class for MpPalmDetection box properties
+ */
class MpPalmDetection : public BoxProperties
{
public:
GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
- static const guint INFO_SIZE = 18;
- static const guint MAX_TENSORS = 2U;
- static const guint MAX_DETECTION = 2016;
-
- static const gint NUM_LAYERS_DEFAULT = 4;
- static constexpr gfloat MIN_SCALE_DEFAULT = 1.0;
- static constexpr gfloat MAX_SCALE_DEFAULT = 1.0;
- static constexpr gfloat OFFSET_X_DEFAULT = 0.5;
- static constexpr gfloat OFFSET_Y_DEFAULT = 0.5;
- static const gint STRIDE_0_DEFAULT = 8;
- static const gint STRIDE_1_DEFAULT = 16;
- static const gint STRIDE_2_DEFAULT = 16;
- static const gint STRIDE_3_DEFAULT = 16;
- static constexpr gfloat MIN_SCORE_THRESHOLD_DEFAULT = 0.5;
-
- static const int PARAMS_STRIDE_SIZE = 8;
static const int PARAMS_MAX = 13;
private: