[decoder] Remove subclasses in the header

author Yelin Jeong <yelini.jeong@samsung.com>

Wed, 8 May 2024 08:48:22 +0000 (17:48 +0900)

committer MyungJoo Ham <myungjoo.ham@samsung.com>

Thu, 16 May 2024 10:55:50 +0000 (19:55 +0900)
author Yelin Jeong <yelini.jeong@samsung.com>
Wed, 8 May 2024 08:48:22 +0000 (17:48 +0900)
committer MyungJoo Ham <myungjoo.ham@samsung.com>
Thu, 16 May 2024 10:55:50 +0000 (19:55 +0900)
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/meson.build b/ext/nnstreamer/tensor_decoder/box_properties/meson.build

new file mode 100644 (file)

index 0000000..d07c9b4
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/meson.build
@@ -0,0 +1,7 @@
+# Append the box-property sources of this directory to the bounding-box source list.
+decoder_sub_bounding_boxes_sources += files(
+  'mobilenetssd.cc',
+  'mobilenetssdpp.cc',
+  'ovdetection.cc',
+  'yolo.cc',
+  'mppalmdetection.cc',
+)
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssd.cc b/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssd.cc

new file mode 100644 (file)

index 0000000..316c3ee
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssd.cc
@@ -0,0 +1,362 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file        mobilenetssd.cc
+ * @date        13 May 2024
+ * @brief       NNStreamer tensor-decoder bounding box properties
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Tensor count handed to check_tensors() and asserted in decode() */
+#define MAX_TENSORS (2U)
+
+/* Slots of params[]; setOptionInternal() fills them in this order from the option string */
+#define THRESHOLD_IDX (0)
+#define Y_SCALE_IDX (1)
+#define X_SCALE_IDX (2)
+#define H_SCALE_IDX (3)
+#define W_SCALE_IDX (4)
+#define IOU_THRESHOLD_IDX (5)
+
+/* Values the constructor stores into the params[] slots above */
+#define DETECTION_THRESHOLD_DEFAULT (0.5f)
+#define THRESHOLD_IOU_DEFAULT (0.5f)
+#define Y_SCALE_DEFAULT (10.0f)
+#define X_SCALE_DEFAULT (10.0f)
+#define H_SCALE_DEFAULT (5.0f)
+#define W_SCALE_DEFAULT (5.0f)
+
+/**
+ * @brief Logistic sigmoid, 1 / (1 + e^-x), evaluated in single precision.
+ *        The argument is parenthesized so a compound expression is cast as a whole.
+ */
+#define _expit(x) (1.f / (1.f + expf (-((float) (x)))))
+
+/**
+ * @brief C++-Template-like box location calculation for box-priors
+ * @bug This is not macro-argument safe. Use paranthesis!
+ * @param[in] bb The configuration, "bounding_boxes"
+ * @param[in] index The index (3rd dimension of BOX_SIZE:1:DETECTION_MAX:1)
+ * @param[in] total_labels The count of total labels. We can get this from input tensor info. (1st dimension of LABEL_SIZE:DETECTION_MAX:1:1)
+ * @param[in] boxprior The box prior data from the box file of SSD.
+ * @param[in] boxinputptr Cursor pointer of input + byte-per-index * index (box)
+ * @param[in] detinputptr Cursor pointer of input + byte-per-index * index (detection)
+ * @param[in] result The object returned. (pointer to object)
+ */
+#define _get_object_i_mobilenet_ssd(index, total_labels, boxprior,                \
+    boxinputptr, detinputptr, result, i_width, i_height)                          \
+  do {                                                                            \
+    unsigned int c;                                                               \
+    gfloat highscore = -FLT_MAX;                                                  \
+    float y_scale = params[Y_SCALE_IDX];                                          \
+    float x_scale = params[X_SCALE_IDX];                                          \
+    float h_scale = params[H_SCALE_IDX];                                          \
+    float w_scale = params[W_SCALE_IDX];                                          \
+    result->valid = FALSE;                                                        \
+    for (c = 1; c < total_labels; c++) {                                          \
+      if (detinputptr[c] >= sigmoid_threshold) {                                  \
+        gfloat score = _expit (detinputptr[c]);                                   \
+        float ycenter                                                             \
+            = boxinputptr[0] / y_scale * boxprior[2][index] + boxprior[0][index]; \
+        float xcenter                                                             \
+            = boxinputptr[1] / x_scale * boxprior[3][index] + boxprior[1][index]; \
+        float h = (float) expf (boxinputptr[2] / h_scale) * boxprior[2][index];   \
+        float w = (float) expf (boxinputptr[3] / w_scale) * boxprior[3][index];   \
+        float ymin = ycenter - h / 2.f;                                           \
+        float xmin = xcenter - w / 2.f;                                           \
+        int x = xmin * i_width;                                                   \
+        int y = ymin * i_height;                                                  \
+        int width = w * i_width;                                                  \
+        int height = h * i_height;                                                \
+        if (highscore < score) {                                                  \
+          result->class_id = c;                                                   \
+          result->x = MAX (0, x);                                                 \
+          result->y = MAX (0, y);                                                 \
+          result->width = width;                                                  \
+          result->height = height;                                                \
+          result->prob = score;                                                   \
+          result->valid = TRUE;                                                   \
+        }                                                                         \
+      }                                                                           \
+    }                                                                             \
+  } while (0);
+
+/**
+ * @brief C++-Template-like box location calculation for box-priors for Mobilenet SSD Model
+ * @param[in] type The tensor type of inputptr
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] boxprior The box prior data from the box file of MOBILENET_SSD.
+ * @param[in] boxinput Input Tensor Data (Boxes)
+ * @param[in] detinput Input Tensor Data (Detection). Null if not available. (numtensor ==1)
+ * @param[in] config Tensor configs of the input tensors
+ * @param[out] results The object returned. (GArray with detectedObject)
+ */
+#define _get_objects_mobilenet_ssd(_type, typename, boxprior, boxinput,                                      \
+    detinput, config, results, i_width, i_height, max_detection)                                             \
+  case typename:                                                                                             \
+    {                                                                                                        \
+      int d;                                                                                                 \
+      _type *boxinput_ = (_type *) boxinput;                                                                 \
+      size_t boxbpi = config->info.info[0].dimension[0];                                                     \
+      _type *detinput_ = (_type *) detinput;                                                                 \
+      size_t detbpi = config->info.info[1].dimension[0];                                                     \
+      int num = (DETECTION_MAX > max_detection) ? max_detection : DETECTION_MAX;                             \
+      detectedObject object = {                                                                              \
+        .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+      };                                                                                                     \
+      for (d = 0; d < num; d++) {                                                                            \
+        _get_object_i_mobilenet_ssd (d, detbpi, boxprior, (boxinput_ + (d * boxbpi)),                        \
+            (detinput_ + (d * detbpi)), (&object), i_width, i_height);                                       \
+        if (object.valid == TRUE) {                                                                          \
+          g_array_append_val (results, object);                                                              \
+        }                                                                                                    \
+      }                                                                                                      \
+    }                                                                                                        \
+    break
+
+
+/**
+ * @brief Shorthand for _get_objects_mobilenet_ssd: only the element type and its
+ *        enum are given, all other arguments are names taken from the expansion site.
+ */
+#define _get_objects_mobilenet_ssd_(type, typename)        \
+  _get_objects_mobilenet_ssd (type, typename,              \
+      box_priors, (boxes->data), (detections->data),       \
+      config, results, i_width, i_height, max_detection)
+
+/**
+ * @brief Mathematic inverse of sigmoid function, aka logit: log (x / (1 - x))
+ * @param[in] x The value to convert
+ * @return -INFINITY when x <= 0, INFINITY when x >= 1, the logit of x otherwise
+ */
+static float
+logit (float x)
+{
+  float value;
+
+  if (x <= 0.0f) {
+    value = -INFINITY;
+  } else if (x >= 1.0f) {
+    value = INFINITY;
+  } else {
+    value = log (x / (1.0 - x));
+  }
+
+  return value;
+}
+
+/**
+ * @brief Constructor of MobilenetSSD. No box prior file is set, the counters
+ *        are zero and every params[] slot holds its default.
+ */
+MobilenetSSD::MobilenetSSD ()
+{
+  box_prior_path = nullptr;
+  total_labels = 0;
+  max_detection = 0;
+
+  params[THRESHOLD_IDX] = DETECTION_THRESHOLD_DEFAULT;
+  params[IOU_THRESHOLD_IDX] = THRESHOLD_IOU_DEFAULT;
+  params[X_SCALE_IDX] = X_SCALE_DEFAULT;
+  params[Y_SCALE_IDX] = Y_SCALE_DEFAULT;
+  params[W_SCALE_IDX] = W_SCALE_DEFAULT;
+  params[H_SCALE_IDX] = H_SCALE_DEFAULT;
+
+  /* raw detection values are compared against the logit of the threshold */
+  sigmoid_threshold = logit (DETECTION_THRESHOLD_DEFAULT);
+}
+
+/**
+ * @brief Load box-prior data from a file
+ * @param[in/out] bdata The internal data.
+ * @return TRUE if loaded and configured. FALSE if failed to do so.
+ */
+int
+MobilenetSSD::mobilenet_ssd_loadBoxPrior ()
+{
+  gboolean failed = FALSE;
+  GError *err = NULL;
+  gchar **priors;
+  gchar *line = NULL;
+  gchar *contents = NULL;
+  guint row;
+  gint prev_reg = -1;
+
+  /* Read file contents */
+  if (!g_file_get_contents (box_prior_path, &contents, NULL, &err)) {
+    GST_ERROR ("Decoder/Bound-Box/SSD's box prior file %s cannot be read: %s",
+        box_prior_path, err->message);
+    g_clear_error (&err);
+    return FALSE;
+  }
+
+  priors = g_strsplit (contents, "\n", -1);
+  /* If given prior file is inappropriate, report back to tensor-decoder */
+  if (g_strv_length (priors) < BOX_SIZE) {
+    ml_loge ("The given prior file, %s, should have at least %d lines.\n",
+        box_prior_path, BOX_SIZE);
+    failed = TRUE;
+    goto error;
+  }
+
+  for (row = 0; row < BOX_SIZE; row++) {
+    gint column = 0, registered = 0;
+
+    line = priors[row];
+    if (line) {
+      gchar **list = g_strsplit_set (line, " \t,", -1);
+      gchar *word;
+
+      while ((word = list[column]) != NULL) {
+        column++;
+
+        if (word && *word) {
+          if (registered > DETECTION_MAX) {
+            GST_WARNING ("Decoder/Bound-Box/SSD's box prior data file has too many priors. %d >= %d",
+                registered, DETECTION_MAX);
+            break;
+          }
+          box_priors[row][registered] = (gfloat) g_ascii_strtod (word, NULL);
+          registered++;
+        }
+      }
+
+      g_strfreev (list);
+    }
+
+    if (prev_reg != -1 && prev_reg != registered) {
+      GST_ERROR ("Decoder/Bound-Box/SSD's box prior data file is not consistent.");
+      failed = TRUE;
+      break;
+    }
+    prev_reg = registered;
+  }
+
+error:
+  g_strfreev (priors);
+  g_free (contents);
+  return !failed;
+}
+
+/** @brief Set internal option of MobilenetSSD
+ *  @param[in] param The option string.
+ */
+int
+MobilenetSSD::setOptionInternal (const char *param)
+{
+  gchar **options;
+  int noptions, idx;
+  int ret = 1;
+
+  options = g_strsplit (param, ":", -1);
+  noptions = g_strv_length (options);
+
+  if (noptions > (PARAMS_MAX + 1))
+    noptions = PARAMS_MAX + 1;
+
+  if (box_prior_path) {
+    g_free (box_prior_path);
+    box_prior_path = nullptr;
+  }
+
+  box_prior_path = g_strdup (options[0]);
+
+  if (NULL != box_prior_path) {
+    ret = mobilenet_ssd_loadBoxPrior ();
+    if (ret == 0)
+      goto exit_mobilenet_ssd;
+  }
+
+  for (idx = 1; idx < noptions; idx++) {
+    if (strlen (options[idx]) == 0)
+      continue;
+    params[idx - 1] = strtod (options[idx], NULL);
+  }
+
+  sigmoid_threshold = logit (params[THRESHOLD_IDX]);
+
+  return TRUE;
+
+exit_mobilenet_ssd:
+  g_strfreev (options);
+  return ret;
+}
+
+/** @brief Check compatibility of given tensors config
+ *  @param[in] config The tensors config to check compatibility
+ *  @return TRUE if the two tensors have the expected dimensions, FALSE otherwise
+ *  @note While this->max_detection is still 0, the detection count of the
+ *        incoming tensors is stored there; otherwise it has to match the stored one.
+ */
+int
+MobilenetSSD::checkCompatible (const GstTensorsConfig *config)
+{
+  const uint32_t *dim1, *dim2;
+  int i;
+  /* The local max_detection shadows the member; the member is only reached via this-> below */
+  guint max_detection, max_label;
+
+  if (!check_tensors (config, MAX_TENSORS))
+    return FALSE;
+
+  /* Check if the first tensor is compatible */
+  /* Expected dimension: BOX_SIZE : 1 : max_detection, remaining entries 0 or 1 */
+  dim1 = config->info.info[0].dimension;
+  g_return_val_if_fail (dim1[0] == BOX_SIZE, FALSE);
+  g_return_val_if_fail (dim1[1] == 1, FALSE);
+  max_detection = dim1[2];
+  g_return_val_if_fail (max_detection > 0, FALSE);
+
+  /** @todo unused dimension value should be 0 */
+  for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
+    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+  /* Check if the second tensor is compatible */
+  /* Expected dimension: max_label : max_detection, remaining entries 0 or 1 */
+  dim2 = config->info.info[1].dimension;
+
+  max_label = dim2[0];
+  /* More labels in the tensor than total_labels is rejected; fewer only warns */
+  g_return_val_if_fail (max_label <= total_labels, FALSE);
+  if (max_label < total_labels)
+    GST_WARNING ("The given tensor (2nd) has max_label (first dimension: %u) smaller than the number of labels in labels file (%u).",
+        max_label, total_labels);
+  g_return_val_if_fail (max_detection == dim2[1], FALSE);
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
+    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+
+  /* Check consistency with max_detection */
+  if (this->max_detection == 0)
+    this->max_detection = max_detection;
+  else
+    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+  /* The stored count must not exceed DETECTION_MAX */
+  if (this->max_detection > DETECTION_MAX) {
+    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return GArray of detectedObject filled by the per-type case arm and then passed to nms()
+ */
+GArray *
+MobilenetSSD::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  /* boxes, detections, results and config are referenced by name from
+   * _get_objects_mobilenet_ssd_; they must keep these names. */
+  const GstTensorMemory *boxes, *detections = NULL;
+  GArray *results;
+  const guint num_tensors = config->info.num_tensors;
+
+  /**
+   * @todo 100 is a heuristic number of objects in a picture frame
+   *       We may have better "heuristics" than this.
+   *       For the sake of performance, don't make it too small.
+   */
+
+  /* Already checked with getOutCaps. Thus, this is an internal bug */
+  g_assert (num_tensors >= MAX_TENSORS);
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), 100);
+
+  /* input[0] carries the boxes, input[1] the per-class detection values */
+  boxes = &input[0];
+  if (num_tensors >= MAX_TENSORS) /* lgtm[cpp/constant-comparison] */
+    detections = &input[1];
+
+  /* One case arm per element type; the type of tensor 0 is applied to both tensors */
+  switch (config->info.info[0].type) {
+    _get_objects_mobilenet_ssd_ (uint8_t, _NNS_UINT8);
+    _get_objects_mobilenet_ssd_ (int8_t, _NNS_INT8);
+    _get_objects_mobilenet_ssd_ (uint16_t, _NNS_UINT16);
+    _get_objects_mobilenet_ssd_ (int16_t, _NNS_INT16);
+    _get_objects_mobilenet_ssd_ (uint32_t, _NNS_UINT32);
+    _get_objects_mobilenet_ssd_ (int32_t, _NNS_INT32);
+    _get_objects_mobilenet_ssd_ (uint64_t, _NNS_UINT64);
+    _get_objects_mobilenet_ssd_ (int64_t, _NNS_INT64);
+    _get_objects_mobilenet_ssd_ (float, _NNS_FLOAT32);
+    _get_objects_mobilenet_ssd_ (double, _NNS_FLOAT64);
+    default:
+      g_assert (0);
+  }
+  nms (results, params[IOU_THRESHOLD_IDX]);
+  return results;
+}
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssdpp.cc b/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssdpp.cc

new file mode 100644 (file)

index 0000000..570d3be
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/mobilenetssdpp.cc
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file        mobilenetssdpp.cc
+ * @date        13 May 2024
+ * @brief       NNStreamer tensor-decoder bounding box properties
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ */
+
+#include <stdio.h>
+#include "../tensordec-boundingbox.h"
+
+/** @brief First dimension checkCompatible() requires of the locations tensor */
+#define BOX_SIZE (4)
+/** @brief Largest max_detection accepted by checkCompatible() */
+#define DETECTION_MAX (100)
+/* Slots of tensor_mapping[] */
+#define LOCATIONS_IDX (0)
+#define CLASSES_IDX (1)
+#define SCORES_IDX (2)
+#define NUM_IDX (3)
+
+/* Tensor indices the constructor stores into the tensor_mapping[] slots above */
+#define LOCATIONS_DEFAULT (3)
+#define CLASSES_DEFAULT (1)
+#define SCORES_DEFAULT (2)
+#define NUM_DEFAULT (0)
+/** @brief Score threshold set by the constructor */
+#define THRESHOLD_DEFAULT (G_MINFLOAT)
+
+/**
+ * @brief C++-Template-like box location calculation for Tensorflow SSD model
+ * @param[in] type The tensor type of inputptr
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] numinput Input Tensor Data (The number of detections)
+ * @param[in] classinput Input Tensor Data (Detected classes)
+ * @param[in] scoreinput Input Tensor Data (Detection scores)
+ * @param[in] boxesinput Input Tensor Data (Boxes)
+ * @param[in] config Tensor configs of the input tensors
+ * @param[out] results The object returned. (GArray with detectedObject)
+ */
+#define _get_objects_mobilenet_ssd_pp(_type, typename, numinput, classinput,       \
+    scoreinput, boxesinput, config, results, i_width, i_height)                    \
+  case typename:                                                                   \
+    {                                                                              \
+      int d, num;                                                                  \
+      size_t boxbpi;                                                               \
+      _type *num_detection_ = (_type *) numinput;                                  \
+      _type *classes_ = (_type *) classinput;                                      \
+      _type *scores_ = (_type *) scoreinput;                                       \
+      _type *boxes_ = (_type *) boxesinput;                                        \
+      int locations_idx                                                            \
+          = get_mobilenet_ssd_pp_tensor_idx (MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS); \
+      num = (int) num_detection_[0];                                               \
+      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num);     \
+      boxbpi = config->info.info[locations_idx].dimension[0];                      \
+      for (d = 0; d < num; d++) {                                                  \
+        _type x1, x2, y1, y2;                                                      \
+        detectedObject object;                                                     \
+        if (scores_[d] < threshold)                                                \
+          continue;                                                                \
+        object.valid = TRUE;                                                       \
+        object.class_id = (int) classes_[d];                                       \
+        x1 = MIN (MAX (boxes_[d * boxbpi + 1], 0), 1);                             \
+        y1 = MIN (MAX (boxes_[d * boxbpi], 0), 1);                                 \
+        x2 = MIN (MAX (boxes_[d * boxbpi + 3], 0), 1);                             \
+        y2 = MIN (MAX (boxes_[d * boxbpi + 2], 0), 1);                             \
+        object.x = (int) (x1 * i_width);                                           \
+        object.y = (int) (y1 * i_height);                                          \
+        object.width = (int) ((x2 - x1) * i_width);                                \
+        object.height = (int) ((y2 - y1) * i_height);                              \
+        object.prob = scores_[d];                                                  \
+        g_array_append_val (results, object);                                      \
+      }                                                                            \
+    }                                                                              \
+    break
+
+/**
+ * @brief Shorthand for _get_objects_mobilenet_ssd_pp: only the element type and its
+ *        enum are given, all other arguments are names taken from the expansion site.
+ */
+#define _get_objects_mobilenet_ssd_pp_(type, typename)                 \
+  _get_objects_mobilenet_ssd_pp (type, typename,                       \
+      (mem_num->data), (mem_classes->data),                            \
+      (mem_scores->data), (mem_boxes->data),                           \
+      config, results, i_width, i_height)
+
+/**
+ * @brief MOBILENET SSD PostProcess Output tensor feature mapping.
+ * @note The values 0..3 coincide with LOCATIONS_IDX, CLASSES_IDX, SCORES_IDX and
+ *       NUM_IDX; _get_objects_mobilenet_ssd_pp passes the LOCATIONS value to
+ *       get_mobilenet_ssd_pp_tensor_idx ().
+ */
+typedef enum {
+  MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS = 0,
+  MOBILENET_SSD_PP_BBOX_IDX_CLASSES = 1,
+  MOBILENET_SSD_PP_BBOX_IDX_SCORES = 2,
+  MOBILENET_SSD_PP_BBOX_IDX_NUM = 3,
+  MOBILENET_SSD_PP_BBOX_IDX_UNKNOWN
+} mobilenet_ssd_pp_bbox_idx_t;
+
+/**
+ * @brief Constructor of MobilenetSSDPP. Every tensor_mapping slot and the
+ *        threshold start at their defaults.
+ */
+MobilenetSSDPP::MobilenetSSDPP ()
+{
+  threshold = THRESHOLD_DEFAULT;
+
+  tensor_mapping[NUM_IDX] = NUM_DEFAULT;
+  tensor_mapping[SCORES_IDX] = SCORES_DEFAULT;
+  tensor_mapping[CLASSES_IDX] = CLASSES_DEFAULT;
+  tensor_mapping[LOCATIONS_IDX] = LOCATIONS_DEFAULT;
+}
+
+/**
+ * @brief Helper to retrieve tensor index by feature
+ * @param[in] idx Slot of tensor_mapping to read. It is not range-checked here.
+ * @return The tensor index stored in that slot
+ */
+int
+MobilenetSSDPP::get_mobilenet_ssd_pp_tensor_idx (int idx)
+{
+  const int tensor_idx = tensor_mapping[idx];
+
+  return tensor_idx;
+}
+
+/** @brief Set internal option of MobilenetSSDPP
+ *  @param[in] param The option string.
+ */
+int
+MobilenetSSDPP::setOptionInternal (const char *param)
+{
+  int threshold_percent;
+  int ret = sscanf (param, "%i:%i:%i:%i,%i", &tensor_mapping[LOCATIONS_IDX],
+      &tensor_mapping[CLASSES_IDX], &tensor_mapping[SCORES_IDX],
+      &tensor_mapping[NUM_IDX], &threshold_percent);
+
+  if ((ret == EOF) || (ret < 5)) {
+    GST_ERROR ("Invalid options, must be \"locations idx:classes idx:scores idx:num idx,threshold\"");
+    return FALSE;
+  }
+
+  GST_INFO ("MOBILENET SSD POST PROCESS output tensors mapping: "
+            "locations idx (%d), classes idx (%d), scores idx (%d), num detections idx (%d)",
+      tensor_mapping[LOCATIONS_IDX], tensor_mapping[CLASSES_IDX],
+      tensor_mapping[SCORES_IDX], tensor_mapping[NUM_IDX]);
+
+  if ((threshold_percent > 100) || (threshold_percent < 0)) {
+    GST_ERROR ("Invalid MOBILENET SSD POST PROCESS threshold detection (%i), must be in range [0 100]",
+        threshold_percent);
+  } else {
+    threshold = threshold_percent / 100.0;
+  }
+
+  GST_INFO ("MOBILENET SSD POST PROCESS object detection threshold: %.2f", threshold);
+
+  return TRUE;
+}
+
+/** @brief Check compatibility of given tensors config */
+int
+MobilenetSSDPP::checkCompatible (const GstTensorsConfig *config)
+{
+  const uint32_t *dim1, *dim2, *dim3, *dim4;
+  int locations_idx, classes_idx, scores_idx, num_idx, i;
+
+  if (!check_tensors (config, MAX_TENSORS))
+    return FALSE;
+
+  locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
+  classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
+  scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
+  num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
+
+  /* Check if the number of detections tensor is compatible */
+  dim1 = config->info.info[num_idx].dimension;
+  g_return_val_if_fail (dim1[0] == 1, FALSE);
+  for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+  /* Check if the classes & scores tensors are compatible */
+  dim2 = config->info.info[classes_idx].dimension;
+  dim3 = config->info.info[scores_idx].dimension;
+  g_return_val_if_fail (dim3[0] == dim2[0], FALSE);
+  max_detection = dim2[0];
+  for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i) {
+    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+    g_return_val_if_fail (dim3[i] == 0 || dim3[i] == 1, FALSE);
+  }
+
+  /* Check if the bbox locations tensor is compatible */
+  dim4 = config->info.info[locations_idx].dimension;
+  g_return_val_if_fail (BOX_SIZE == dim4[0], FALSE);
+  g_return_val_if_fail (max_detection == dim4[1], FALSE);
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    g_return_val_if_fail (dim4[i] == 0 || dim4[i] == 1, FALSE);
+
+  /* Check consistency with max_detection */
+  if (this->max_detection == 0)
+    this->max_detection = max_detection;
+  else
+    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+  if (this->max_detection > DETECTION_MAX) {
+    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return GArray of detectedObject allocated and filled by the per-type case arm;
+ *         it stays NULL if no case arm matches.
+ */
+GArray *
+MobilenetSSDPP::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+
+  /* mem_num, mem_classes, mem_scores, mem_boxes, results and config are referenced
+   * by name from _get_objects_mobilenet_ssd_pp_; they must keep these names. */
+  const GstTensorMemory *mem_num, *mem_classes, *mem_scores, *mem_boxes;
+  int locations_idx, classes_idx, scores_idx, num_idx;
+  GArray *results = NULL;
+  const guint num_tensors = config->info.num_tensors;
+
+  /* Already checked with getOutCaps. Thus, this is an internal bug */
+  g_assert (num_tensors >= MAX_TENSORS);
+
+  /* Resolve which input tensor carries which feature */
+  locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
+  classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
+  scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
+  num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
+
+  mem_num = &input[num_idx];
+  mem_classes = &input[classes_idx];
+  mem_scores = &input[scores_idx];
+  mem_boxes = &input[locations_idx];
+
+  /* One case arm per element type; the type of the num tensor is applied to all four tensors */
+  switch (config->info.info[num_idx].type) {
+    _get_objects_mobilenet_ssd_pp_ (uint8_t, _NNS_UINT8);
+    _get_objects_mobilenet_ssd_pp_ (int8_t, _NNS_INT8);
+    _get_objects_mobilenet_ssd_pp_ (uint16_t, _NNS_UINT16);
+    _get_objects_mobilenet_ssd_pp_ (int16_t, _NNS_INT16);
+    _get_objects_mobilenet_ssd_pp_ (uint32_t, _NNS_UINT32);
+    _get_objects_mobilenet_ssd_pp_ (int32_t, _NNS_INT32);
+    _get_objects_mobilenet_ssd_pp_ (uint64_t, _NNS_UINT64);
+    _get_objects_mobilenet_ssd_pp_ (int64_t, _NNS_INT64);
+    _get_objects_mobilenet_ssd_pp_ (float, _NNS_FLOAT32);
+    _get_objects_mobilenet_ssd_pp_ (double, _NNS_FLOAT64);
+    default:
+      g_assert (0);
+  }
+  return results;
+}
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/mppalmdetection.cc b/ext/nnstreamer/tensor_decoder/box_properties/mppalmdetection.cc

new file mode 100644 (file)

index 0000000..b529003
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/mppalmdetection.cc
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file        mppalmdetection.cc
+ * @date        13 May 2024
+ * @brief       NNStreamer tensor-decoder bounding box properties
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Required size of the first dimension of tensor 0 (see checkCompatible) */
+#define INFO_SIZE (18)
+/** @brief Minimum number of input tensors this decoder works on */
+#define MAX_TENSORS (2U)
+/** @brief Largest per-frame detection count accepted by checkCompatible */
+#define MAX_DETECTION (2016)
+
+/* Defaults loaded by the MpPalmDetection constructor */
+#define NUM_LAYERS_DEFAULT (4)
+#define MIN_SCALE_DEFAULT (1.0)
+#define MAX_SCALE_DEFAULT (1.0)
+#define OFFSET_X_DEFAULT (0.5)
+#define OFFSET_Y_DEFAULT (0.5)
+#define STRIDE_0_DEFAULT (8)
+#define STRIDE_1_DEFAULT (16)
+#define STRIDE_2_DEFAULT (16)
+#define STRIDE_3_DEFAULT (16)
+#define MIN_SCORE_THRESHOLD_DEFAULT (0.5)
+
+/* NOTE(review): presumably the capacity of the strides[] member - confirm against the class declaration */
+#define PARAMS_STRIDE_SIZE (8)
+
+/**
+ * @brief C++-Template-like box location calculation for Tensorflow model
+ * @param[in] type The tensor type of inputptr
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[in] scoreinput Input Tensor Data (Detection scores)
+ * @param[in] boxesinput Input Tensor Data (Boxes)
+ * @param[in] config Tensor configs of the input tensors
+ * @param[out] results The object returned. (GArray with detectedObject)
+ */
+#define _get_objects_mp_palm_detection(_type, typename, scoreinput, boxesinput, config) \
+  case typename:                                                                        \
+    {                                                                                   \
+      int d_;                                                                           \
+      _type *scores_ = (_type *) scoreinput;                                            \
+      _type *boxes_ = (_type *) boxesinput;                                             \
+      int num_ = max_detection;                                                         \
+      size_t boxbpi_ = config->info.info[0].dimension[0];                               \
+      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num_);         \
+      for (d_ = 0; d_ < num_; d_++) {                                                   \
+        gfloat y_center, x_center, h, w;                                                \
+        gfloat ymin, xmin;                                                              \
+        int y, x, width, height;                                                        \
+        detectedObject object;                                                          \
+        gfloat score = (gfloat) scores_[d_];                                            \
+        _type *box = boxes_ + boxbpi_ * d_;                                             \
+        anchor *a = &g_array_index (this->anchors, anchor, d_);                         \
+        score = MAX (score, -100.0f);                                                   \
+        score = MIN (score, 100.0f);                                                    \
+        score = 1.0f / (1.0f + exp (-score));                                           \
+        if (score < min_score_threshold)                                                \
+          continue;                                                                     \
+        y_center = (box[0] * 1.f) / i_height * a->h + a->y_center;                      \
+        x_center = (box[1] * 1.f) / i_width * a->w + a->x_center;                       \
+        h = (box[2] * 1.f) / i_height * a->h;                                           \
+        w = (box[3] * 1.f) / i_width * a->w;                                            \
+        ymin = y_center - h / 2.f;                                                      \
+        xmin = x_center - w / 2.f;                                                      \
+        y = ymin * i_height;                                                            \
+        x = xmin * i_width;                                                             \
+        width = w * i_width;                                                            \
+        height = h * i_height;                                                          \
+        object.class_id = 0;                                                            \
+        object.x = MAX (0, x);                                                          \
+        object.y = MAX (0, y);                                                          \
+        object.width = width;                                                           \
+        object.height = height;                                                         \
+        object.prob = score;                                                            \
+        object.valid = TRUE;                                                            \
+        g_array_append_val (results, object);                                           \
+      }                                                                                 \
+    }                                                                                   \
+    break
+
+/**
+ * @brief Macro to simplify calling _get_objects_mp_palm_detection
+ * @details Takes `detections`, `boxes` and `config` from the scope it is expanded in
+ *          (MpPalmDetection::decode), so only the element type and its enum are passed.
+ */
+#define _get_objects_mp_palm_detection_(type, typename) \
+  _get_objects_mp_palm_detection (type, typename, (detections->data), (boxes->data), config)
+
+#define mp_palm_detection_option(option, type, idx) \
+  if (noptions > idx)                               \
+  option = (type) g_strtod (options[idx], NULL)
+
+/**
+ * @brief Calculate the anchor scale for one stride level
+ * @param[in] min_scale Scale returned for stride_index 0
+ * @param[in] max_scale Scale returned for stride_index (num_strides - 1)
+ * @param[in] stride_index Index of the level to evaluate
+ * @param[in] num_strides Total number of levels
+ * @return The midpoint of both scales when there is exactly one level,
+ *         otherwise the linear interpolation between them at stride_index.
+ */
+static gfloat
+_calculate_scale (float min_scale, float max_scale, int stride_index, int num_strides)
+{
+  if (num_strides != 1) {
+    return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
+  }
+
+  /* A single level has nothing to interpolate over */
+  return (min_scale + max_scale) * 0.5f;
+}
+
+/**
+ * @brief Generate anchor information
+ */
+void
+MpPalmDetection::mp_palm_detection_generate_anchors ()
+{
+  int layer_id = 0;
+  guint i;
+
+  while (layer_id < num_layers) {
+    GArray *aspect_ratios = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *scales = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *anchor_height = g_array_new (FALSE, TRUE, sizeof (gfloat));
+    GArray *anchor_width = g_array_new (FALSE, TRUE, sizeof (gfloat));
+
+    int last_same_stride_layer = layer_id;
+
+    while (last_same_stride_layer < num_layers
+           && strides[last_same_stride_layer] == strides[layer_id]) {
+      gfloat scale;
+      gfloat ratio = 1.0f;
+      g_array_append_val (aspect_ratios, ratio);
+      g_array_append_val (aspect_ratios, ratio);
+      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer, num_layers);
+      g_array_append_val (scales, scale);
+      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer + 1, num_layers);
+      g_array_append_val (scales, scale);
+      last_same_stride_layer++;
+    }
+
+    for (i = 0; i < aspect_ratios->len; ++i) {
+      const float ratio_sqrts = sqrt (g_array_index (aspect_ratios, gfloat, i));
+      const gfloat sc = g_array_index (scales, gfloat, i);
+      gfloat anchor_height_ = sc / ratio_sqrts;
+      gfloat anchor_width_ = sc * ratio_sqrts;
+      g_array_append_val (anchor_height, anchor_height_);
+      g_array_append_val (anchor_width, anchor_width_);
+    }
+
+    {
+      int feature_map_height = 0;
+      int feature_map_width = 0;
+      int x, y;
+      int anchor_id;
+
+      const int stride = strides[layer_id];
+      feature_map_height = ceil (1.0f * 192 / stride);
+      feature_map_width = ceil (1.0f * 192 / stride);
+      for (y = 0; y < feature_map_height; ++y) {
+        for (x = 0; x < feature_map_width; ++x) {
+          for (anchor_id = 0; anchor_id < (int) aspect_ratios->len; ++anchor_id) {
+            const float x_center = (x + offset_x) * 1.0f / feature_map_width;
+            const float y_center = (y + offset_y) * 1.0f / feature_map_height;
+
+            const anchor a = { .x_center = x_center,
+              .y_center = y_center,
+              .w = g_array_index (anchor_width, gfloat, anchor_id),
+              .h = g_array_index (anchor_height, gfloat, anchor_id) };
+            g_array_append_val (anchors, a);
+          }
+        }
+      }
+      layer_id = last_same_stride_layer;
+    }
+
+    g_array_free (anchor_height, TRUE);
+    g_array_free (anchor_width, TRUE);
+    g_array_free (aspect_ratios, TRUE);
+    g_array_free (scales, TRUE);
+  }
+}
+
+/**
+ * @brief Constructor of MpPalmDetection
+ * @details Loads the built-in default options and creates an empty anchor array.
+ */
+MpPalmDetection::MpPalmDetection ()
+{
+  /* Anchor table; starts empty */
+  anchors = g_array_new (FALSE, TRUE, sizeof (anchor));
+
+  /* Score filter */
+  min_score_threshold = MIN_SCORE_THRESHOLD_DEFAULT;
+
+  /* Anchor layout */
+  num_layers = NUM_LAYERS_DEFAULT;
+  strides[0] = STRIDE_0_DEFAULT;
+  strides[1] = STRIDE_1_DEFAULT;
+  strides[2] = STRIDE_2_DEFAULT;
+  strides[3] = STRIDE_3_DEFAULT;
+
+  /* Anchor scale range and cell offsets */
+  min_scale = MIN_SCALE_DEFAULT;
+  max_scale = MAX_SCALE_DEFAULT;
+  offset_x = OFFSET_X_DEFAULT;
+  offset_y = OFFSET_Y_DEFAULT;
+}
+
+/**
+ * @brief Destructor of MpPalmDetection
+ * @details Releases the anchor array together with its element storage.
+ */
+MpPalmDetection::~MpPalmDetection ()
+{
+  if (anchors != NULL) {
+    g_array_free (anchors, TRUE);
+    anchors = NULL;
+  }
+}
+
+/** @brief Set internal option of MpPalmDetection
+ *  @param[in] param The option string.
+ */
+int
+MpPalmDetection::setOptionInternal (const char *param)
+{
+  /* Load palm detection info from option3 */
+  gchar **options;
+  int noptions, idx;
+  int ret = TRUE;
+
+  options = g_strsplit (param, ":", -1);
+  noptions = g_strv_length (options);
+
+  if (noptions > PARAMS_MAX) {
+    GST_ERROR ("Invalid MP PALM DETECTION PARAM length: %d", noptions);
+    ret = FALSE;
+    goto exit_mp_palm_detection;
+  }
+
+  mp_palm_detection_option (min_score_threshold, gfloat, 0);
+  mp_palm_detection_option (num_layers, gint, 1);
+  mp_palm_detection_option (min_scale, gfloat, 2);
+  mp_palm_detection_option (max_scale, gfloat, 3);
+  mp_palm_detection_option (offset_x, gfloat, 4);
+  mp_palm_detection_option (offset_y, gfloat, 5);
+
+  for (idx = 6; idx < num_layers + 6; idx++) {
+    mp_palm_detection_option (strides[idx - 6], gint, idx);
+  }
+  mp_palm_detection_generate_anchors ();
+
+exit_mp_palm_detection:
+  g_strfreev (options);
+  return ret;
+}
+
+/** @brief Check compatibility of given tensors config */
+int
+MpPalmDetection::checkCompatible (const GstTensorsConfig *config)
+{
+  const uint32_t *dim1, *dim2;
+  int i;
+  if (!check_tensors (config, MAX_TENSORS))
+    return FALSE;
+
+  /* Check if the first tensor is compatible */
+  dim1 = config->info.info[0].dimension;
+
+  g_return_val_if_fail (dim1[0] == INFO_SIZE, FALSE);
+  max_detection = dim1[1];
+  g_return_val_if_fail (max_detection > 0, FALSE);
+  g_return_val_if_fail (dim1[2] == 1, FALSE);
+  for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
+    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
+
+  /* Check if the second tensor is compatible */
+  dim2 = config->info.info[1].dimension;
+  g_return_val_if_fail (dim2[0] == 1, FALSE);
+  g_return_val_if_fail (max_detection == dim2[1], FALSE);
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
+    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
+
+  /* Check consistency with max_detection */
+  if (this->max_detection == 0)
+    this->max_detection = max_detection;
+  else
+    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
+
+  if (this->max_detection > MAX_DETECTION) {
+    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return The GArray of detectedObject built by the type-dispatch macro and then passed through nms ().
+ */
+GArray *
+MpPalmDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  GArray *results = NULL;
+  const GstTensorMemory *boxes = NULL;
+  const GstTensorMemory *detections = NULL;
+  const guint num_tensors = config->info.num_tensors;
+
+  /* Already checked with getOutCaps. Thus, this is an internal bug */
+  g_assert (num_tensors >= MAX_TENSORS);
+
+  /* results will be allocated by _get_objects_mp_palm_detection_ */
+  /* Tensor 0 carries the boxes, tensor 1 the detection scores. */
+  boxes = &input[0];
+  detections = &input[1];
+  /* Both tensors are read with the element type of tensor 0. */
+  switch (config->info.info[0].type) {
+    _get_objects_mp_palm_detection_ (uint8_t, _NNS_UINT8);
+    _get_objects_mp_palm_detection_ (int8_t, _NNS_INT8);
+    _get_objects_mp_palm_detection_ (uint16_t, _NNS_UINT16);
+    _get_objects_mp_palm_detection_ (int16_t, _NNS_INT16);
+    _get_objects_mp_palm_detection_ (uint32_t, _NNS_UINT32);
+    _get_objects_mp_palm_detection_ (int32_t, _NNS_INT32);
+    _get_objects_mp_palm_detection_ (uint64_t, _NNS_UINT64);
+    _get_objects_mp_palm_detection_ (int64_t, _NNS_INT64);
+    _get_objects_mp_palm_detection_ (float, _NNS_FLOAT32);
+    _get_objects_mp_palm_detection_ (double, _NNS_FLOAT64);
+
+    default:
+      /* No listed tensor type matched */
+      g_assert (0);
+  }
+  /* Fixed threshold of 0.05 for this decoder's nms () pass */
+  nms (results, 0.05f);
+  return results;
+}
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/ovdetection.cc b/ext/nnstreamer/tensor_decoder/box_properties/ovdetection.cc

new file mode 100644 (file)

index 0000000..fcb141d
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/ovdetection.cc
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file        ovdetection.cc
+ * @date        13 May 2024
+ * @brief       NNStreamer tensor-decoder bounding box properties
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ */
+
+#include "../tensordec-boundingbox.h"
+
+/** @brief Records whose confidence is below this value are not reported */
+#define OV_PERSON_DETECTION_CONF_THRESHOLD (0.8)
+/**
+ * @brief C++-Template-like box location calculation for OpenVino Person Detection Model
+ * @details Expands to one `case` of a switch. The input is read as consecutive
+ *          records of seven `type` values (image_id, label, conf, x_min, y_min,
+ *          x_max, y_max), at most DETECTION_MAX of them. A negative image_id
+ *          ends the list and stores the number of records read so far in
+ *          `max_detection`. Records with conf below
+ *          OV_PERSON_DETECTION_CONF_THRESHOLD are skipped; the others are
+ *          appended with class_id -1 and prob 1, their box scaled by
+ *          `i_width` / `i_height`. `max_detection`, `i_width` and `i_height`
+ *          are taken from the expansion scope.
+ * @param[in] type The tensor type of inputptr
+ * @param[in] inputptr Input tensor Data
+ * @param[in] typename nnstreamer enum corresponding to the type
+ * @param[out] results The object returned. (GArray with detectedObject)
+ */
+#define _get_persons_ov(type, inputptr, typename, results)                                                   \
+  case typename:                                                                                             \
+    {                                                                                                        \
+      detectedObject object = {                                                                              \
+        .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
+      };                                                                                                     \
+      type *typed_inputptr = (type *) inputptr;                                                              \
+      guint d;                                                                                               \
+                                                                                                             \
+      for (d = 1; d <= DETECTION_MAX; ++d) {                                                                 \
+        struct {                                                                                             \
+          type image_id;                                                                                     \
+          type label;                                                                                        \
+          type conf;                                                                                         \
+          type x_min;                                                                                        \
+          type y_min;                                                                                        \
+          type x_max;                                                                                        \
+          type y_max;                                                                                        \
+        } desc;                                                                                              \
+                                                                                                             \
+        memcpy (&desc, typed_inputptr, sizeof (desc));                                                       \
+        typed_inputptr += (sizeof (desc) / sizeof (type));                                                   \
+        object.valid = FALSE;                                                                                \
+                                                                                                             \
+        if ((int) desc.image_id < 0) {                                                                       \
+          max_detection = (d - 1);                                                                           \
+          break;                                                                                             \
+        }                                                                                                    \
+        object.class_id = -1;                                                                                \
+        object.x = (int) (desc.x_min * (type) i_width);                                                      \
+        object.y = (int) (desc.y_min * (type) i_height);                                                     \
+        object.width = (int) ((desc.x_max - desc.x_min) * (type) i_width);                                   \
+        object.height = (int) ((desc.y_max - desc.y_min) * (type) i_height);                                 \
+        if (desc.conf < OV_PERSON_DETECTION_CONF_THRESHOLD)                                                  \
+          continue;                                                                                          \
+        object.prob = 1;                                                                                     \
+        object.valid = TRUE;                                                                                 \
+        g_array_append_val (results, object);                                                                \
+      }                                                                                                      \
+    }                                                                                                        \
+    break
+
+/**
+ * @brief Check compatibility of given tensors config
+ * @param[in] config Tensors config to validate
+ * @return TRUE when tensor 0 has dimension [DEFAULT_SIZE_DETECTION_DESC, DETECTION_MAX]
+ *         and every remaining dimension is 0 or 1; FALSE otherwise.
+ */
+int
+OVDetection::checkCompatible (const GstTensorsConfig *config)
+{
+  const guint *dim;
+  int i;
+  /* The label count takes no part in this check */
+  UNUSED (total_labels);
+
+  if (!check_tensors (config, DEFAULT_MAX_TENSORS))
+    return FALSE;
+
+  /**
+   * The shape of the output tensor is [7, N, 1, 1], where N is the maximum
+   * number (i.e., 200) of detected bounding boxes.
+   */
+  dim = config->info.info[0].dimension;
+  g_return_val_if_fail (dim[0] == DEFAULT_SIZE_DETECTION_DESC, FALSE);
+  g_return_val_if_fail (dim[1] == DETECTION_MAX, FALSE);
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
+
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @return GArray of detectedObject filled by _get_persons_ov from tensor 0.
+ */
+GArray *
+OVDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  GArray *results = NULL;
+  const guint num_tensors = config->info.num_tensors;
+
+  /* Already checked with getOutCaps. Thus, this is an internal bug */
+  g_assert (num_tensors >= DEFAULT_MAX_TENSORS);
+
+  /* Room for DETECTION_MAX entries is reserved up front */
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), DETECTION_MAX);
+  /* Read tensor 0 with its declared element type */
+  switch (config->info.info[0].type) {
+    _get_persons_ov (uint8_t, input[0].data, _NNS_UINT8, results);
+    _get_persons_ov (int8_t, input[0].data, _NNS_INT8, results);
+    _get_persons_ov (uint16_t, input[0].data, _NNS_UINT16, results);
+    _get_persons_ov (int16_t, input[0].data, _NNS_INT16, results);
+    _get_persons_ov (uint32_t, input[0].data, _NNS_UINT32, results);
+    _get_persons_ov (int32_t, input[0].data, _NNS_INT32, results);
+    _get_persons_ov (uint64_t, input[0].data, _NNS_UINT64, results);
+    _get_persons_ov (int64_t, input[0].data, _NNS_INT64, results);
+    _get_persons_ov (float, input[0].data, _NNS_FLOAT32, results);
+    _get_persons_ov (double, input[0].data, _NNS_FLOAT64, results);
+    default:
+      /* No listed tensor type matched */
+      g_assert (0);
+  }
+  return results;
+}
diff --git a/ext/nnstreamer/tensor_decoder/box_properties/yolo.cc b/ext/nnstreamer/tensor_decoder/box_properties/yolo.cc

new file mode 100644 (file)

index 0000000..1ec34b6
--- /dev/null
+++ b/ext/nnstreamer/tensor_decoder/box_properties/yolo.cc
@@ -0,0 +1,287 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * GStreamer / NNStreamer tensor-decoder bounding box properties
+ * Copyright (C) 2024 Yelin Jeong <yelini.jeong@samsung.com>
+ */
+/**
+ * @file        yolo.cc
+ * @date        13 May 2024
+ * @brief       NNStreamer tensor-decoder bounding box properties
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      Yelin Jeong <yelini.jeong@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ */
+
+#include <nnstreamer_plugin_api_util.h>
+#include "../tensordec-boundingbox.h"
+
+/** @brief Default confidence threshold set by the YoloV5 / YoloV8 constructors */
+#define YOLO_DETECTION_CONF_THRESHOLD (0.25)
+/** @brief Default IOU threshold set by the YoloV5 / YoloV8 constructors and passed to nms () */
+#define YOLO_DETECTION_IOU_THRESHOLD (0.45)
+/** @brief Values preceding the class scores of a YoloV5 box: cx, cy, w, h and a confidence at index 4 */
+#define DEFAULT_DETECTION_NUM_INFO_YOLO5 (5)
+/** @brief Values preceding the class scores of a YoloV8 box: cx, cy, w, h */
+#define DEFAULT_DETECTION_NUM_INFO_YOLO8 (4)
+
+/**
+ * @brief Constructor of YoloV5
+ * @details Starts with unscaled output and the default confidence / IOU thresholds.
+ */
+YoloV5::YoloV5 ()
+{
+  iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
+  conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
+  /* 0: box values are not scaled yet */
+  scaled_output = 0;
+}
+
+/** @brief Set internal option of YoloV5
+ *  @param[in] param The option string.
+ */
+int
+YoloV5::setOptionInternal (const char *param)
+{
+  gchar **options;
+  int noptions;
+
+  options = g_strsplit (param, ":", -1);
+  noptions = g_strv_length (options);
+  if (noptions > 0)
+    scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
+  if (noptions > 1)
+    conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
+  if (noptions > 2)
+    iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
+
+  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
+      scaled_output, conf_threshold, iou_threshold);
+
+  g_strfreev (options);
+  return TRUE;
+}
+
+/** @brief Check compatibility of given tensors config
+ *  @param[in] param The option string.
+ */
+int
+YoloV5::checkCompatible (const GstTensorsConfig *config)
+{
+  const guint *dim = config->info.info[0].dimension;
+  int i;
+
+  if (!check_tensors (config, 1U))
+    return FALSE;
+
+  max_detection = ((i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
+                      + (i_width / 8) * (i_height / 8))
+                  * 3;
+
+  g_return_val_if_fail (dim[0] == (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO5), FALSE);
+  g_return_val_if_fail (dim[1] == max_detection, FALSE);
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ */
+GArray *
+YoloV5::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  GArray *results = NULL;
+
+  int bIdx, numTotalBox;
+  int cIdx, numTotalClass, cStartIdx, cIdxMax;
+  float *boxinput;
+  int is_output_scaled = scaled_output;
+
+  numTotalBox = max_detection;
+  numTotalClass = total_labels;
+  cStartIdx = DEFAULT_DETECTION_NUM_INFO_YOLO5;
+  cIdxMax = numTotalClass + cStartIdx;
+
+  /* boxinput[numTotalBox][cIdxMax] */
+  boxinput = (float *) input[0].data;
+
+  /** Only support for float type model */
+  g_assert (config->info.info[0].type == _NNS_FLOAT32);
+
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
+  for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
+    float maxClassConfVal = -INFINITY;
+    int maxClassIdx = -1;
+    for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
+      if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
+        maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
+        maxClassIdx = cIdx;
+      }
+    }
+
+    if (maxClassConfVal * boxinput[bIdx * cIdxMax + 4] > conf_threshold) {
+      detectedObject object;
+      float cx, cy, w, h;
+      cx = boxinput[bIdx * cIdxMax + 0];
+      cy = boxinput[bIdx * cIdxMax + 1];
+      w = boxinput[bIdx * cIdxMax + 2];
+      h = boxinput[bIdx * cIdxMax + 3];
+
+      if (!is_output_scaled) {
+        cx *= (float) i_width;
+        cy *= (float) i_height;
+        w *= (float) i_width;
+        h *= (float) i_height;
+      }
+
+      object.x = (int) (MAX (0.f, (cx - w / 2.f)));
+      object.y = (int) (MAX (0.f, (cy - h / 2.f)));
+      object.width = (int) (MIN ((float) i_width, w));
+      object.height = (int) (MIN ((float) i_height, h));
+
+      object.prob = maxClassConfVal * boxinput[bIdx * cIdxMax + 4];
+      object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO_YOLO5;
+      object.tracking_id = 0;
+      object.valid = TRUE;
+      g_array_append_val (results, object);
+    }
+  }
+
+  nms (results, iou_threshold);
+  return results;
+}
+
+/**
+ * @brief Constructor of YoloV8
+ * @details Starts with unscaled output and the default confidence / IOU thresholds.
+ */
+YoloV8::YoloV8 ()
+{
+  iou_threshold = YOLO_DETECTION_IOU_THRESHOLD;
+  conf_threshold = YOLO_DETECTION_CONF_THRESHOLD;
+  /* 0: box values are not scaled yet */
+  scaled_output = 0;
+}
+
+/** @brief Set internal option of YoloV8 */
+int
+YoloV8::setOptionInternal (const char *param)
+{
+  gchar **options;
+  int noptions;
+
+  options = g_strsplit (param, ":", -1);
+  noptions = g_strv_length (options);
+  if (noptions > 0)
+    scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
+  if (noptions > 1)
+    conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
+  if (noptions > 2)
+    iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
+
+  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
+      scaled_output, conf_threshold, iou_threshold);
+
+  g_strfreev (options);
+  return TRUE;
+}
+
+/** @brief Check compatibility of given tensors config */
+int
+YoloV8::checkCompatible (const GstTensorsConfig *config)
+{
+  const guint *dim = config->info.info[0].dimension;
+  int i;
+  if (!check_tensors (config, 1U)) {
+    gchar *typestr = gst_tensors_info_to_string (&config->info);
+    nns_loge ("Yolov8 bounding-box decoder needs at least 1 valid tensor. The given input tensor is: %s.",
+        typestr);
+    g_free (typestr);
+    return FALSE;
+  }
+  /** Only support for float type model */
+  if (config->info.info[0].type != _NNS_FLOAT32) {
+    gchar *typestr = gst_tensors_info_to_string (&config->info);
+    nns_loge ("Yolov8 bounding-box decoder accepts float32 input tensors only. The given input tensor is: %s.",
+        typestr);
+    g_free (typestr);
+    return FALSE;
+  }
+
+  max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
+                  + (i_width / 8) * (i_height / 8);
+
+  if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8) || dim[1] != max_detection) {
+    nns_loge ("yolov8 boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
+        total_labels + DEFAULT_DETECTION_NUM_INFO_YOLO8, max_detection, dim[0], dim[1]);
+    return FALSE;
+  }
+
+  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
+    if (dim[i] != 0 && dim[i] != 1) {
+      gchar *typestr = gst_tensors_info_to_string (&config->info);
+      nns_loge ("Yolov8 bounding-box decoder accepts RANK=2 tensors (3rd and later dimensions should be 1 or 0). The given input tensor is: %s.",
+          typestr);
+      g_free (typestr);
+      return FALSE;
+    }
+  return TRUE;
+}
+
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ */
+GArray *
+YoloV8::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
+{
+  GArray *results = NULL;
+  int bIdx, numTotalBox;
+  int cIdx, numTotalClass, cStartIdx, cIdxMax;
+  float *boxinput;
+  int is_output_scaled = scaled_output;
+  UNUSED (config);
+
+  numTotalBox = max_detection;
+  numTotalClass = total_labels;
+  cStartIdx = DEFAULT_DETECTION_NUM_INFO_YOLO8;
+  cIdxMax = numTotalClass + cStartIdx;
+
+  /* boxinput[numTotalBox][cIdxMax] */
+  boxinput = (float *) input[0].data;
+
+  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
+  for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
+    float maxClassConfVal = -INFINITY;
+    int maxClassIdx = -1;
+    for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
+      if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
+        maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
+        maxClassIdx = cIdx;
+      }
+    }
+
+    if (maxClassConfVal > conf_threshold) {
+      detectedObject object;
+      float cx, cy, w, h;
+      cx = boxinput[bIdx * cIdxMax + 0];
+      cy = boxinput[bIdx * cIdxMax + 1];
+      w = boxinput[bIdx * cIdxMax + 2];
+      h = boxinput[bIdx * cIdxMax + 3];
+
+      if (!is_output_scaled) {
+        cx *= (float) i_width;
+        cy *= (float) i_height;
+        w *= (float) i_width;
+        h *= (float) i_height;
+      }
+
+      object.x = (int) (MAX (0.f, (cx - w / 2.f)));
+      object.y = (int) (MAX (0.f, (cy - h / 2.f)));
+      object.width = (int) (MIN ((float) i_width, w));
+      object.height = (int) (MIN ((float) i_height, h));
+
+      object.prob = maxClassConfVal;
+      object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO_YOLO8;
+      object.tracking_id = 0;
+      object.valid = TRUE;
+      g_array_append_val (results, object);
+    }
+  }
+
+  nms (results, iou_threshold);
+  return results;
+}
diff --git a/ext/nnstreamer/tensor_decoder/meson.build b/ext/nnstreamer/tensor_decoder/meson.build

index 466a445..b2b00b5 100644 (file)
--- a/ext/nnstreamer/tensor_decoder/meson.build
+++ b/ext/nnstreamer/tensor_decoder/meson.build
@@ -37,12 +37,13 @@ static_library('nnstreamer_decoder_image_labeling',
  )
  
  # bounding boxes
-decoder_sub_bounding_boxes_sources = [
+decoder_sub_bounding_boxes_sources = files(
    'tensordec-boundingbox.cc',
    'tensordecutil.c',
    'tensordec-font.c'
-]
+)
  
+subdir('box_properties')
  shared_library('nnstreamer_decoder_bounding_boxes',
    decoder_sub_bounding_boxes_sources,
    dependencies: [nnstreamer_dep, glib_dep, gst_dep, libm_dep],
diff --git a/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.cc b/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.cc

index ff0999d..a99b81d 100644 (file)
--- a/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.cc
+++ b/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.cc
@@ -26,81 +26,6 @@
   * @see         https://github.com/nnstreamer/nnstreamer
   * @author      MyungJoo Ham <myungjoo.ham@samsung.com>
   * @bug         No known bugs except for NYI items
- *
- * option1: Decoder mode of bounding box.
- *          Available: yolov5
- *                     mobilenet-ssd (single shot multibox detector with priors.)
- *                     mobilenet-ssd-postprocess
- *                     ov-person-detection
- *                     tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
- *                     tflite-ssd (deprecated, recommend to use mobilenet-ssd)
- * option2: Location of label file
- *          This is independent from option1
- * option3: Any option1-dependent values
- *          !!This depends on option1 values!!
- *          for yolov5 and yolov8 mode:
- *            The option3 requires up to 3 numbers, which tell
- *              - whether the output values are scaled or not
- *                0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
- *              - the threshold of confidence (optional, default set to 0.25)
- *              - the threshold of IOU (optional, default set to 0.45)
- *            An example of option3 is "option3=0:0.65:0.6"
- *          for mobilenet-ssd mode:
- *            The option3 definition scheme is, in order, the following:
- *                - box priors location file (mandatory)
- *                - Detection threshold (optional, default set to 0.5)
- *                - Y box scale (optional, default set to 10.0)
- *                - X box scale (optional, default set to 10.0)
- *                - h box scale (optional, default set to 5.0)
- *                - w box scale (optional, default set to 5.0)
- *                - IOU box valid threshold (optional, default set to 0.5)
- *            The default parameters value could be set in the following ways:
- *            option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
- *            option3=box-priors.txt
- *            option3=box-priors.txt::::::
- *
- *            It's possible to set only few values, using the default values for
- *            those not specified through the command line.
- *            You could specify respectively the detection and IOU thresholds to 0.65
- *            and 0.6 with the option3 parameter as follow:
- *            option3=box-priors.txt:0.65:::::0.6
- *          for mobilenet-ssd-postprocess mode:
- *            The option3 is required to have 5 integer numbers, which tell
- *            the tensor-dec how to interpret the given tensor inputs.
- *            The first 4 numbers separated by colon, ':', designate which
- *            are location:class:score:number of the tensors.
- *            The last number separated by comma, ',' from the first 4 numbers
- *            designate the threshold in percent.
- *            In other words, "option3=%i:%i:%i:%i,%i".
- *          for mp-palm-detection mode:
- *            The option3 is required to have 5 float numbers, as following
- *                - box score threshold (mandatory)
- *                - number of layers for anchor generation (optional, default set to 4)
- *                - minimum scale factor for anchor generation (optional, default set to 1.0)
- *                - maximum scale factor for anchor generation (optional, default set to 1.0)
- *                - X offset (optional, default set to 0.5)
- *                - Y offset (optional, default set to 0.5)
- *                - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
- *            The default parameter value could be set in the following ways:
- *            option3=0.5
- *            option3=0.5:4:0.2:0.8
- *            option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
- *
- * option4: Video Output Dimension (WIDTH:HEIGHT)
- *          This is independent from option1
- * option5: Input Dimension (WIDTH:HEIGHT)
- *          This is independent from option1
- * option6: Whether to track result bounding boxes or not
- *          0 (default, do not track)
- *          1 (track result bounding boxes, with naive centroid based algorithm)
- * option7: Whether to log the result bounding boxes or not
- *          0 (default, do not log)
- *          1 (log result bounding boxes)
- * option8: Box Style (NYI)
- *
- * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
- * (e.g., BGRA, ARGB, ...)
- *
   */
  
  /** @todo _GNU_SOURCE fix build warning expf (nested-externs). remove this later. */
@@ -108,13 +33,11 @@
  #define _GNU_SOURCE
  #endif
  #include <glib.h>
-#include <gst/gst.h>
-#include <nnstreamer_log.h>
+
  #include <nnstreamer_plugin_api.h>
  #include <nnstreamer_plugin_api_decoder.h>
-#include <nnstreamer_util.h>
+
  #include <stdint.h>
-#include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "tensordec-boundingbox.h"
@@ -138,6 +61,22 @@ extern uint8_t rasters[][13];
   */
  static singleLineSprite_t singleLineSprite;
  
+/**
+ * @brief Names of the bounding-box decoding schemes, each stored at the index of its mode constant; the list ends with NULL
+ */
+static const char *bb_modes[] = {
+  [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
+  [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
+  [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
+  [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
+  [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd", /* deprecated, recommend to use mobilenet-ssd */
+  [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd", /* deprecated, recommend to use mobilenet-ssd-postprocess */
+  [YOLOV5_BOUNDING_BOX] = "yolov5",
+  [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
+  [YOLOV8_BOUNDING_BOX] = "yolov8",
+  NULL,
+};
+
  /** @brief tensordec-plugin's GstTensorDecoderDef callback */
  static int
  bb_init (void **pdata)
@@ -322,11 +261,96 @@ fini_bb (void)
    nnstreamer_decoder_exit (boundingBox.modename);
  }
  
+/** @brief Three-way comparator on the distance field of two distanceArrayData entries. */
+static int
+distance_compare (const void *a, const void *b)
+{
+  const distanceArrayData *lhs = static_cast<const distanceArrayData *> (a);
+  const distanceArrayData *rhs = static_cast<const distanceArrayData *> (b);
+
+  if (lhs->distance > rhs->distance)
+    return 1;
+  if (lhs->distance < rhs->distance)
+    return -1;
+  return 0;
+}
+
+/** @brief Comparator for g_array_sort over detectedObject entries. */
+static gint
+compare_detection (gconstpointer _a, gconstpointer _b)
+{
+  const detectedObject *lhs = static_cast<const detectedObject *> (_a);
+  const detectedObject *rhs = static_cast<const detectedObject *> (_b);
+
+  /* Descending by prob: the entry with the larger prob sorts earlier. */
+  if (lhs->prob > rhs->prob)
+    return -1;
+  return (lhs->prob == rhs->prob) ? 0 : 1;
+}
+
+/**
+ * @brief Calculate the intersection-over-union (IoU) of two boxes, 0 if their union is empty
+ */
+static gfloat
+iou (detectedObject *a, detectedObject *b)
+{
+  int x1 = MAX (a->x, b->x);
+  int y1 = MAX (a->y, b->y);
+  int x2 = MIN (a->x + a->width, b->x + b->width);
+  int y2 = MIN (a->y + a->height, b->y + b->height);
+  int w = MAX (0, (x2 - x1)); /* no "+ 1": edges are exclusive, as in width * height */
+  int h = MAX (0, (y2 - y1));
+  float inter = (float) w * (float) h;
+  float areaA = (float) a->width * (float) a->height;
+  float areaB = (float) b->width * (float) b->height;
+  float uni = areaA + areaB - inter; /* area covered by either box */
+  return (uni > 0) ? (inter / uni) : 0;
+}
+
+/**
+ * @brief Non-maximum suppression: sort by descending prob, then drop boxes overlapping a better one
+ * @param[in/out] results The results to be filtered with nms
+ */
+void
+nms (GArray *results, gfloat threshold)
+{
+  guint keeper, cand, pos;
+  const guint total = results->len;
+
+  if (total == 0U)
+    return;
+
+  g_array_sort (results, compare_detection);
+
+  /* Every still-valid box invalidates the lower-ranked boxes it overlaps. */
+  for (keeper = 0; keeper < total; keeper++) {
+    detectedObject *best = &g_array_index (results, detectedObject, keeper);
+
+    if (best->valid != TRUE)
+      continue;
+
+    for (cand = keeper + 1; cand < total; cand++) {
+      detectedObject *other = &g_array_index (results, detectedObject, cand);
+
+      if (other->valid == TRUE && iou (best, other) > threshold)
+        other->valid = FALSE;
+    }
+  }
+
+  /* Erase invalidated entries; only step forward when nothing was removed. */
+  for (pos = 0; pos < results->len;) {
+    if (g_array_index (results, detectedObject, pos).valid == FALSE)
+      g_array_remove_index (results, pos);
+    else
+      pos++;
+  }
+}
+
  /**
   * @brief check the num_tensors is valid
   */
-static int
-_check_tensors (const GstTensorsConfig *config, const unsigned int limit)
+int
+check_tensors (const GstTensorsConfig *config, const unsigned int limit)
  {
    unsigned int i;
    g_return_val_if_fail (config != NULL, FALSE);
@@ -344,20 +368,7 @@ _check_tensors (const GstTensorsConfig *config, const unsigned int limit)
    return TRUE;
  }
  
-/** @brief Compare function for sorting distances. */
-static int
-distance_compare (const void *a, const void *b)
-{
-  const distanceArrayData *da = (const distanceArrayData *) a;
-  const distanceArrayData *db = (const distanceArrayData *) b;
-
-  if (da->distance < db->distance)
-    return -1;
-  if (da->distance > db->distance)
-    return 1;
-  return 0;
-}
-
+/** @brief Constructor of BoundingBox */
  BoundingBox::BoundingBox ()
  {
    mode = BOUNDING_BOX_UNKNOWN;
@@ -380,6 +391,7 @@ BoundingBox::BoundingBox ()
    bdata = nullptr;
  }
  
+/** @brief Destructor of BoundingBox */
  BoundingBox::~BoundingBox ()
  {
    _free_labels (&labeldata);
@@ -549,78 +561,6 @@ BoundingBox::updateCentroids (GArray *boxes)
  }
  
  /**
- * @brief Compare Function for g_array_sort with detectedObject.
- */
-static gint
-compare_detection (gconstpointer _a, gconstpointer _b)
-{
-  const detectedObject *a = static_cast<const detectedObject *> (_a);
-  const detectedObject *b = static_cast<const detectedObject *> (_b);
-
-  /* Larger comes first */
-  return (a->prob > b->prob) ? -1 : ((a->prob == b->prob) ? 0 : 1);
-}
-
-/**
- * @brief Calculate the intersected surface
- */
-static gfloat
-iou (detectedObject *a, detectedObject *b)
-{
-  int x1 = MAX (a->x, b->x);
-  int y1 = MAX (a->y, b->y);
-  int x2 = MIN (a->x + a->width, b->x + b->width);
-  int y2 = MIN (a->y + a->height, b->y + b->height);
-  int w = MAX (0, (x2 - x1 + 1));
-  int h = MAX (0, (y2 - y1 + 1));
-  float inter = w * h;
-  float areaA = a->width * a->height;
-  float areaB = b->width * b->height;
-  float o = inter / (areaA + areaB - inter);
-  return (o >= 0) ? o : 0;
-}
-
-/**
- * @brief Apply NMS to the given results (objects[DETECTION_MAX])
- * @param[in/out] results The results to be filtered with nms
- */
-static void
-nms (GArray *results, gfloat threshold)
-{
-  guint boxes_size;
-  guint i, j;
-
-  boxes_size = results->len;
-  if (boxes_size == 0U)
-    return;
-
-  g_array_sort (results, compare_detection);
-
-  for (i = 0; i < boxes_size; i++) {
-    detectedObject *a = &g_array_index (results, detectedObject, i);
-    if (a->valid == TRUE) {
-      for (j = i + 1; j < boxes_size; j++) {
-        detectedObject *b = &g_array_index (results, detectedObject, j);
-        if (b->valid == TRUE) {
-          if (iou (a, b) > threshold) {
-            b->valid = FALSE;
-          }
-        }
-      }
-    }
-  }
-
-  i = 0;
-  do {
-    detectedObject *a = &g_array_index (results, detectedObject, i);
-    if (a->valid == FALSE)
-      g_array_remove_index (results, i);
-    else
-      i++;
-  } while (i < results->len);
-}
-
-/**
   * @brief Draw with the given results (objects[DETECTION_MAX]) to the output buffer
   * @param[out] out_info The output buffer (RGBA plain)
   * @param[in] bdata The bounding-box internal data.
@@ -732,7 +672,7 @@ BoundingBox::logBoxes (GArray *results)
  }
  
  /**
- * @brief check the label relevant properties are valid
+ * @brief Check the label relevant properties are valid
   */
  gboolean
  BoundingBox::checkLabelProps ()
@@ -742,6 +682,9 @@ BoundingBox::checkLabelProps ()
    return TRUE;
  }
  
+/**
+ * @brief Set mode of bounding box
+ */
  int
  BoundingBox::setBoxDecodingMode (const char *param)
  {
@@ -789,6 +732,9 @@ BoundingBox::setBoxDecodingMode (const char *param)
    return TRUE;
  }
  
+/**
+ * @brief Set label path of bounding box
+ */
  int
  BoundingBox::setLabelPath (const char *param)
  {
@@ -812,6 +758,9 @@ BoundingBox::setLabelPath (const char *param)
    /** @todo Do not die for this */
  }
  
+/**
+ * @brief Set video size of bounding box
+ */
  int
  BoundingBox::setVideoSize (const char *param)
  {
@@ -838,6 +787,9 @@ BoundingBox::setVideoSize (const char *param)
    return TRUE;
  }
  
+/**
+ * @brief Set input model size of bounding box
+ */
  int
  BoundingBox::setInputModelSize (const char *param)
  {
@@ -864,6 +816,9 @@ BoundingBox::setInputModelSize (const char *param)
    return TRUE;
  }
  
+/**
+ * @brief Set option of bounding box
+ */
  int
  BoundingBox::setOption (BoundingBoxOption option, const char *param)
  {
@@ -893,6 +848,9 @@ BoundingBox::setOption (BoundingBoxOption option, const char *param)
    return TRUE;
  }
  
+/**
+ * @brief Get the output caps of the bounding box
+ */
  GstCaps *
  BoundingBox::getOutCaps (const GstTensorsConfig *config)
  {
@@ -913,6 +871,12 @@ BoundingBox::getOutCaps (const GstTensorsConfig *config)
    return caps;
  }
  
+/**
+ * @brief Decode input memory to out buffer
+ * @param[in] config The structure of input tensor info.
+ * @param[in] input The array of input tensor data. The maximum array size of input data is NNS_TENSOR_SIZE_LIMIT.
+ * @param[out] outbuf A sub-plugin should update or append proper memory for the negotiated media type.
+ */
  GstFlowReturn
  BoundingBox::decode (const GstTensorsConfig *config,
      const GstTensorMemory *input, GstBuffer *outbuf)
@@ -981,833 +945,3 @@ error_free:
  
    return GST_FLOW_ERROR;
  }
-
-MobilenetSSD::MobilenetSSD ()
-{
-  params[THRESHOLD_IDX] = DETECTION_THRESHOLD_DEFAULT;
-  params[Y_SCALE_IDX] = Y_SCALE_DEFAULT;
-  params[X_SCALE_IDX] = X_SCALE_DEFAULT;
-  params[H_SCALE_IDX] = H_SCALE_DEFAULT;
-  params[W_SCALE_IDX] = W_SCALE_DEFAULT;
-  params[IOU_THRESHOLD_IDX] = THRESHOLD_IOU_DEFAULT;
-  sigmoid_threshold = logit (DETECTION_THRESHOLD_DEFAULT);
-
-  max_detection = 0;
-  total_labels = 0;
-  box_prior_path = nullptr;
-}
-
-/**
- * @brief Load box-prior data from a file
- * @param[in/out] bdata The internal data.
- * @return TRUE if loaded and configured. FALSE if failed to do so.
- */
-int
-MobilenetSSD::mobilenet_ssd_loadBoxPrior ()
-{
-  gboolean failed = FALSE;
-  GError *err = NULL;
-  gchar **priors;
-  gchar *line = NULL;
-  gchar *contents = NULL;
-  guint row;
-  gint prev_reg = -1;
-
-  /* Read file contents */
-  if (!g_file_get_contents (box_prior_path, &contents, NULL, &err)) {
-    GST_ERROR ("Decoder/Bound-Box/SSD's box prior file %s cannot be read: %s",
-        box_prior_path, err->message);
-    g_clear_error (&err);
-    return FALSE;
-  }
-
-  priors = g_strsplit (contents, "\n", -1);
-  /* If given prior file is inappropriate, report back to tensor-decoder */
-  if (g_strv_length (priors) < BOX_SIZE) {
-    ml_loge ("The given prior file, %s, should have at least %d lines.\n",
-        box_prior_path, BOX_SIZE);
-    failed = TRUE;
-    goto error;
-  }
-
-  for (row = 0; row < BOX_SIZE; row++) {
-    gint column = 0, registered = 0;
-
-    line = priors[row];
-    if (line) {
-      gchar **list = g_strsplit_set (line, " \t,", -1);
-      gchar *word;
-
-      while ((word = list[column]) != NULL) {
-        column++;
-
-        if (word && *word) {
-          if (registered > DETECTION_MAX) {
-            GST_WARNING ("Decoder/Bound-Box/SSD's box prior data file has too many priors. %d >= %d",
-                registered, DETECTION_MAX);
-            break;
-          }
-          box_priors[row][registered] = (gfloat) g_ascii_strtod (word, NULL);
-          registered++;
-        }
-      }
-
-      g_strfreev (list);
-    }
-
-    if (prev_reg != -1 && prev_reg != registered) {
-      GST_ERROR ("Decoder/Bound-Box/SSD's box prior data file is not consistent.");
-      failed = TRUE;
-      break;
-    }
-    prev_reg = registered;
-  }
-
-error:
-  g_strfreev (priors);
-  g_free (contents);
-  return !failed;
-}
-
-int
-MobilenetSSD::setOptionInternal (const char *param)
-{
-  gchar **options;
-  int noptions, idx;
-  int ret = 1;
-
-  options = g_strsplit (param, ":", -1);
-  noptions = g_strv_length (options);
-
-  if (noptions > (PARAMS_MAX + 1))
-    noptions = PARAMS_MAX + 1;
-
-  if (box_prior_path) {
-    g_free (box_prior_path);
-    box_prior_path = nullptr;
-  }
-
-  box_prior_path = g_strdup (options[0]);
-
-  if (NULL != box_prior_path) {
-    ret = mobilenet_ssd_loadBoxPrior ();
-    if (ret == 0)
-      goto exit_mobilenet_ssd;
-  }
-
-  for (idx = 1; idx < noptions; idx++) {
-    if (strlen (options[idx]) == 0)
-      continue;
-    params[idx - 1] = strtod (options[idx], NULL);
-  }
-
-  sigmoid_threshold = logit (params[THRESHOLD_IDX]);
-
-  return TRUE;
-
-exit_mobilenet_ssd:
-  g_strfreev (options);
-  return ret;
-}
-
-int
-MobilenetSSD::checkCompatible (const GstTensorsConfig *config)
-{
-  const uint32_t *dim1, *dim2;
-  int i;
-  guint max_detection, max_label;
-
-  if (!_check_tensors (config, MAX_TENSORS))
-    return FALSE;
-
-  /* Check if the first tensor is compatible */
-  dim1 = config->info.info[0].dimension;
-  g_return_val_if_fail (dim1[0] == BOX_SIZE, FALSE);
-  g_return_val_if_fail (dim1[1] == 1, FALSE);
-  max_detection = dim1[2];
-  g_return_val_if_fail (max_detection > 0, FALSE);
-
-  /** @todo unused dimension value should be 0 */
-  for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
-    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
-  /* Check if the second tensor is compatible */
-  dim2 = config->info.info[1].dimension;
-
-  max_label = dim2[0];
-  g_return_val_if_fail (max_label <= total_labels, FALSE);
-  if (max_label < total_labels)
-    GST_WARNING ("The given tensor (2nd) has max_label (first dimension: %u) smaller than the number of labels in labels file (%u).",
-        max_label, total_labels);
-  g_return_val_if_fail (max_detection == dim2[1], FALSE);
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
-    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
-
-  /* Check consistency with max_detection */
-  if (this->max_detection == 0)
-    this->max_detection = max_detection;
-  else
-    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
-  if (this->max_detection > DETECTION_MAX) {
-    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
-    return FALSE;
-  }
-
-  return TRUE;
-}
-
-GArray *
-MobilenetSSD::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-  const GstTensorMemory *boxes, *detections = NULL;
-  GArray *results;
-  const guint num_tensors = config->info.num_tensors;
-
-  /**
-   * @todo 100 is a heuristic number of objects in a picture frame
-   *       We may have better "heuristics" than this.
-   *       For the sake of performance, don't make it too small.
-   */
-
-  /* Already checked with getOutCaps. Thus, this is an internal bug */
-  g_assert (num_tensors >= MAX_TENSORS);
-  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), 100);
-
-  boxes = &input[0];
-  if (num_tensors >= MAX_TENSORS) /* lgtm[cpp/constant-comparison] */
-    detections = &input[1];
-
-  switch (config->info.info[0].type) {
-    _get_objects_mobilenet_ssd_ (uint8_t, _NNS_UINT8);
-    _get_objects_mobilenet_ssd_ (int8_t, _NNS_INT8);
-    _get_objects_mobilenet_ssd_ (uint16_t, _NNS_UINT16);
-    _get_objects_mobilenet_ssd_ (int16_t, _NNS_INT16);
-    _get_objects_mobilenet_ssd_ (uint32_t, _NNS_UINT32);
-    _get_objects_mobilenet_ssd_ (int32_t, _NNS_INT32);
-    _get_objects_mobilenet_ssd_ (uint64_t, _NNS_UINT64);
-    _get_objects_mobilenet_ssd_ (int64_t, _NNS_INT64);
-    _get_objects_mobilenet_ssd_ (float, _NNS_FLOAT32);
-    _get_objects_mobilenet_ssd_ (double, _NNS_FLOAT64);
-    default:
-      g_assert (0);
-  }
-  nms (results, params[IOU_THRESHOLD_IDX]);
-  return results;
-}
-
-MobilenetSSDPP::MobilenetSSDPP ()
-{
-  tensor_mapping[LOCATIONS_IDX] = LOCATIONS_DEFAULT;
-  tensor_mapping[CLASSES_IDX] = CLASSES_DEFAULT;
-  tensor_mapping[SCORES_IDX] = SCORES_DEFAULT;
-  tensor_mapping[NUM_IDX] = NUM_DEFAULT;
-  threshold = THRESHOLD_DEFAULT;
-}
-
-/** @brief Helper to retrieve tensor index by feature */
-int
-MobilenetSSDPP::get_mobilenet_ssd_pp_tensor_idx (int idx)
-{
-  return tensor_mapping[idx];
-}
-
-int
-MobilenetSSDPP::setOptionInternal (const char *param)
-{
-  int threshold_percent;
-  int ret = sscanf (param, "%i:%i:%i:%i,%i", &tensor_mapping[LOCATIONS_IDX],
-      &tensor_mapping[CLASSES_IDX], &tensor_mapping[SCORES_IDX],
-      &tensor_mapping[NUM_IDX], &threshold_percent);
-
-  if ((ret == EOF) || (ret < 5)) {
-    GST_ERROR ("Invalid options, must be \"locations idx:classes idx:scores idx:num idx,threshold\"");
-    return FALSE;
-  }
-
-  GST_INFO ("MOBILENET SSD POST PROCESS output tensors mapping: "
-            "locations idx (%d), classes idx (%d), scores idx (%d), num detections idx (%d)",
-      tensor_mapping[LOCATIONS_IDX], tensor_mapping[CLASSES_IDX],
-      tensor_mapping[SCORES_IDX], tensor_mapping[NUM_IDX]);
-
-  if ((threshold_percent > 100) || (threshold_percent < 0)) {
-    GST_ERROR ("Invalid MOBILENET SSD POST PROCESS threshold detection (%i), must be in range [0 100]",
-        threshold_percent);
-  } else {
-    threshold = threshold_percent / 100.0;
-  }
-
-  GST_INFO ("MOBILENET SSD POST PROCESS object detection threshold: %.2f", threshold);
-
-  return TRUE;
-}
-
-int
-MobilenetSSDPP::checkCompatible (const GstTensorsConfig *config)
-{
-  const uint32_t *dim1, *dim2, *dim3, *dim4;
-  int locations_idx, classes_idx, scores_idx, num_idx, i;
-
-  if (!_check_tensors (config, MAX_TENSORS))
-    return FALSE;
-
-  locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
-  classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
-  scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
-  num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
-
-  /* Check if the number of detections tensor is compatible */
-  dim1 = config->info.info[num_idx].dimension;
-  g_return_val_if_fail (dim1[0] == 1, FALSE);
-  for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i)
-    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
-  /* Check if the classes & scores tensors are compatible */
-  dim2 = config->info.info[classes_idx].dimension;
-  dim3 = config->info.info[scores_idx].dimension;
-  g_return_val_if_fail (dim3[0] == dim2[0], FALSE);
-  max_detection = dim2[0];
-  for (i = 1; i < NNS_TENSOR_RANK_LIMIT; ++i) {
-    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
-    g_return_val_if_fail (dim3[i] == 0 || dim3[i] == 1, FALSE);
-  }
-
-  /* Check if the bbox locations tensor is compatible */
-  dim4 = config->info.info[locations_idx].dimension;
-  g_return_val_if_fail (BOX_SIZE == dim4[0], FALSE);
-  g_return_val_if_fail (max_detection == dim4[1], FALSE);
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
-    g_return_val_if_fail (dim4[i] == 0 || dim4[i] == 1, FALSE);
-
-  /* Check consistency with max_detection */
-  if (this->max_detection == 0)
-    this->max_detection = max_detection;
-  else
-    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
-  if (this->max_detection > DETECTION_MAX) {
-    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
-    return FALSE;
-  }
-  return TRUE;
-}
-
-GArray *
-MobilenetSSDPP::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-
-  const GstTensorMemory *mem_num, *mem_classes, *mem_scores, *mem_boxes;
-  int locations_idx, classes_idx, scores_idx, num_idx;
-  GArray *results = NULL;
-  const guint num_tensors = config->info.num_tensors;
-
-  /* Already checked with getOutCaps. Thus, this is an internal bug */
-  g_assert (num_tensors >= MAX_TENSORS);
-
-  locations_idx = get_mobilenet_ssd_pp_tensor_idx (LOCATIONS_IDX);
-  classes_idx = get_mobilenet_ssd_pp_tensor_idx (CLASSES_IDX);
-  scores_idx = get_mobilenet_ssd_pp_tensor_idx (SCORES_IDX);
-  num_idx = get_mobilenet_ssd_pp_tensor_idx (NUM_IDX);
-
-  mem_num = &input[num_idx];
-  mem_classes = &input[classes_idx];
-  mem_scores = &input[scores_idx];
-  mem_boxes = &input[locations_idx];
-
-  switch (config->info.info[num_idx].type) {
-    _get_objects_mobilenet_ssd_pp_ (uint8_t, _NNS_UINT8);
-    _get_objects_mobilenet_ssd_pp_ (int8_t, _NNS_INT8);
-    _get_objects_mobilenet_ssd_pp_ (uint16_t, _NNS_UINT16);
-    _get_objects_mobilenet_ssd_pp_ (int16_t, _NNS_INT16);
-    _get_objects_mobilenet_ssd_pp_ (uint32_t, _NNS_UINT32);
-    _get_objects_mobilenet_ssd_pp_ (int32_t, _NNS_INT32);
-    _get_objects_mobilenet_ssd_pp_ (uint64_t, _NNS_UINT64);
-    _get_objects_mobilenet_ssd_pp_ (int64_t, _NNS_INT64);
-    _get_objects_mobilenet_ssd_pp_ (float, _NNS_FLOAT32);
-    _get_objects_mobilenet_ssd_pp_ (double, _NNS_FLOAT64);
-    default:
-      g_assert (0);
-  }
-  return results;
-}
-
-int
-OVDetection::checkCompatible (const GstTensorsConfig *config)
-{
-  const guint *dim;
-  int i;
-  UNUSED (total_labels);
-
-  if (!_check_tensors (config, DEFAULT_MAX_TENSORS))
-    return FALSE;
-
-  /**
-   * The shape of the ouput tensor is [7, N, 1, 1], where N is the maximum
-   * number (i.e., 200) of detected bounding boxes.
-   */
-  dim = config->info.info[0].dimension;
-  g_return_val_if_fail (dim[0] == DEFAULT_SIZE_DETECTION_DESC, FALSE);
-  g_return_val_if_fail (dim[1] == DETECTION_MAX, FALSE);
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
-    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
-
-  return TRUE;
-}
-
-GArray *
-OVDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-  GArray *results = NULL;
-  const guint num_tensors = config->info.num_tensors;
-
-  /* Already checked with getOutCaps. Thus, this is an internal bug */
-  g_assert (num_tensors >= DEFAULT_MAX_TENSORS);
-
-  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), DETECTION_MAX);
-  switch (config->info.info[0].type) {
-    _get_persons_ov (uint8_t, input[0].data, _NNS_UINT8, results);
-    _get_persons_ov (int8_t, input[0].data, _NNS_INT8, results);
-    _get_persons_ov (uint16_t, input[0].data, _NNS_UINT16, results);
-    _get_persons_ov (int16_t, input[0].data, _NNS_INT16, results);
-    _get_persons_ov (uint32_t, input[0].data, _NNS_UINT32, results);
-    _get_persons_ov (int32_t, input[0].data, _NNS_INT32, results);
-    _get_persons_ov (uint64_t, input[0].data, _NNS_UINT64, results);
-    _get_persons_ov (int64_t, input[0].data, _NNS_INT64, results);
-    _get_persons_ov (float, input[0].data, _NNS_FLOAT32, results);
-    _get_persons_ov (double, input[0].data, _NNS_FLOAT64, results);
-    default:
-      g_assert (0);
-  }
-  return results;
-}
-
-int
-YoloV5::setOptionInternal (const char *param)
-{
-  gchar **options;
-  int noptions;
-
-  options = g_strsplit (param, ":", -1);
-  noptions = g_strv_length (options);
-  if (noptions > 0)
-    scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
-  if (noptions > 1)
-    conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
-  if (noptions > 2)
-    iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
-
-  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
-      scaled_output, conf_threshold, iou_threshold);
-
-  g_strfreev (options);
-  return TRUE;
-}
-
-int
-YoloV5::checkCompatible (const GstTensorsConfig *config)
-{
-  const guint *dim = config->info.info[0].dimension;
-  int i;
-
-  if (!_check_tensors (config, 1U))
-    return FALSE;
-
-  max_detection = ((i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
-                      + (i_width / 8) * (i_height / 8))
-                  * 3;
-
-  g_return_val_if_fail (dim[0] == (total_labels + DEFAULT_DETECTION_NUM_INFO), FALSE);
-  g_return_val_if_fail (dim[1] == max_detection, FALSE);
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
-    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
-  return TRUE;
-}
-
-GArray *
-YoloV5::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-  GArray *results = NULL;
-
-  int bIdx, numTotalBox;
-  int cIdx, numTotalClass, cStartIdx, cIdxMax;
-  float *boxinput;
-  int is_output_scaled = scaled_output;
-
-  numTotalBox = max_detection;
-  numTotalClass = total_labels;
-  cStartIdx = DEFAULT_DETECTION_NUM_INFO;
-  cIdxMax = numTotalClass + cStartIdx;
-
-  /* boxinput[numTotalBox][cIdxMax] */
-  boxinput = (float *) input[0].data;
-
-  /** Only support for float type model */
-  g_assert (config->info.info[0].type == _NNS_FLOAT32);
-
-  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
-  for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
-    float maxClassConfVal = -INFINITY;
-    int maxClassIdx = -1;
-    for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
-      if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
-        maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
-        maxClassIdx = cIdx;
-      }
-    }
-
-    if (maxClassConfVal * boxinput[bIdx * cIdxMax + 4] > conf_threshold) {
-      detectedObject object;
-      float cx, cy, w, h;
-      cx = boxinput[bIdx * cIdxMax + 0];
-      cy = boxinput[bIdx * cIdxMax + 1];
-      w = boxinput[bIdx * cIdxMax + 2];
-      h = boxinput[bIdx * cIdxMax + 3];
-
-      if (!is_output_scaled) {
-        cx *= (float) i_width;
-        cy *= (float) i_height;
-        w *= (float) i_width;
-        h *= (float) i_height;
-      }
-
-      object.x = (int) (MAX (0.f, (cx - w / 2.f)));
-      object.y = (int) (MAX (0.f, (cy - h / 2.f)));
-      object.width = (int) (MIN ((float) i_width, w));
-      object.height = (int) (MIN ((float) i_height, h));
-
-      object.prob = maxClassConfVal * boxinput[bIdx * cIdxMax + 4];
-      object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO;
-      object.tracking_id = 0;
-      object.valid = TRUE;
-      g_array_append_val (results, object);
-    }
-  }
-
-  nms (results, iou_threshold);
-  return results;
-}
-
-
-int
-YoloV8::setOptionInternal (const char *param)
-{
-  gchar **options;
-  int noptions;
-
-  options = g_strsplit (param, ":", -1);
-  noptions = g_strv_length (options);
-  if (noptions > 0)
-    scaled_output = (int) g_ascii_strtoll (options[0], NULL, 10);
-  if (noptions > 1)
-    conf_threshold = (gfloat) g_ascii_strtod (options[1], NULL);
-  if (noptions > 2)
-    iou_threshold = (gfloat) g_ascii_strtod (options[2], NULL);
-
-  nns_logi ("Setting YOLOV5/YOLOV8 decoder as scaled_output: %d, conf_threshold: %.2f, iou_threshold: %.2f",
-      scaled_output, conf_threshold, iou_threshold);
-
-  g_strfreev (options);
-  return TRUE;
-}
-
-int
-YoloV8::checkCompatible (const GstTensorsConfig *config)
-{
-  const guint *dim = config->info.info[0].dimension;
-  int i;
-  if (!_check_tensors (config, 1U))
-    return FALSE;
-
-  /** Only support for float type model */
-  g_return_val_if_fail (config->info.info[0].type == _NNS_FLOAT32, FALSE);
-
-  max_detection = (i_width / 32) * (i_height / 32) + (i_width / 16) * (i_height / 16)
-                  + (i_width / 8) * (i_height / 8);
-
-  if (dim[0] != (total_labels + DEFAULT_DETECTION_NUM_INFO) || dim[1] != max_detection) {
-    nns_loge ("yolov8 boundingbox decoder requires the input shape to be %d:%d:1. But given shape is %d:%d:1. `tensor_transform mode=transpose` would be helpful.",
-        total_labels + DEFAULT_DETECTION_NUM_INFO, max_detection, dim[0], dim[1]);
-    return FALSE;
-  }
-
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; ++i)
-    g_return_val_if_fail (dim[i] == 0 || dim[i] == 1, FALSE);
-  return TRUE;
-}
-
-GArray *
-YoloV8::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-  GArray *results = NULL;
-  int bIdx, numTotalBox;
-  int cIdx, numTotalClass, cStartIdx, cIdxMax;
-  float *boxinput;
-  int is_output_scaled = scaled_output;
-  UNUSED (config);
-
-  numTotalBox = max_detection;
-  numTotalClass = total_labels;
-  cStartIdx = DEFAULT_DETECTION_NUM_INFO;
-  cIdxMax = numTotalClass + cStartIdx;
-
-  /* boxinput[numTotalBox][cIdxMax] */
-  boxinput = (float *) input[0].data;
-
-  results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), numTotalBox);
-  for (bIdx = 0; bIdx < numTotalBox; ++bIdx) {
-    float maxClassConfVal = -INFINITY;
-    int maxClassIdx = -1;
-    for (cIdx = cStartIdx; cIdx < cIdxMax; ++cIdx) {
-      if (boxinput[bIdx * cIdxMax + cIdx] > maxClassConfVal) {
-        maxClassConfVal = boxinput[bIdx * cIdxMax + cIdx];
-        maxClassIdx = cIdx;
-      }
-    }
-
-    if (maxClassConfVal > conf_threshold) {
-      detectedObject object;
-      float cx, cy, w, h;
-      cx = boxinput[bIdx * cIdxMax + 0];
-      cy = boxinput[bIdx * cIdxMax + 1];
-      w = boxinput[bIdx * cIdxMax + 2];
-      h = boxinput[bIdx * cIdxMax + 3];
-
-      if (!is_output_scaled) {
-        cx *= (float) i_width;
-        cy *= (float) i_height;
-        w *= (float) i_width;
-        h *= (float) i_height;
-      }
-
-      object.x = (int) (MAX (0.f, (cx - w / 2.f)));
-      object.y = (int) (MAX (0.f, (cy - h / 2.f)));
-      object.width = (int) (MIN ((float) i_width, w));
-      object.height = (int) (MIN ((float) i_height, h));
-
-      object.prob = maxClassConfVal;
-      object.class_id = maxClassIdx - DEFAULT_DETECTION_NUM_INFO;
-      object.tracking_id = 0;
-      object.valid = TRUE;
-      g_array_append_val (results, object);
-    }
-  }
-
-  nms (results, iou_threshold);
-  return results;
-}
-
-#define mp_palm_detection_option(option, type, idx) \
-  if (noptions > idx)                               \
-  option = (type) g_strtod (options[idx], NULL)
-
-MpPalmDetection::MpPalmDetection ()
-{
-  num_layers = NUM_LAYERS_DEFAULT;
-  min_scale = MIN_SCALE_DEFAULT;
-  max_scale = MAX_SCALE_DEFAULT;
-  offset_x = OFFSET_X_DEFAULT;
-  offset_y = OFFSET_Y_DEFAULT;
-  strides[0] = STRIDE_0_DEFAULT;
-  strides[1] = STRIDE_1_DEFAULT;
-  strides[2] = STRIDE_2_DEFAULT;
-  strides[3] = STRIDE_3_DEFAULT;
-  min_score_threshold = MIN_SCORE_THRESHOLD_DEFAULT;
-  anchors = g_array_new (FALSE, TRUE, sizeof (anchor));
-}
-
-MpPalmDetection::~MpPalmDetection ()
-{
-  if (anchors)
-    g_array_free (anchors, TRUE);
-  anchors = NULL;
-}
-
-/**
- * @brief Calculate anchor scale
- */
-static gfloat
-_calculate_scale (float min_scale, float max_scale, int stride_index, int num_strides)
-{
-  if (num_strides == 1) {
-    return (min_scale + max_scale) * 0.5f;
-  } else {
-    return min_scale + (max_scale - min_scale) * 1.0 * stride_index / (num_strides - 1.0f);
-  }
-}
-
-/**
- * @brief Generate anchor information
- */
-void
-MpPalmDetection::mp_palm_detection_generate_anchors ()
-{
-  int layer_id = 0;
-  guint i;
-
-  while (layer_id < num_layers) {
-    GArray *aspect_ratios = g_array_new (FALSE, TRUE, sizeof (gfloat));
-    GArray *scales = g_array_new (FALSE, TRUE, sizeof (gfloat));
-    GArray *anchor_height = g_array_new (FALSE, TRUE, sizeof (gfloat));
-    GArray *anchor_width = g_array_new (FALSE, TRUE, sizeof (gfloat));
-
-    int last_same_stride_layer = layer_id;
-
-    while (last_same_stride_layer < num_layers
-           && strides[last_same_stride_layer] == strides[layer_id]) {
-      gfloat scale;
-      gfloat ratio = 1.0f;
-      g_array_append_val (aspect_ratios, ratio);
-      g_array_append_val (aspect_ratios, ratio);
-      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer, num_layers);
-      g_array_append_val (scales, scale);
-      scale = _calculate_scale (min_scale, max_scale, last_same_stride_layer + 1, num_layers);
-      g_array_append_val (scales, scale);
-      last_same_stride_layer++;
-    }
-
-    for (i = 0; i < aspect_ratios->len; ++i) {
-      const float ratio_sqrts = sqrt (g_array_index (aspect_ratios, gfloat, i));
-      const gfloat sc = g_array_index (scales, gfloat, i);
-      gfloat anchor_height_ = sc / ratio_sqrts;
-      gfloat anchor_width_ = sc * ratio_sqrts;
-      g_array_append_val (anchor_height, anchor_height_);
-      g_array_append_val (anchor_width, anchor_width_);
-    }
-
-    {
-      int feature_map_height = 0;
-      int feature_map_width = 0;
-      int x, y;
-      int anchor_id;
-
-      const int stride = strides[layer_id];
-      feature_map_height = ceil (1.0f * 192 / stride);
-      feature_map_width = ceil (1.0f * 192 / stride);
-      for (y = 0; y < feature_map_height; ++y) {
-        for (x = 0; x < feature_map_width; ++x) {
-          for (anchor_id = 0; anchor_id < (int) aspect_ratios->len; ++anchor_id) {
-            const float x_center = (x + offset_x) * 1.0f / feature_map_width;
-            const float y_center = (y + offset_y) * 1.0f / feature_map_height;
-
-            const anchor a = { .x_center = x_center,
-              .y_center = y_center,
-              .w = g_array_index (anchor_width, gfloat, anchor_id),
-              .h = g_array_index (anchor_height, gfloat, anchor_id) };
-            g_array_append_val (anchors, a);
-          }
-        }
-      }
-      layer_id = last_same_stride_layer;
-    }
-
-    g_array_free (anchor_height, TRUE);
-    g_array_free (anchor_width, TRUE);
-    g_array_free (aspect_ratios, TRUE);
-    g_array_free (scales, TRUE);
-  }
-}
-
-int
-MpPalmDetection::setOptionInternal (const char *param)
-{
-  /* Load palm detection info from option3 */
-  gchar **options;
-  int noptions, idx;
-  int ret = TRUE;
-
-  options = g_strsplit (param, ":", -1);
-  noptions = g_strv_length (options);
-
-  if (noptions > PARAMS_MAX) {
-    GST_ERROR ("Invalid MP PALM DETECTION PARAM length: %d", noptions);
-    ret = FALSE;
-    goto exit_mp_palm_detection;
-  }
-
-  mp_palm_detection_option (min_score_threshold, gfloat, 0);
-  mp_palm_detection_option (num_layers, gint, 1);
-  mp_palm_detection_option (min_scale, gfloat, 2);
-  mp_palm_detection_option (max_scale, gfloat, 3);
-  mp_palm_detection_option (offset_x, gfloat, 4);
-  mp_palm_detection_option (offset_y, gfloat, 5);
-
-  for (idx = 6; idx < num_layers + 6; idx++) {
-    mp_palm_detection_option (strides[idx - 6], gint, idx);
-  }
-  mp_palm_detection_generate_anchors ();
-
-exit_mp_palm_detection:
-  g_strfreev (options);
-  return ret;
-}
-
-int
-MpPalmDetection::checkCompatible (const GstTensorsConfig *config)
-{
-  const uint32_t *dim1, *dim2;
-  int i;
-  if (!_check_tensors (config, MAX_TENSORS))
-    return FALSE;
-
-  /* Check if the first tensor is compatible */
-  dim1 = config->info.info[0].dimension;
-
-  g_return_val_if_fail (dim1[0] == INFO_SIZE, FALSE);
-  max_detection = dim1[1];
-  g_return_val_if_fail (max_detection > 0, FALSE);
-  g_return_val_if_fail (dim1[2] == 1, FALSE);
-  for (i = 3; i < NNS_TENSOR_RANK_LIMIT; i++)
-    g_return_val_if_fail (dim1[i] == 0 || dim1[i] == 1, FALSE);
-
-  /* Check if the second tensor is compatible */
-  dim2 = config->info.info[1].dimension;
-  g_return_val_if_fail (dim2[0] == 1, FALSE);
-  g_return_val_if_fail (max_detection == dim2[1], FALSE);
-  for (i = 2; i < NNS_TENSOR_RANK_LIMIT; i++)
-    g_return_val_if_fail (dim2[i] == 0 || dim2[i] == 1, FALSE);
-
-  /* Check consistency with max_detection */
-  if (this->max_detection == 0)
-    this->max_detection = max_detection;
-  else
-    g_return_val_if_fail (max_detection == this->max_detection, FALSE);
-
-  if (this->max_detection > MAX_DETECTION) {
-    GST_ERROR ("Incoming tensor has too large detection-max : %u", max_detection);
-    return FALSE;
-  }
-  return TRUE;
-}
-
-GArray *
-MpPalmDetection::decode (const GstTensorsConfig *config, const GstTensorMemory *input)
-{
-  GArray *results = NULL;
-  const GstTensorMemory *boxes = NULL;
-  const GstTensorMemory *detections = NULL;
-  const guint num_tensors = config->info.num_tensors;
-
-  /* Already checked with getOutCaps. Thus, this is an internal bug */
-  g_assert (num_tensors >= MAX_TENSORS);
-
-  /* results will be allocated by _get_objects_mp_palm_detection_ */
-  boxes = &input[0];
-  detections = &input[1];
-  switch (config->info.info[0].type) {
-    _get_objects_mp_palm_detection_ (uint8_t, _NNS_UINT8);
-    _get_objects_mp_palm_detection_ (int8_t, _NNS_INT8);
-    _get_objects_mp_palm_detection_ (uint16_t, _NNS_UINT16);
-    _get_objects_mp_palm_detection_ (int16_t, _NNS_INT16);
-    _get_objects_mp_palm_detection_ (uint32_t, _NNS_UINT32);
-    _get_objects_mp_palm_detection_ (int32_t, _NNS_INT32);
-    _get_objects_mp_palm_detection_ (uint64_t, _NNS_UINT64);
-    _get_objects_mp_palm_detection_ (int64_t, _NNS_INT64);
-    _get_objects_mp_palm_detection_ (float, _NNS_FLOAT32);
-    _get_objects_mp_palm_detection_ (double, _NNS_FLOAT64);
-
-    default:
-      g_assert (0);
-  }
-  nms (results, 0.05f);
-  return results;
-}
diff --git a/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.h b/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.h

index a0edfc2..43729ad 100644 (file)
--- a/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.h
+++ b/ext/nnstreamer/tensor_decoder/tensordec-boundingbox.h
@@ -1,102 +1,121 @@
+/**
+ * GStreamer / NNStreamer tensor_decoder subplugin, "bounding boxes"
+ * Copyright (C) 2018 Samsung Electronics Co. Ltd.
+ * Copyright (C) 2018 MyungJoo Ham <myungjoo.ham@samsung.com>
+ * Copyright 2021 NXP
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ */
+/**
+ * @file        tensordec-boundingbox.h
+ * @date        15 Nov 2018
+ * @brief       NNStreamer tensor-decoder subplugin, "bounding boxes",
+ *              which converts tensors to video stream w/ boxes on
+ *              transparent background.
+ *              This code is NYI/WIP and not compilable.
+ *
+ * @see         https://github.com/nnstreamer/nnstreamer
+ * @author      MyungJoo Ham <myungjoo.ham@samsung.com>
+ * @bug         No known bugs except for NYI items
+ *
+ * option1: Decoder mode of bounding box.
+ *          Available: yolov5, yolov8, mp-palm-detection
+ *                     mobilenet-ssd (single shot multibox detector with priors.)
+ *                     mobilenet-ssd-postprocess
+ *                     ov-person-detection
+ *                     tf-ssd (deprecated, recommend to use mobilenet-ssd-postprocess)
+ *                     tflite-ssd (deprecated, recommend to use mobilenet-ssd)
+ * option2: Location of label file
+ *          This is independent from option1
+ * option3: Any option1-dependent values
+ *          !!This depends on option1 values!!
+ *          for yolov5 and yolov8 mode:
+ *            The option3 requires up to 3 numbers, which tell
+ *              - whether the output values are scaled or not
+ *                0: not scaled (default), 1: scaled (e.g., 0.0 ~ 1.0)
+ *              - the threshold of confidence (optional, default set to 0.25)
+ *              - the threshold of IOU (optional, default set to 0.45)
+ *            An example of option3 is "option3=0:0.65:0.6"
+ *          for mobilenet-ssd mode:
+ *            The option3 definition scheme is, in order, the following:
+ *                - box priors location file (mandatory)
+ *                - Detection threshold (optional, default set to 0.5)
+ *                - Y box scale (optional, default set to 10.0)
+ *                - X box scale (optional, default set to 10.0)
+ *                - h box scale (optional, default set to 5.0)
+ *                - w box scale (optional, default set to 5.0)
+ *                - IOU box valid threshold (optional, default set to 0.5)
+ *            The default parameter values can be set in any of the following ways:
+ *            option3=box-priors.txt:0.5:10.0:10.0:5.0:5.0:0.5
+ *            option3=box-priors.txt
+ *            option3=box-priors.txt::::::
+ *
+ *            It's possible to set only a few values, using the default values for
+ *            those not specified through the command line.
+ *            For example, you can set the detection and IOU thresholds to 0.65
+ *            and 0.6, respectively, with the option3 parameter as follows:
+ *            option3=box-priors.txt:0.65:::::0.6
+ *          for mobilenet-ssd-postprocess mode:
+ *            The option3 is required to have 5 integer numbers, which tell
+ *            the tensor-dec how to interpret the given tensor inputs.
+ *            The first 4 numbers separated by colon, ':', designate which
+ *            are location:class:score:number of the tensors.
+ *            The last number, separated by a comma ',' from the first 4 numbers,
+ *            designates the threshold in percent.
+ *            In other words, "option3=%i:%i:%i:%i,%i".
+ *          for mp-palm-detection mode:
+ *            The option3 takes the following colon-separated numbers, in order:
+ *                - box score threshold (mandatory)
+ *                - number of layers for anchor generation (optional, default set to 4)
+ *                - minimum scale factor for anchor generation (optional, default set to 1.0)
+ *                - maximum scale factor for anchor generation (optional, default set to 1.0)
+ *                - X offset (optional, default set to 0.5)
+ *                - Y offset (optional, default set to 0.5)
+ *                - strides for each layer for anchor generation (optional, default set to 8:16:16:16)
+ *            The default parameter values can be set in any of the following ways:
+ *            option3=0.5
+ *            option3=0.5:4:0.2:0.8
+ *            option3=0.5:4:1.0:1.0:0.5:0.5:8:16:16:16
+ *
+ * option4: Video Output Dimension (WIDTH:HEIGHT)
+ *          This is independent from option1
+ * option5: Input Dimension (WIDTH:HEIGHT)
+ *          This is independent from option1
+ * option6: Whether to track result bounding boxes or not
+ *          0 (default, do not track)
+ *          1 (track result bounding boxes, with naive centroid based algorithm)
+ * option7: Whether to log the result bounding boxes or not
+ *          0 (default, do not log)
+ *          1 (log result bounding boxes)
+ * option8: Box Style (NYI)
+ *
+ * MAJOR TODO: Support other colorspaces natively from _decode for performance gain
+ * (e.g., BGRA, ARGB, ...)
+ *
+ */
+
  #ifndef _TENSORDECBB_H__
  #define _TENSORDECBB_H__
-
+#include <gst/gst.h>
  #include <math.h> /* expf */
+#include <nnstreamer_log.h>
+#include <nnstreamer_util.h>
  #include "tensordecutil.h"
  
  #define PIXEL_VALUE (0xFF0000FF) /* RED 100% in RGBA */
  
  /**
- * @brief C++-Template-like box location calculation for box-priors for Mobilenet SSD Model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] boxprior The box prior data from the box file of MOBILENET_SSD.
- * @param[in] boxinput Input Tensor Data (Boxes)
- * @param[in] detinput Input Tensor Data (Detection). Null if not available. (numtensor ==1)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Option of bounding box
   */
-#define _get_objects_mobilenet_ssd(_type, typename, boxprior, boxinput,                                      \
-    detinput, config, results, i_width, i_height, max_detection)                                             \
-  case typename:                                                                                             \
-    {                                                                                                        \
-      int d;                                                                                                 \
-      _type *boxinput_ = (_type *) boxinput;                                                                 \
-      size_t boxbpi = config->info.info[0].dimension[0];                                                     \
-      _type *detinput_ = (_type *) detinput;                                                                 \
-      size_t detbpi = config->info.info[1].dimension[0];                                                     \
-      int num = (DETECTION_MAX > max_detection) ? max_detection : DETECTION_MAX;                             \
-      detectedObject object = {                                                                              \
-        .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
-      };                                                                                                     \
-      for (d = 0; d < num; d++) {                                                                            \
-        _get_object_i_mobilenet_ssd (d, detbpi, boxprior, (boxinput_ + (d * boxbpi)),                        \
-            (detinput_ + (d * detbpi)), (&object), i_width, i_height);                                       \
-        if (object.valid == TRUE) {                                                                          \
-          g_array_append_val (results, object);                                                              \
-        }                                                                                                    \
-      }                                                                                                      \
-    }                                                                                                        \
-    break
-
-
-/** @brief Macro to simplify calling _get_objects_mobilenet_ssd */
-#define _get_objects_mobilenet_ssd_(type, typename)                      \
-  _get_objects_mobilenet_ssd (type, typename, box_priors, (boxes->data), \
-      (detections->data), config, results, i_width, i_height, max_detection)
-
-
-#define _expit(x) (1.f / (1.f + expf (-((float) x))))
-
-/**
- * @brief C++-Template-like box location calculation for box-priors
- * @bug This is not macro-argument safe. Use paranthesis!
- * @param[in] bb The configuration, "bounding_boxes"
- * @param[in] index The index (3rd dimension of BOX_SIZE:1:DETECTION_MAX:1)
- * @param[in] total_labels The count of total labels. We can get this from input tensor info. (1st dimension of LABEL_SIZE:DETECTION_MAX:1:1)
- * @param[in] boxprior The box prior data from the box file of SSD.
- * @param[in] boxinputptr Cursor pointer of input + byte-per-index * index (box)
- * @param[in] detinputptr Cursor pointer of input + byte-per-index * index (detection)
- * @param[in] result The object returned. (pointer to object)
- */
-#define _get_object_i_mobilenet_ssd(index, total_labels, boxprior,                \
-    boxinputptr, detinputptr, result, i_width, i_height)                          \
-  do {                                                                            \
-    unsigned int c;                                                               \
-    gfloat highscore = -FLT_MAX;                                                  \
-    float y_scale = params[Y_SCALE_IDX];                                          \
-    float x_scale = params[X_SCALE_IDX];                                          \
-    float h_scale = params[H_SCALE_IDX];                                          \
-    float w_scale = params[W_SCALE_IDX];                                          \
-    result->valid = FALSE;                                                        \
-    for (c = 1; c < total_labels; c++) {                                          \
-      if (detinputptr[c] >= sigmoid_threshold) {                                  \
-        gfloat score = _expit (detinputptr[c]);                                   \
-        float ycenter                                                             \
-            = boxinputptr[0] / y_scale * boxprior[2][index] + boxprior[0][index]; \
-        float xcenter                                                             \
-            = boxinputptr[1] / x_scale * boxprior[3][index] + boxprior[1][index]; \
-        float h = (float) expf (boxinputptr[2] / h_scale) * boxprior[2][index];   \
-        float w = (float) expf (boxinputptr[3] / w_scale) * boxprior[3][index];   \
-        float ymin = ycenter - h / 2.f;                                           \
-        float xmin = xcenter - w / 2.f;                                           \
-        int x = xmin * i_width;                                                   \
-        int y = ymin * i_height;                                                  \
-        int width = w * i_width;                                                  \
-        int height = h * i_height;                                                \
-        if (highscore < score) {                                                  \
-          result->class_id = c;                                                   \
-          result->x = MAX (0, x);                                                 \
-          result->y = MAX (0, y);                                                 \
-          result->width = width;                                                  \
-          result->height = height;                                                \
-          result->prob = score;                                                   \
-          result->valid = TRUE;                                                   \
-        }                                                                         \
-      }                                                                           \
-    }                                                                             \
-  } while (0);
-
  enum class BoundingBoxOption {
    MODE = 0,
    LABEL_PATH = 1,
@@ -131,33 +150,6 @@ typedef enum {
  } bounding_box_modes;
  
  /**
- * @brief MOBILENET SSD PostProcess Output tensor feature mapping.
- */
-typedef enum {
-  MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS = 0,
-  MOBILENET_SSD_PP_BBOX_IDX_CLASSES = 1,
-  MOBILENET_SSD_PP_BBOX_IDX_SCORES = 2,
-  MOBILENET_SSD_PP_BBOX_IDX_NUM = 3,
-  MOBILENET_SSD_PP_BBOX_IDX_UNKNOWN
-} mobilenet_ssd_pp_bbox_idx_t;
-
-/**
- * @brief List of bounding-box decoding schemes in string
- */
-static const char *bb_modes[] = {
-  [MOBILENET_SSD_BOUNDING_BOX] = "mobilenet-ssd",
-  [MOBILENET_SSD_PP_BOUNDING_BOX] = "mobilenet-ssd-postprocess",
-  [OV_PERSON_DETECTION_BOUNDING_BOX] = "ov-person-detection",
-  [OV_FACE_DETECTION_BOUNDING_BOX] = "ov-face-detection",
-  [OLDNAME_MOBILENET_SSD_BOUNDING_BOX] = "tflite-ssd",
-  [OLDNAME_MOBILENET_SSD_PP_BOUNDING_BOX] = "tf-ssd",
-  [YOLOV5_BOUNDING_BOX] = "yolov5",
-  [MP_PALM_DETECTION_BOUNDING_BOX] = "mp-palm-detection",
-  [YOLOV8_BOUNDING_BOX] = "yolov8",
-  NULL,
-};
-
-/**
   * @brief Structure for object centroid tracking.
   */
  typedef struct {
@@ -200,166 +192,24 @@ typedef struct {
    int tracking_id;
  } detectedObject;
  
+
  /**
- * @brief C++-Template-like box location calculation for Tensorflow SSD model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] numinput Input Tensor Data (The number of detections)
- * @param[in] classinput Input Tensor Data (Detected classes)
- * @param[in] scoreinput Input Tensor Data (Detection scores)
- * @param[in] boxesinput Input Tensor Data (Boxes)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief Apply NMS to the given results (objects[DETECTION_MAX])
+ * @param[in,out] results The results to be filtered with nms
   */
-#define _get_objects_mobilenet_ssd_pp(_type, typename, numinput, classinput,       \
-    scoreinput, boxesinput, config, results, i_width, i_height)                    \
-  case typename:                                                                   \
-    {                                                                              \
-      int d, num;                                                                  \
-      size_t boxbpi;                                                               \
-      _type *num_detection_ = (_type *) numinput;                                  \
-      _type *classes_ = (_type *) classinput;                                      \
-      _type *scores_ = (_type *) scoreinput;                                       \
-      _type *boxes_ = (_type *) boxesinput;                                        \
-      int locations_idx                                                            \
-          = get_mobilenet_ssd_pp_tensor_idx (MOBILENET_SSD_PP_BBOX_IDX_LOCATIONS); \
-      num = (int) num_detection_[0];                                               \
-      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num);     \
-      boxbpi = config->info.info[locations_idx].dimension[0];                      \
-      for (d = 0; d < num; d++) {                                                  \
-        _type x1, x2, y1, y2;                                                      \
-        detectedObject object;                                                     \
-        if (scores_[d] < threshold)                                                \
-          continue;                                                                \
-        object.valid = TRUE;                                                       \
-        object.class_id = (int) classes_[d];                                       \
-        x1 = MIN (MAX (boxes_[d * boxbpi + 1], 0), 1);                             \
-        y1 = MIN (MAX (boxes_[d * boxbpi], 0), 1);                                 \
-        x2 = MIN (MAX (boxes_[d * boxbpi + 3], 0), 1);                             \
-        y2 = MIN (MAX (boxes_[d * boxbpi + 2], 0), 1);                             \
-        object.x = (int) (x1 * i_width);                                           \
-        object.y = (int) (y1 * i_height);                                          \
-        object.width = (int) ((x2 - x1) * i_width);                                \
-        object.height = (int) ((y2 - y1) * i_height);                              \
-        object.prob = scores_[d];                                                  \
-        g_array_append_val (results, object);                                      \
-      }                                                                            \
-    }                                                                              \
-    break
-
-/** @brief Macro to simplify calling _get_objects_mobilenet_ssd_pp */
-#define _get_objects_mobilenet_ssd_pp_(type, typename)                                 \
-  _get_objects_mobilenet_ssd_pp (type, typename, (mem_num->data), (mem_classes->data), \
-      (mem_scores->data), (mem_boxes->data), config, results, i_width, i_height)
-
-
-#define OV_PERSON_DETECTION_CONF_THRESHOLD (0.8)
+void nms (GArray *results, gfloat threshold);
+
  /**
- * @brief C++-Template-like box location calculation for OpenVino Person Detection Model
- * @param[in] type The tensor type of inputptr
- * @param[in] intputptr Input tensor Data
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief check the num_tensors is valid
+ * @param[in] config The structure of tensors info to check.
+ * @param[in] limit The limit of tensors number.
+ * @return TRUE if tensors info is valid.
   */
-#define _get_persons_ov(type, inputptr, typename, results)                                                   \
-  case typename:                                                                                             \
-    {                                                                                                        \
-      detectedObject object = {                                                                              \
-        .valid = FALSE, .class_id = 0, .x = 0, .y = 0, .width = 0, .height = 0, .prob = .0, .tracking_id = 0 \
-      };                                                                                                     \
-      type *typed_inputptr = (type *) inputptr;                                                              \
-      guint d;                                                                                               \
-                                                                                                             \
-      for (d = 1; d <= DETECTION_MAX; ++d) {                                                                 \
-        struct {                                                                                             \
-          type image_id;                                                                                     \
-          type label;                                                                                        \
-          type conf;                                                                                         \
-          type x_min;                                                                                        \
-          type y_min;                                                                                        \
-          type x_max;                                                                                        \
-          type y_max;                                                                                        \
-        } desc;                                                                                              \
-                                                                                                             \
-        memcpy (&desc, typed_inputptr, sizeof (desc));                                                       \
-        typed_inputptr += (sizeof (desc) / sizeof (type));                                                   \
-        object.valid = FALSE;                                                                                \
-                                                                                                             \
-        if ((int) desc.image_id < 0) {                                                                       \
-          max_detection = (d - 1);                                                                           \
-          break;                                                                                             \
-        }                                                                                                    \
-        object.class_id = -1;                                                                                \
-        object.x = (int) (desc.x_min * (type) i_width);                                                      \
-        object.y = (int) (desc.y_min * (type) i_height);                                                     \
-        object.width = (int) ((desc.x_max - desc.x_min) * (type) i_width);                                   \
-        object.height = (int) ((desc.y_max - desc.y_min) * (type) i_height);                                 \
-        if (desc.conf < OV_PERSON_DETECTION_CONF_THRESHOLD)                                                  \
-          continue;                                                                                          \
-        object.prob = 1;                                                                                     \
-        object.valid = TRUE;                                                                                 \
-        g_array_append_val (results, object);                                                                \
-      }                                                                                                      \
-    }                                                                                                        \
-    break
+int check_tensors (const GstTensorsConfig *config, const unsigned int limit);
  
  /**
- * @brief C++-Template-like box location calculation for Tensorflow model
- * @param[in] type The tensor type of inputptr
- * @param[in] typename nnstreamer enum corresponding to the type
- * @param[in] scoreinput Input Tensor Data (Detection scores)
- * @param[in] boxesinput Input Tensor Data (Boxes)
- * @param[in] config Tensor configs of the input tensors
- * @param[out] results The object returned. (GArray with detectedObject)
+ * @brief      Interface for Bounding box's properties
   */
-#define _get_objects_mp_palm_detection(_type, typename, scoreinput, boxesinput, config) \
-  case typename:                                                                        \
-    {                                                                                   \
-      int d_;                                                                           \
-      _type *scores_ = (_type *) scoreinput;                                            \
-      _type *boxes_ = (_type *) boxesinput;                                             \
-      int num_ = max_detection;                                                         \
-      size_t boxbpi_ = config->info.info[0].dimension[0];                               \
-      results = g_array_sized_new (FALSE, TRUE, sizeof (detectedObject), num_);         \
-      for (d_ = 0; d_ < num_; d_++) {                                                   \
-        gfloat y_center, x_center, h, w;                                                \
-        gfloat ymin, xmin;                                                              \
-        int y, x, width, height;                                                        \
-        detectedObject object;                                                          \
-        gfloat score = (gfloat) scores_[d_];                                            \
-        _type *box = boxes_ + boxbpi_ * d_;                                             \
-        anchor *a = &g_array_index (this->anchors, anchor, d_);                         \
-        score = MAX (score, -100.0f);                                                   \
-        score = MIN (score, 100.0f);                                                    \
-        score = 1.0f / (1.0f + exp (-score));                                           \
-        if (score < min_score_threshold)                                                \
-          continue;                                                                     \
-        y_center = (box[0] * 1.f) / i_height * a->h + a->y_center;                      \
-        x_center = (box[1] * 1.f) / i_width * a->w + a->x_center;                       \
-        h = (box[2] * 1.f) / i_height * a->h;                                           \
-        w = (box[3] * 1.f) / i_width * a->w;                                            \
-        ymin = y_center - h / 2.f;                                                      \
-        xmin = x_center - w / 2.f;                                                      \
-        y = ymin * i_height;                                                            \
-        x = xmin * i_width;                                                             \
-        width = w * i_width;                                                            \
-        height = h * i_height;                                                          \
-        object.class_id = 0;                                                            \
-        object.x = MAX (0, x);                                                          \
-        object.y = MAX (0, y);                                                          \
-        object.width = width;                                                           \
-        object.height = height;                                                         \
-        object.prob = score;                                                            \
-        object.valid = TRUE;                                                            \
-        g_array_append_val (results, object);                                           \
-      }                                                                                 \
-    }                                                                                   \
-    break
-
-/** @brief Macro to simplify calling _get_objects_mp_palm_detection */
-#define _get_objects_mp_palm_detection_(type, typename) \
-  _get_objects_mp_palm_detection (type, typename, (detections->data), (boxes->data), config)
-
  class BoxProperties
  {
    public:
@@ -400,7 +250,9 @@ class BoxProperties
    guint total_labels;
  };
  
-
+/**
+ * @brief      Class for Bounding box tensor decoder
+ */
  class BoundingBox
  {
    public:
@@ -448,19 +300,9 @@ class BoundingBox
    gboolean flag_use_label;
  };
  
-/** @brief Mathematic inverse of sigmoid function, aka logit */
-static float
-logit (float x)
-{
-  if (x <= 0.0f)
-    return -INFINITY;
-
-  if (x >= 1.0f)
-    return INFINITY;
-
-  return log (x / (1.0 - x));
-}
-
+/**
+ * @brief      Class for MobilenetSSD box properties
+ */
  class MobilenetSSD : public BoxProperties
  {
    public:
@@ -473,23 +315,8 @@ class MobilenetSSD : public BoxProperties
  
    static const int BOX_SIZE = 4;
    static const int DETECTION_MAX = 2034; /* add ssd_mobilenet v3 support */
-  static const guint MAX_TENSORS = 2U;
-
-  static const int THRESHOLD_IDX = 0;
-  static const int Y_SCALE_IDX = 1;
-  static const int X_SCALE_IDX = 2;
-  static const int H_SCALE_IDX = 3;
-  static const int W_SCALE_IDX = 4;
-  static const int IOU_THRESHOLD_IDX = 5;
    static const int PARAMS_MAX = 6;
  
-  static constexpr gfloat DETECTION_THRESHOLD_DEFAULT = 0.5f;
-  static constexpr gfloat THRESHOLD_IOU_DEFAULT = 0.5f;
-  static constexpr gfloat Y_SCALE_DEFAULT = 10.0f;
-  static constexpr gfloat X_SCALE_DEFAULT = 10.0f;
-  static constexpr gfloat H_SCALE_DEFAULT = 5.0f;
-  static constexpr gfloat W_SCALE_DEFAULT = 5.0f;
-
    private:
    char *box_prior_path; /**< Box Prior file path */
    gfloat box_priors[BOX_SIZE][DETECTION_MAX + 1]; /** loaded box prior */
@@ -497,6 +324,9 @@ class MobilenetSSD : public BoxProperties
    gfloat sigmoid_threshold; /** Inverse value of valid detection threshold in sigmoid domain */
  };
  
+/**
+ * @brief      Class for MobilenetSSDPP box properties
+ */
  class MobilenetSSDPP : public BoxProperties
  {
    public:
@@ -507,25 +337,16 @@ class MobilenetSSDPP : public BoxProperties
    int checkCompatible (const GstTensorsConfig *config);
    GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
  
-  static const int BOX_SIZE = 4;
-  static const guint DETECTION_MAX = 100;
    static const guint MAX_TENSORS = 4U;
-  static const int LOCATIONS_IDX = 0;
-  static const int CLASSES_IDX = 1;
-  static const int SCORES_IDX = 2;
-  static const int NUM_IDX = 3;
-
-  static const gint LOCATIONS_DEFAULT = 3;
-  static const gint CLASSES_DEFAULT = 1;
-  static const gint SCORES_DEFAULT = 2;
-  static const gint NUM_DEFAULT = 0;
-  static constexpr gfloat THRESHOLD_DEFAULT = G_MINFLOAT;
  
    private:
    gint tensor_mapping[MAX_TENSORS]; /* Output tensor index mapping */
    gfloat threshold; /* Detection threshold */
  };
  
+/**
+ * @brief      Class for OVDetection box properties
+ */
  class OVDetection : public BoxProperties
  {
    public:
@@ -542,23 +363,17 @@ class OVDetection : public BoxProperties
    static const guint DEFAULT_SIZE_DETECTION_DESC = 7;
  };
  
-#define YOLO_DETECTION_CONF_THRESHOLD (0.25)
-#define YOLO_DETECTION_IOU_THRESHOLD (0.45)
-
+/**
+ * @brief      Class for YoloV5 box properties
+ */
  class YoloV5 : public BoxProperties
  {
    public:
-  YoloV5 ()
-      : scaled_output (0), conf_threshold (YOLO_DETECTION_CONF_THRESHOLD),
-        iou_threshold (YOLO_DETECTION_IOU_THRESHOLD)
-  {
-  }
+  YoloV5 ();
    int setOptionInternal (const char *param);
    int checkCompatible (const GstTensorsConfig *config);
    GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
  
-  static const int DEFAULT_DETECTION_NUM_INFO = 5;
-
    private:
    /* From option3, whether the output values are scaled or not */
    int scaled_output;
@@ -566,20 +381,17 @@ class YoloV5 : public BoxProperties
    gfloat iou_threshold;
  };
  
+/**
+ * @brief      Class for YoloV8 box properties
+ */
  class YoloV8 : public BoxProperties
  {
    public:
-  YoloV8 ()
-      : scaled_output (0), conf_threshold (YOLO_DETECTION_CONF_THRESHOLD),
-        iou_threshold (YOLO_DETECTION_IOU_THRESHOLD)
-  {
-  }
+  YoloV8 ();
    int setOptionInternal (const char *param);
    int checkCompatible (const GstTensorsConfig *config);
    GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
  
-  static const int DEFAULT_DETECTION_NUM_INFO = 4;
-
    private:
    /* From option3, whether the output values are scaled or not */
    int scaled_output;
@@ -587,6 +399,9 @@ class YoloV8 : public BoxProperties
    gfloat iou_threshold;
  };
  
+/**
+ * @brief      Class for MpPalmDetection box properties
+ */
  class MpPalmDetection : public BoxProperties
  {
    public:
@@ -598,22 +413,6 @@ class MpPalmDetection : public BoxProperties
  
    GArray *decode (const GstTensorsConfig *config, const GstTensorMemory *input);
  
-  static const guint INFO_SIZE = 18;
-  static const guint MAX_TENSORS = 2U;
-  static const guint MAX_DETECTION = 2016;
-
-  static const gint NUM_LAYERS_DEFAULT = 4;
-  static constexpr gfloat MIN_SCALE_DEFAULT = 1.0;
-  static constexpr gfloat MAX_SCALE_DEFAULT = 1.0;
-  static constexpr gfloat OFFSET_X_DEFAULT = 0.5;
-  static constexpr gfloat OFFSET_Y_DEFAULT = 0.5;
-  static const gint STRIDE_0_DEFAULT = 8;
-  static const gint STRIDE_1_DEFAULT = 16;
-  static const gint STRIDE_2_DEFAULT = 16;
-  static const gint STRIDE_3_DEFAULT = 16;
-  static constexpr gfloat MIN_SCORE_THRESHOLD_DEFAULT = 0.5;
-
-  static const int PARAMS_STRIDE_SIZE = 8;
    static const int PARAMS_MAX = 13;
  
    private:
author	Yelin Jeong <yelini.jeong@samsung.com>
	Wed, 8 May 2024 08:48:22 +0000 (17:48 +0900)
committer	MyungJoo Ham <myungjoo.ham@samsung.com>
	Thu, 16 May 2024 10:55:50 +0000 (19:55 +0900)
ext/nnstreamer/tensor_decoder/box_properties/meson.build	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/box_properties/mobilenetssd.cc	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/box_properties/mobilenetssdpp.cc	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/box_properties/mppalmdetection.cc	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/box_properties/ovdetection.cc	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/box_properties/yolo.cc	[new file with mode: 0644]	patch \| blob
ext/nnstreamer/tensor_decoder/meson.build		patch \| blob \| history
ext/nnstreamer/tensor_decoder/tensordec-boundingbox.cc		patch \| blob \| history
ext/nnstreamer/tensor_decoder/tensordec-boundingbox.h		patch \| blob \| history