[yolo_v2] yolo v2 loss scaffold
author: hyeonseok lee <hs89.lee@samsung.com>
Thu, 9 Mar 2023 02:22:12 +0000 (11:22 +0900)
committer: Jijoong Moon <jijoong.moon@samsung.com>
Tue, 12 Sep 2023 15:21:01 +0000 (00:21 +0900)
 - Added yolo v2 loss scaffold

Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
Applications/YOLO/jni/meson.build
Applications/YOLO/jni/yolo_v2_loss.cpp [new file with mode: 0644]
Applications/YOLO/jni/yolo_v2_loss.h [new file with mode: 0644]
nntrainer/utils/util_func.cpp
nntrainer/utils/util_func.h

index ee6c383..113bdc1 100644 (file)
@@ -19,6 +19,7 @@ yolo_sources = [
   'main.cpp',
   'det_dataloader.cpp',
+  'yolo_v2_loss.cpp',
   layer_reorg_src
 ]
 
 yolo_dependencies = [app_utils_dep,
diff --git a/Applications/YOLO/jni/yolo_v2_loss.cpp b/Applications/YOLO/jni/yolo_v2_loss.cpp
new file mode 100644 (file)
index 0000000..875ae60
--- /dev/null
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   yolo_v2_loss.cpp
+ * @date   07 March 2023
+ * @brief  This file contains the yolo v2 loss layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include "yolo_v2_loss.h"
+
+namespace custom {
+
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
+/**
+ * @brief Identifiers for the tensors this layer works with: raw predictions,
+ *        anchor-scaled predictions, generated ground truth, and masks.
+ *        Presumably used as indices into wt_idx once finalize() requests the
+ *        tensors from the context — confirm when finalize() is implemented.
+ */
+enum YoloV2LossParams {
+  bbox_x_pred,
+  bbox_y_pred,
+  bbox_w_pred,
+  bbox_h_pred,
+  confidence_pred,
+  class_pred,
+  bbox_w_pred_anchor,
+  bbox_h_pred_anchor,
+  bbox_x_gt,
+  bbox_y_gt,
+  bbox_w_gt,
+  bbox_h_gt,
+  confidence_gt,
+  class_gt,
+  bbox_class_mask,
+  iou_mask
+};
+
+/** Property constructors: each simply stores the supplied value. */
+namespace props {
+MaxObjectNumber::MaxObjectNumber(const unsigned &value) { set(value); }
+ClassNumber::ClassNumber(const unsigned &value) { set(value); }
+GridHeightNumber::GridHeightNumber(const unsigned &value) { set(value); }
+GridWidthNumber::GridWidthNumber(const unsigned &value) { set(value); }
+ImageHeightSize::ImageHeightSize(const unsigned &value) { set(value); }
+ImageWidthSize::ImageWidthSize(const unsigned &value) { set(value); }
+} // namespace props
+
+/**
+ * Construct the loss layer: wrap the static anchor width/height buffers in
+ * 1x1xNUM_ANCHORx1 tensors, precompute the per-anchor w/h ratio, and mark all
+ * tensor indices as unset.
+ */
+YoloV2LossLayer::YoloV2LossLayer() :
+  anchors_w({1, 1, NUM_ANCHOR, 1}, anchors_w_buf),
+  anchors_h({1, 1, NUM_ANCHOR, 1}, anchors_h_buf),
+  sigmoid(nntrainer::ActivationType::ACT_SIGMOID, false),
+  softmax(nntrainer::ActivationType::ACT_SOFTMAX, false),
+  yolo_v2_loss_props(props::MaxObjectNumber(), props::ClassNumber(),
+                     props::GridHeightNumber(), props::GridWidthNumber(),
+                     props::ImageHeightSize(), props::ImageWidthSize()) {
+  anchors_ratio = anchors_w.divide(anchors_h);
+  // Sentinel: max() marks an index as "not yet requested from the context".
+  wt_idx.fill(std::numeric_limits<unsigned>::max());
+}
+
+/**
+ * @copydoc Layer::finalize(InitLayerContext &context)
+ */
+void YoloV2LossLayer::finalize(nntrainer::InitLayerContext &context) {
+  /** NYI */
+}
+
+/**
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+ */
+void YoloV2LossLayer::forwarding(nntrainer::RunLayerContext &context,
+                                 bool training) {
+  /** NYI */
+}
+
+/**
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
+ */
+void YoloV2LossLayer::calcDerivative(nntrainer::RunLayerContext &context) {
+  /** NYI */
+}
+
+/**
+ * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+ */
+void YoloV2LossLayer::exportTo(nntrainer::Exporter &exporter,
+                               const ml::train::ExportMethods &method) const {
+  /** NYI */
+}
+
+/**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+void YoloV2LossLayer::setProperty(const std::vector<std::string> &values) {
+  /** NYI */
+}
+
+/**
+ * @brief  Find the anchor index responsible for an object.
+ *         Stub: always returns 0 until implemented.
+ */
+unsigned int YoloV2LossLayer::find_responsible_anchors(float bbox_ratio) {
+  /** NYI */
+  return 0;
+}
+
+/**
+ * @brief  Generate ground truth tensors and masks from labels.
+ */
+void YoloV2LossLayer::generate_ground_truth(
+  nntrainer::Tensor &bbox_x_pred, nntrainer::Tensor &bbox_y_pred,
+  nntrainer::Tensor &bbox_w_pred, nntrainer::Tensor &bbox_h_pred,
+  nntrainer::Tensor &labels, nntrainer::Tensor &bbox_x_gt,
+  nntrainer::Tensor &bbox_y_gt, nntrainer::Tensor &bbox_w_gt,
+  nntrainer::Tensor &bbox_h_gt, nntrainer::Tensor &confidence_gt,
+  nntrainer::Tensor &class_gt, nntrainer::Tensor &bbox_class_mask,
+  nntrainer::Tensor &iou_mask) {
+  /** NYI */
+}
+
+#ifdef PLUGGABLE
+
+/** @brief Factory: allocate a YoloV2LossLayer for the pluggable interface. */
+nntrainer::Layer *create_yolo_v2_loss_layer() {
+  auto layer = new YoloV2LossLayer();
+  return layer;
+}
+
+/**
+ * @brief Destroy a layer created by create_yolo_v2_loss_layer().
+ * NOTE(review): "destory" is a typo for "destroy"; the name has external
+ * linkage, so it is only flagged here — confirm before renaming.
+ */
+void destory_yolo_v2_loss_layer(nntrainer::Layer *layer) { delete layer; }
+
+/**
+ * @note ml_train_layer_pluggable defines the entry point for nntrainer to
+ * register a plugin layer
+ */
+extern "C" {
+nntrainer::LayerPluggable ml_train_layer_pluggable{create_yolo_v2_loss_layer,
+                                                   destory_yolo_v2_loss_layer};
+}
+
+#endif
+} // namespace custom
diff --git a/Applications/YOLO/jni/yolo_v2_loss.h b/Applications/YOLO/jni/yolo_v2_loss.h
new file mode 100644 (file)
index 0000000..11237ea
--- /dev/null
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2023 Hyeonseok Lee <hs89.lee@samsung.com>
+ *
+ * @file   yolo_v2_loss.h
+ * @date   07 March 2023
+ * @brief  This file contains the yolo v2 loss layer
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Hyeonseok Lee <hs89.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+#ifndef __YOLO_V2_LOSS_LAYER_H__
+#define __YOLO_V2_LOSS_LAYER_H__
+
+#include <string>
+
+#include <acti_func.h>
+#include <base_properties.h>
+#include <layer_context.h>
+#include <layer_devel.h>
+#include <node_exporter.h>
+
+namespace custom {
+
+namespace props {
+
+/**
+ * @brief maximum object number in 1 image for given dataset
+ *
+ */
+class MaxObjectNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  MaxObjectNumber(const unsigned &value = 1);
+  static constexpr const char *key = "max_object_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief class number for given dataset
+ *
+ */
+class ClassNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  ClassNumber(const unsigned &value = 1);
+  static constexpr const char *key = "class_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief grid height number
+ *
+ */
+class GridHeightNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  GridHeightNumber(const unsigned &value = 1);
+  static constexpr const char *key = "grid_height_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief grid width number
+ *
+ */
+class GridWidthNumber final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  GridWidthNumber(const unsigned &value = 1);
+  static constexpr const char *key = "grid_width_number";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief image height size
+ *
+ */
+class ImageHeightSize final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  ImageHeightSize(const unsigned &value = 1);
+  static constexpr const char *key = "image_height_size";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+/**
+ * @brief image width size
+ *
+ */
+class ImageWidthSize final : public nntrainer::PositiveIntegerProperty {
+public:
+  /** @param value initial property value (defaults to 1) */
+  ImageWidthSize(const unsigned &value = 1);
+  static constexpr const char *key = "image_width_size";
+  using prop_tag = nntrainer::uint_prop_tag;
+};
+
+} // namespace props
+
+/**
+ * @brief Yolo V2 loss layer
+ *
+ */
+class YoloV2LossLayer final : public nntrainer::Layer {
+public:
+  /**
+   * @brief Construct a new YoloV2Loss Layer object
+   *
+   */
+  YoloV2LossLayer();
+
+  /**
+   * @brief Destroy the YoloV2Loss Layer object
+   *
+   */
+  ~YoloV2LossLayer() {}
+
+  /**
+   * @copydoc Layer::finalize(InitLayerContext &context)
+   */
+  void finalize(nntrainer::InitLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
+   */
+  void forwarding(nntrainer::RunLayerContext &context, bool training) override;
+
+  /**
+   * @copydoc Layer::calcDerivative(RunLayerContext &context)
+   */
+  void calcDerivative(nntrainer::RunLayerContext &context) override;
+
+  /**
+   * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
+   */
+  void exportTo(nntrainer::Exporter &exporter,
+                const ml::train::ExportMethods &method) const override;
+
+  /**
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+   */
+  void setProperty(const std::vector<std::string> &values) override;
+
+  /**
+   * @copydoc bool supportBackwarding() const
+   */
+  bool supportBackwarding() const override { return true; };
+
+  /**
+   * @copydoc Layer::requireLabel()
+   */
+  bool requireLabel() const { return true; }
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return YoloV2LossLayer::type; };
+
+  inline static const std::string type = "yolo_v2_loss";
+
+private:
+  static constexpr unsigned int NUM_ANCHOR = 5;
+  const float anchors_w_buf[NUM_ANCHOR] = {1.3221, 3.19275, 5.05587, 9.47112,
+                                           11.2364};
+  const float anchors_h_buf[NUM_ANCHOR] = {1.73145, 4.00944, 8.09892, 4.84053,
+                                           10.0071};
+  const nntrainer::Tensor anchors_w;
+  const nntrainer::Tensor anchors_h;
+  nntrainer::Tensor anchors_ratio;
+
+  nntrainer::ActiFunc sigmoid; /** sigmoid activation operation */
+  nntrainer::ActiFunc softmax; /** softmax activation operation */
+
+  std::tuple<props::MaxObjectNumber, props::ClassNumber,
+             props::GridHeightNumber, props::GridWidthNumber,
+             props::ImageHeightSize, props::ImageWidthSize>
+    yolo_v2_loss_props;
+  std::array<unsigned int, 8> wt_idx; /**< indices of the weights */
+
+  /**
+   * @brief find responsible anchors per object
+   */
+  unsigned int find_responsible_anchors(float bbox_ratio);
+
+  /**
+   * @brief generate ground truth, mask from labels
+   */
+  void generate_ground_truth(
+    nntrainer::Tensor &bbox_x_pred, nntrainer::Tensor &bbox_y_pred,
+    nntrainer::Tensor &bbox_w_pred, nntrainer::Tensor &bbox_h_pred,
+    nntrainer::Tensor &labels, nntrainer::Tensor &bbox_x_gt,
+    nntrainer::Tensor &bbox_y_gt, nntrainer::Tensor &bbox_w_gt,
+    nntrainer::Tensor &bbox_h_gt, nntrainer::Tensor &confidence_gt,
+    nntrainer::Tensor &class_gt, nntrainer::Tensor &bbox_class_mask,
+    nntrainer::Tensor &iou_mask);
+};
+
+} // namespace custom
+
+#endif /* __YOLO_V2_LOSS_LAYER_H__ */
index bf53d36..2198933 100644 (file)
@@ -39,6 +39,33 @@ float sqrtFloat(float x) { return sqrt(x); };
 
 double sqrtDouble(double x) { return sqrt(x); };
 
+/**
+ * NOTE(review): util_func.h already provides template versions of logFloat
+ * and exp_util (see the header hunk in this change); these non-template float
+ * overloads duplicate them and have no declaration in the header — confirm
+ * the duplication is intended. The 1.0e-20 offset guards log(0).
+ */
+float logFloat(float x) { return log(x + 1.0e-20); }
+
+float exp_util(float x) { return exp(x); }
+
+Tensor rotate_180(Tensor in) {
+  Tensor output(in.getDim());
+  output.setZero();
+  for (unsigned int i = 0; i < in.batch(); ++i) {
+    for (unsigned int j = 0; j < in.channel(); ++j) {
+      for (unsigned int k = 0; k < in.height(); ++k) {
+        for (unsigned int l = 0; l < in.width(); ++l) {
+          output.setValue(
+            i, j, k, l,
+            in.getValue(i, j, (in.height() - k - 1), (in.width() - l - 1)));
+        }
+      }
+    }
+  }
+  return output;
+}
+
+/**
+ * @brief  Calculate intersection over union between two sets of bounding
+ *         boxes. Stub: returns an empty tensor until implemented.
+ *         NOTE(review): parameter names here (w/h) should agree with the
+ *         declaration in util_func.h — confirm the intended box encoding
+ *         (corner x2/y2 vs width/height) when implementing.
+ */
+Tensor calculateIOU(Tensor &b1_x1, Tensor &b1_y1, Tensor &b1_w, Tensor &b1_h,
+                    Tensor &b2_x1, Tensor &b2_y1, Tensor &b2_w, Tensor &b2_h) {
+  /** NYI */
+  return Tensor();
+}
+
 bool isFileExist(std::string file_name) {
   std::ifstream infile(file_name);
   return infile.good();
index ba0dc1a..8fff164 100644 (file)
@@ -105,6 +105,9 @@ template <typename T = float> T logFloat(T x) {
  */
 template <typename T = float> T exp_util(T x) { return static_cast<T>(exp(x)); }
 
+/**
+ * @brief     Calculate intersection over union between two sets of bounding
+ *            boxes given as (top-left x, top-left y, width, height) tensors.
+ *            Parameter names aligned with the definition in util_func.cpp,
+ *            which uses w/h rather than the corner coordinates x2/y2.
+ * @retval    tensor of IOU values
+ */
+Tensor calculateIOU(Tensor &b1_x1, Tensor &b1_y1, Tensor &b1_w, Tensor &b1_h,
+                    Tensor &b2_x1, Tensor &b2_y1, Tensor &b2_w, Tensor &b2_h);
+
 /**
  * @brief     Check Existance of File
  * @param[in] file path of the file to be checked