[ Utils ] create NNTrThread Features
author jijoong.moon <jijoong.moon@samsung.com>
Thu, 7 Jul 2022 09:32:37 +0000 (18:32 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 18 Jul 2022 07:11:17 +0000 (16:11 +0900)
This patch includes the NNTrThreads Features. This will be used for the
Multi-Thread Feature of nntrainer, such as Thread Pool, for loop
multi-threading along batch direction.

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
12 files changed:
debian/nntrainer-dev.install
jni/Android.mk
meson_options.txt
nntrainer/dataset/func_data_producer.cpp
nntrainer/dataset/func_data_producer.h
nntrainer/layers/common_properties.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/conv2d_layer.cpp
nntrainer/utils/meson.build
nntrainer/utils/nntr_threads.cpp [new file with mode: 0644]
nntrainer/utils/nntr_threads.h [new file with mode: 0644]
packaging/nntrainer.spec

index f8f8e2a..b561ab4 100644 (file)
@@ -4,6 +4,8 @@
 /usr/include/nntrainer/common_properties.h
 /usr/include/nntrainer/base_properties.h
 /usr/include/nntrainer/node_exporter.h
+/usr/include/nntrainer/profiler.h
+/usr/include/nntrainer/nntr_threads.h
 # tensor headers
 /usr/include/nntrainer/tensor.h
 /usr/include/nntrainer/tensor_wrap_specs.h
index 10c4beb..c1410fd 100644 (file)
@@ -204,6 +204,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/utils/profiler.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/utils/node_exporter.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/utils/base_properties.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/utils/nntr_threads.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/compiler/ini_interpreter.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/compiler/flatten_realizer.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/compiler/activation_realizer.cpp \
index 6f67cb3..d5d2eb2 100644 (file)
@@ -21,8 +21,8 @@ option('capi-ml-common-actual', type: 'string', value: 'capi-ml-common',
 option('tizen-version-major', type: 'integer', min : 4, max : 9999, value: 9999) # 9999 means "not Tizen"
 option('tizen-version-minor', type: 'integer', min : 0, max : 9999, value: 0)
 option('openblas-num-threads', type: 'integer', min : 0, max : 9999, value: 0)
-#This is for the multi-threading in nntrainer. ( multi-threading along batch direction )
-option('nntr-num-threads', type: 'integer', min : 0, max : 9999, value: 2)
+#This is for the multi-threading in nntrainer
+option('nntr-num-threads', type: 'integer', min : 0, max : 9999, value: 1)
 
 # test related option
 option('reduce-tolerance', type: 'boolean', value: true)
index 55d91ae..889879c 100644 (file)
 
 namespace nntrainer {
 
-/**
- * @brief User data props
- *
- */
-class PropsUserData final : public Property<void *> {
-public:
-  static constexpr const char *key = "user_data";
-  PropsUserData(void *user_data) { set(user_data); }
-  using prop_tag = ptr_prop_tag;
-};
-
 FuncDataProducer::FuncDataProducer(datagen_cb datagen_cb, void *user_data_) :
   cb(datagen_cb),
-  user_data_prop(new PropsUserData(user_data_)) {}
+  user_data_prop(new props::PropsUserData(user_data_)) {}
 
 FuncDataProducer::~FuncDataProducer() {}
 
index e1beebe..07f25a4 100644 (file)
@@ -13,8 +13,8 @@
 #ifndef __FUNC_DATA_PRODUCER_H__
 #define __FUNC_DATA_PRODUCER_H__
 
+#include <common_properties.h>
 #include <data_producer.h>
-
 #include <dataset.h>
 
 #include <memory>
@@ -23,7 +23,6 @@
 
 namespace nntrainer {
 
-class PropsUserData;
 class Exporter;
 
 using datagen_cb = ml::train::datagen_cb;
@@ -78,7 +77,7 @@ public:
 
 private:
   datagen_cb cb;
-  std::unique_ptr<PropsUserData> user_data_prop;
+  std::unique_ptr<props::PropsUserData> user_data_prop;
 };
 
 } // namespace nntrainer
index f662e85..ab87da2 100644 (file)
@@ -240,6 +240,8 @@ WeightRegularizerConstant::WeightRegularizerConstant(float value) :
 WeightDecay::WeightDecay(float value) : BasicRegularizerConstant(value) {}
 BiasDecay::BiasDecay(float value) : BasicRegularizerConstant(value) {}
 
+PropsUserData::PropsUserData(void *user_data) { set(user_data); }
+
 bool BasicRegularizerConstant::isValid(const float &value) const {
   return value >= 0.0f;
 }
index 9f87180..1efa82f 100644 (file)
@@ -1167,6 +1167,17 @@ public:
   using prop_tag = uint_prop_tag;                   /**< property type */
 };
 
+/**
+ * @brief User data props
+ *
+ */
+class PropsUserData final : public Property<void *> {
+public:
+  PropsUserData(void *user_data);
+  static constexpr const char *key = "user_data";
+  using prop_tag = ptr_prop_tag;
+};
+
 } // namespace props
 } // namespace nntrainer
 
index 32c4ad8..e73c3de 100644 (file)
@@ -20,6 +20,7 @@
 #include <conv2d_layer.h>
 #include <layer_context.h>
 #include <lazy_tensor.h>
+#include <nntr_threads.h>
 #include <nntrainer_error.h>
 #include <nntrainer_log.h>
 #include <node_exporter.h>
@@ -451,16 +452,12 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {
   /// filter_kernel^T X derivaitive  -> column matrix
   /// col2im(column matrix) to reconstruct the original image
 
-  unsigned int num_threads = NNTR_NUM_THREADS;
-
-  if (num_threads > derivative.batch())
-    num_threads = 1;
-
-  if (num_threads > 1) {
+  if (NNTR_NUM_THREADS > 1) {
     auto dowork = [&](size_t s, size_t e, void *user_data) {
+      Tensor result =
+        Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
+
       for (size_t b = s; b < e; ++b) {
-        Tensor result =
-          Tensor(calcCol2ImOutputDim(derivative.getDim(), filter_dim));
         Tensor deriv_sub = derivative.getBatchSlice(b, 1);
         Tensor in_deriv_sub = input_derivative.getBatchSlice(b, 1);
         deriv_sub.reshape(
@@ -470,22 +467,9 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {
       }
     };
 
-    size_t start = 0;
-    size_t end = derivative.batch();
-    size_t chunk = (end - start + (num_threads - 1)) / num_threads;
-
-    std::vector<std::thread> workers;
-
-    for (unsigned int i = 0; i < num_threads; ++i) {
-      size_t s = start + i * chunk;
-      size_t e = s + chunk;
-      if (e > end)
-        e = end;
-      workers.push_back(std::thread(dowork, s, e, nullptr));
-    }
+    auto workers = ParallelBatch(dowork, derivative.batch(), nullptr);
 
-    for (unsigned int i = 0; i < num_threads; ++i)
-      workers[i].join();
+    workers.run();
 
   } else {
 
@@ -499,7 +483,8 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {
         {filter_size, derivative.width() * derivative.height()});
 
       filter_kernel.dot(deriv_sub, col2im_result, true, false);
-      col2im(col2im_result, filter_dim, padding, stride, dilation, in_deriv_sub);
+      col2im(col2im_result, filter_dim, padding, stride, dilation,
+             in_deriv_sub);
     }
   }
 
index 2c9ee7b..20d95bd 100644 (file)
@@ -3,13 +3,16 @@ util_sources = [
   'profiler.cpp',
   'ini_wrapper.cpp',
   'node_exporter.cpp',
-  'base_properties.cpp'
+  'base_properties.cpp',
+  'nntr_threads.cpp'
 ]
 
 util_headers = [
   'base_properties.h',
   'node_exporter.h',
   'util_func.h',
+  'profiler.h',
+  'nntr_threads.h'
 ]
 
 foreach s : util_sources
diff --git a/nntrainer/utils/nntr_threads.cpp b/nntrainer/utils/nntr_threads.cpp
new file mode 100644 (file)
index 0000000..4c27243
--- /dev/null
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file nntr_threads.cpp
+ * @date 07 July 2022
+ * @brief Thread Management for NNTrainer
+ * @see        https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <nntr_threads.h>
+
+namespace nntrainer {
+
+ParallelBatch::ParallelBatch(threaded_cb threaded_cb, unsigned int batch_size,
+                             void *user_data_) :
+  cb(threaded_cb),
+  batch(batch_size),
+  num_workers(NNTR_NUM_THREADS > batch ? 1 : NNTR_NUM_THREADS),
+  user_data_prop(new props::PropsUserData(user_data_)) {}
+
+ParallelBatch::~ParallelBatch() {}
+
+void ParallelBatch::run() {
+
+  unsigned int start = 0;
+  unsigned int end = batch;
+
+  unsigned int chunk = (end - start + (num_workers - 1)) / num_workers;
+
+  for (unsigned int i = 0; i < num_workers; ++i) {
+    unsigned int s = start + i * chunk;
+    unsigned int e = s + chunk;
+    if (e > end)
+      e = end;
+    workers.push_back(std::thread(cb, s, e, user_data_prop->get()));
+  }
+
+  for (unsigned int i = 0; i < num_workers; ++i)
+    workers[i].join();
+}
+
+} // namespace nntrainer
diff --git a/nntrainer/utils/nntr_threads.h b/nntrainer/utils/nntr_threads.h
new file mode 100644 (file)
index 0000000..2f9f8f1
--- /dev/null
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2022 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file nntr_threads.h
+ * @date 07 July 2022
+ * @brief Thread Management for NNTrainer
+ * @see        https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#ifndef __NNTR_THREADS_H__
+#define __NNTR_THREADS_H__
+
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <common_properties.h>
+#include <nntrainer_error.h>
+#include <util_func.h>
+
+typedef void (*loop_cb)(unsigned int start, unsigned int end, void *user_data);
+
+typedef std::function<std::remove_pointer<loop_cb>::type> threaded_cb;
+
+namespace nntrainer {
+
+/**
+ * @brief ParallelBatch class to parallelize along batch direction
+ *
+ */
+class ParallelBatch {
+public:
+  /**
+   * @brief Construct a new ParallelBatch object
+   *
+   */
+  ParallelBatch(threaded_cb threaded_cb, unsigned int batch, void *user_data_);
+
+  /**
+   * @brief Destroy the ParallelBatch object
+   *
+   */
+  ~ParallelBatch();
+
+  /**
+   * @brief Run the workers
+   *
+   */
+  void run();
+
+private:
+  threaded_cb cb;
+  unsigned int batch;
+  unsigned int num_workers;
+  std::vector<std::thread> workers;
+  std::unique_ptr<props::PropsUserData> user_data_prop;
+};
+
+} // namespace nntrainer
+#endif // __NNTR_THREADS_H__
index 7f54fde..87e9962 100644 (file)
@@ -479,6 +479,8 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
 %{_includedir}/nntrainer/common_properties.h
 %{_includedir}/nntrainer/base_properties.h
 %{_includedir}/nntrainer/node_exporter.h
+%{_includedir}/nntrainer/nntr_threads.h
+%{_includedir}/nntrainer/profiler.h
 # tensor headers
 %{_includedir}/nntrainer/tensor.h
 %{_includedir}/nntrainer/tensor_wrap_specs.h