From: 박종현/동작제어Lab(SR)/Senior Engineer/삼성전자 Date: Wed, 30 May 2018 02:23:54 +0000 (+0900) Subject: [Pure CL Runtime] Use SimpleArithmeticAdditionLayer (#1414) X-Git-Tag: 0.2~794 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=83400e2d1aab1c748d623bb4c815dcd5fd3eec08;p=platform%2Fcore%2Fml%2Fnnfw.git [Pure CL Runtime] Use SimpleArithmeticAdditionLayer (#1414) This commit introduces SimpleArithmeticAdditionLayer, and uses it as the default kernel on the pure CL runtime. Signed-off-by: Jonghyun Park --- diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index da8d903..0d07d2c 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -16,6 +16,7 @@ #include "internal/arm_compute/kernel/View.h" #include "internal/nnapi/kernel/Reader.h" #include "internal/layers/GenericReshapeLayer.h" +#include "internal/layers/SimpleArithmeticAdditionLayer.h" #include "util/kernel/IndexIterator.h" @@ -23,6 +24,18 @@ #include "model.h" #include "logging.h" +template<typename T> T from_env(const char *); + +template<> bool from_env<bool>(const char *s) +{ + if (s == nullptr) + { + return false; + } + + return std::stoi(s) != 0; +} + const char *to_string(const PaddingCode &code) { assert((ANEURALNETWORKS_PADDING_SAME == code) || (ANEURALNETWORKS_PADDING_VALID == code)); @@ -305,10 +318,32 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node) auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index}); auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index}); - auto fn = make_layer<::arm_compute::CLArithmeticAddition>(); + std::unique_ptr<::arm_compute::IFunction> fn; + + // NOTE CLArithmeticAddition emits incorrect values for ADAS model. 
+ // TODO Figure out why this happens, and fix it + if(from_env<bool>(std::getenv("USE_CL_ARITHMETIC_ADDTION"))) + { + auto l = make_layer<::arm_compute::CLArithmeticAddition>(); + + // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification + l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); + + fn = std::move(l); + } + else + { + // NOTE SimpleArithmeticAdditionLayer does not support broadcasting + assert(lhs_shape.depth() == rhs_shape.depth()); + assert(lhs_shape.height() == rhs_shape.height()); + assert(lhs_shape.width() == rhs_shape.width()); + + auto l = make_layer<SimpleArithmeticAdditionLayer>(); - // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); + l->configure(lhs_alloc, rhs_alloc, ofm_alloc); + + fn = std::move(l); + } builder.append(std::move(fn)); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h new file mode 100644 index 0000000..77f8962 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h @@ -0,0 +1,46 @@ +#ifndef __SIMPLE_ARITHMETIC_LAYER_H__ +#define __SIMPLE_ARITHMETIC_LAYER_H__ + +#include <arm_compute/runtime/CL/CLScheduler.h> + +class SimpleArithmeticAdditionLayer : public ::arm_compute::IFunction +{ +public: + void configure(::arm_compute::ICLTensor *lhs, ::arm_compute::ICLTensor *rhs, ::arm_compute::ICLTensor *out) + { + _lhs = lhs; + _rhs = rhs; + _out = out; + } + +public: + void run(void) override + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + _lhs->map(q); + _rhs->map(q); + _out->map(q); + + arm_compute::Window window; + window.use_tensor_dimensions(_out->info()->tensor_shape()); + + execute_window_loop(window, [this](const arm_compute::Coordinates &id) { + const auto lhs_value = *reinterpret_cast<float *>(_lhs->ptr_to_element(id)); + const auto rhs_value = 
*reinterpret_cast<float *>(_rhs->ptr_to_element(id)); + + *reinterpret_cast<float *>(_out->ptr_to_element(id)) = lhs_value + rhs_value; + }); + + _out->unmap(q); + _rhs->unmap(q); + _lhs->unmap(q); + } + +private: + ::arm_compute::ICLTensor *_lhs; + ::arm_compute::ICLTensor *_rhs; + ::arm_compute::ICLTensor *_out; +}; + +#endif // __SIMPLE_ARITHMETIC_LAYER_H__