[Pure CL Runtime] Use SimpleArithmeticAdditionLayer (#1414)
author박종현/동작제어Lab(SR)/Senior Engineer/삼성전자 <jh1302.park@samsung.com>
Wed, 30 May 2018 02:23:54 +0000 (11:23 +0900)
committer서상민/동작제어Lab(SR)/Senior Engineer/삼성전자 <sangmin7.seo@samsung.com>
Wed, 30 May 2018 02:23:54 +0000 (11:23 +0900)
This commit introduces SimpleArithmeticAdditionLayer, and uses it as the
default kernel on the pure CL runtime.

Signed-off-by: Jonghyun Park <jh1302.park@samsung.com>
runtimes/pure_arm_compute/src/compilation.cc
runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h [new file with mode: 0644]

index da8d903..0d07d2c 100644 (file)
@@ -16,6 +16,7 @@
 #include "internal/arm_compute/kernel/View.h"
 #include "internal/nnapi/kernel/Reader.h"
 #include "internal/layers/GenericReshapeLayer.h"
+#include "internal/layers/SimpleArithmeticAdditionLayer.h"
 
 #include "util/kernel/IndexIterator.h"
 
 #include "model.h"
 #include "logging.h"
 
+/**
+ * @brief Interpret the raw value of an environment variable as a value of type T
+ *
+ * Only the declaration is given for the primary template; a type is supported
+ * when an explicit specialization for it is defined.
+ */
+template<typename T> T from_env(const char *);
+
+/**
+ * @brief Interpret the raw value of an environment variable as a boolean flag
+ *
+ * @param s Raw value (e.g. the result of std::getenv); may be nullptr
+ * @return true if s begins with a base-10 integer whose value is non-zero;
+ *         false if s is nullptr, does not begin with an integer (e.g. "" or
+ *         "true"), or begins with an integer equal to zero
+ *
+ * NOTE std::strtol is used rather than std::stoi because std::stoi throws
+ *      (std::invalid_argument / std::out_of_range) on malformed or huge input,
+ *      which would abort the runtime just because a variable was set oddly.
+ */
+template<> bool from_env(const char *s)
+{
+  if (s == nullptr)
+  {
+    return false;
+  }
+
+  char *end = nullptr;
+  const long value = std::strtol(s, &end, 10);
+
+  // Nothing was converted: the value is not a number, so treat the flag as unset
+  if (end == s)
+  {
+    return false;
+  }
+
+  return value != 0;
+}
+
 const char *to_string(const PaddingCode &code)
 {
   assert((ANEURALNETWORKS_PADDING_SAME == code) || (ANEURALNETWORKS_PADDING_VALID == code));
@@ -305,10 +318,32 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node)
     auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
     auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
 
-    auto fn = make_layer<::arm_compute::CLArithmeticAddition>();
+    std::unique_ptr<::arm_compute::IFunction> fn;
+
+    // NOTE CLArithmeticAddition emits incorrect values for ADAS model.
+    // TODO Figure out why this happens, and fix it
+    if(from_env<bool>(std::getenv("USE_CL_ARITHMETIC_ADDTION")))
+    {
+      auto l = make_layer<::arm_compute::CLArithmeticAddition>();
+
+      // TODO Decide ConvertPolicy (WRAP? SATURATE?) according to NN API specification
+      l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+
+      fn = std::move(l);
+    }
+    else
+    {
+      // NOTE SimpleArithmeticAdditionLayer does not support broadcasting
+      assert(lhs_shape.depth() == rhs_shape.depth());
+      assert(lhs_shape.height() == rhs_shape.height());
+      assert(lhs_shape.width() == rhs_shape.width());
+
+      auto l = make_layer<SimpleArithmeticAdditionLayer>();
 
-    // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
-    fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+      l->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+      fn = std::move(l);
+    }
 
     builder.append(std::move(fn));
 
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAdditionLayer.h
new file mode 100644 (file)
index 0000000..77f8962
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef __SIMPLE_ARITHMETIC_LAYER_H__
+#define __SIMPLE_ARITHMETIC_LAYER_H__
+
+#include <arm_compute/core/CL/ICLTensor.h>
+
+/**
+ * @brief Element-wise addition of two CL tensors, computed on the host
+ *
+ * run() maps all three tensors through the CLScheduler queue, walks every
+ * coordinate of the output tensor's shape, reads one float from each operand
+ * at that coordinate, stores their sum into the output, and unmaps the
+ * tensors again.
+ *
+ * NOTE Elements are accessed as float and both operands are indexed with the
+ *      output coordinate, so broadcasting is not handled here.
+ * NOTE(review) This header only includes ICLTensor.h; IFunction, CLScheduler,
+ *      Window and execute_window_loop appear to be expected from the includer
+ *      - confirm.
+ */
+class SimpleArithmeticAdditionLayer : public ::arm_compute::IFunction
+{
+public:
+  /**
+   * @brief Record the tensors to operate on
+   *
+   * The pointers are stored as-is (no copy is made, nothing is freed by this
+   * class) and are dereferenced by every subsequent call to run().
+   *
+   * @param lhs Left-hand side operand
+   * @param rhs Right-hand side operand
+   * @param out Tensor that receives lhs + rhs
+   */
+  void configure(::arm_compute::ICLTensor *lhs, ::arm_compute::ICLTensor *rhs, ::arm_compute::ICLTensor *out)
+  {
+    _lhs = lhs;
+    _rhs = rhs;
+    _out = out;
+  }
+
+public:
+  /**
+   * @brief Compute out = lhs + rhs for every element of out
+   *
+   * configure() must have been called beforehand.
+   */
+  void run() override
+  {
+    auto &q = ::arm_compute::CLScheduler::get().queue();
+
+    // Make the tensor buffers accessible from the host
+    _lhs->map(q);
+    _rhs->map(q);
+    _out->map(q);
+
+    // Iterate over every coordinate of the output tensor
+    ::arm_compute::Window window;
+    window.use_tensor_dimensions(_out->info()->tensor_shape());
+
+    ::arm_compute::execute_window_loop(window, [this](const ::arm_compute::Coordinates &id) {
+      const auto lhs_value = *reinterpret_cast<float *>(_lhs->ptr_to_element(id));
+      const auto rhs_value = *reinterpret_cast<float *>(_rhs->ptr_to_element(id));
+
+      *reinterpret_cast<float *>(_out->ptr_to_element(id)) = lhs_value + rhs_value;
+    });
+
+    // Release the mappings in the reverse order of acquisition
+    _out->unmap(q);
+    _rhs->unmap(q);
+    _lhs->unmap(q);
+  }
+
+private:
+  // Initialized to nullptr so that run() without configure() does not read
+  // indeterminate pointers
+  ::arm_compute::ICLTensor *_lhs = nullptr;
+  ::arm_compute::ICLTensor *_rhs = nullptr;
+  ::arm_compute::ICLTensor *_out = nullptr;
+};
+
+#endif // __SIMPLE_ARITHMETIC_LAYER_H__