#include "internal/arm_compute/kernel/View.h"
#include "internal/nnapi/kernel/Reader.h"
#include "internal/layers/GenericReshapeLayer.h"
+#include "internal/layers/SimpleArithmeticAdditionLayer.h"
#include "util/kernel/IndexIterator.h"
#include "model.h"
#include "logging.h"
+template<typename T> T from_env(const char *);
+
+template<> bool from_env(const char *s)
+{
+ if (s == nullptr)
+ {
+ return false;
+ }
+
+ return std::stoi(s) != 0;
+}
+
const char *to_string(const PaddingCode &code)
{
assert((ANEURALNETWORKS_PADDING_SAME == code) || (ANEURALNETWORKS_PADDING_VALID == code));
auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index});
auto rhs_alloc = ctx.at(::internal::tflite::operand::Index{param.rhs_index});
- auto fn = make_layer<::arm_compute::CLArithmeticAddition>();
+ std::unique_ptr<::arm_compute::IFunction> fn;
+
+ // NOTE CLArithmeticAddition emits incorrect values for ADAS model.
+ // TODO Figure out why this happens, and fix it
+ if(from_env<bool>(std::getenv("USE_CL_ARITHMETIC_ADDTION")))
+ {
+ auto l = make_layer<::arm_compute::CLArithmeticAddition>();
+
+ // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
+ l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+
+ fn = std::move(l);
+ }
+ else
+ {
+ // NOTE SimpleAritemeticAdditionLayer does not support broadcasting
+ assert(lhs_shape.depth() == rhs_shape.depth());
+ assert(lhs_shape.height() == rhs_shape.height());
+ assert(lhs_shape.width() == rhs_shape.width());
+
+ auto l = make_layer<SimpleArithmeticAdditionLayer>();
- // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification
- fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE);
+ l->configure(lhs_alloc, rhs_alloc, ofm_alloc);
+
+ fn = std::move(l);
+ }
builder.append(std::move(fn));
--- /dev/null
+#ifndef __SIMPLE_ARITHMETIC_LAYER_H__
+#define __SIMPLE_ARITHMETIC_LAYER_H__
+
+#include <arm_compute/core/CL/ICLTensor.h>
+
+class SimpleArithmeticAdditionLayer : public ::arm_compute::IFunction
+{
+public:
+ void configure(::arm_compute::ICLTensor *lhs, ::arm_compute::ICLTensor *rhs, ::arm_compute::ICLTensor *out)
+ {
+ _lhs = lhs;
+ _rhs = rhs;
+ _out = out;
+ }
+
+public:
+ void run(void) override
+ {
+ auto &q = ::arm_compute::CLScheduler::get().queue();
+
+ _lhs->map(q);
+ _rhs->map(q);
+ _out->map(q);
+
+ arm_compute::Window window;
+ window.use_tensor_dimensions(_out->info()->tensor_shape());
+
+ execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+ const auto lhs_value = *reinterpret_cast<float *>(_lhs->ptr_to_element(id));
+ const auto rhs_value = *reinterpret_cast<float *>(_rhs->ptr_to_element(id));
+
+ *reinterpret_cast<float *>(_out->ptr_to_element(id)) = lhs_value + rhs_value;
+ });
+
+ _out->unmap(q);
+ _rhs->unmap(q);
+ _lhs->unmap(q);
+ }
+
+private:
+ ::arm_compute::ICLTensor *_lhs;
+ ::arm_compute::ICLTensor *_rhs;
+ ::arm_compute::ICLTensor *_out;
+};
+
+#endif // __SIMPLE_ARITHMETIC_LAYER_H__