From 29ebd0960398242703faacbe48afc101bc02d3ef Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EC=B5=9C=ED=98=95=EA=B7=9C/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Senior=20Engineer/=EC=82=BC=EC=84=B1?=
 =?utf8?q?=EC=A0=84=EC=9E=90?= <hk0110.choi@samsung.com>
Date: Tue, 10 Apr 2018 16:17:24 +0900
Subject: [PATCH] Introduce nnfw_kernels (#517)

- Introduce nnfw_kernels to choose the kernel for various targets at runtime
- Apply nnfw_kernels to convFloat32, which is used for OperationType::CONV_2D
  - Introduce the environment variable NNFW_KERNEL_convFloat32 to select the target of convFloat32
  - Tested with convolution_test2.tflite

Signed-off-by: Hyung-Kyu Choi <hk0110.choi@samsung.com>
---
 src/runtime/ref/nn/common/CMakeLists.txt  |  1 +
 src/runtime/ref/nn/common/CpuExecutor.cpp | 22 +++++++++++++++++--
 src/runtime/ref/nn/common/NNFWKernels.cpp | 21 +++++++++++++++++++
 src/runtime/ref/nn/common/NNFWKernels.h   | 35 +++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 src/runtime/ref/nn/common/NNFWKernels.cpp
 create mode 100644 src/runtime/ref/nn/common/NNFWKernels.h
diff --git a/src/runtime/ref/nn/common/CMakeLists.txt b/src/runtime/ref/nn/common/CMakeLists.txt
index b2d9503..4d184fb 100644
--- a/src/runtime/ref/nn/common/CMakeLists.txt
+++ b/src/runtime/ref/nn/common/CMakeLists.txt
@@ -13,6 +13,7 @@ SET (CUR_SRCS
      ${CMAKE_CURRENT_SOURCE_DIR}/CpuExecutor.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/OperationsUtils.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/Utils.cpp
+     ${CMAKE_CURRENT_SOURCE_DIR}/NNFWKernels.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/operations/Activation.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/operations/Conv2D.cpp
      ${CMAKE_CURRENT_SOURCE_DIR}/operations/Concatenation.cpp
diff --git a/src/runtime/ref/nn/common/CpuExecutor.cpp b/src/runtime/ref/nn/common/CpuExecutor.cpp
index 6fa55b2..06b44db 100644
--- a/src/runtime/ref/nn/common/CpuExecutor.cpp
+++ b/src/runtime/ref/nn/common/CpuExecutor.cpp
@@ -21,6 +21,8 @@
 #include "NeuralNetworks.h"
 #include "Operations.h"
 
+#include "NNFWKernels.h"
+
 #include <sys/mman.h>
 
 namespace android {
@@ -142,6 +144,9 @@ int CpuExecutor::run(const Model& model, const Request& request,
     VLOG(CPUEXE) << "request: " << toString(request);
 #endif
 
+    // Prepare NNFW_KERNELS
+    android::nn::init_nnfw_kernels();
+
     mModel = &model;
     mRequest = &request; // TODO check if mRequest is needed
     initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
@@ -544,8 +549,21 @@ int CpuExecutor::executeOperation(const Operation& operation) {
                                       padding_top, padding_bottom,
                                       stride_width, stride_height,
                                       &outShape) &&
-                          setInfoAndAllocateIfNeeded(&output, outShape) &&
-                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
+                          setInfoAndAllocateIfNeeded(&output, outShape);
+
+                // Use the kernel named by NNFW_KERNEL_convFloat32; an unset or unknown
+                // name selects the "fallback" entry.
+                const char* target = std::getenv("NNFW_KERNEL_convFloat32");
+                auto it = target ? nnfw_kernels_convFloat32.find(target)
+                                 : nnfw_kernels_convFloat32.end();
+                if (it == nnfw_kernels_convFloat32.end()) {
+                    it = nnfw_kernels_convFloat32.find("fallback");
+                }
+                // Fail the operation rather than dereference end() if nothing is registered.
+                success = success && (it != nnfw_kernels_convFloat32.end());
+                auto func = success ? it->second : nullptr;
+
+                success = success && func(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                       reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                       reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                       padding_left, padding_right,
diff --git a/src/runtime/ref/nn/common/NNFWKernels.cpp b/src/runtime/ref/nn/common/NNFWKernels.cpp
new file mode 100644
index 0000000..21bd117
--- /dev/null
+++ b/src/runtime/ref/nn/common/NNFWKernels.cpp
@@ -0,0 +1,21 @@
+#include "CpuExecutor.h"
+#include "NeuralNetworks.h"
+#include "Operations.h"
+
+#include "NNFWKernels.h"
+
+#include <map>
+
+namespace android {
+namespace nn {
+
+// Registry of convFloat32 implementations, keyed by target name.
+NNFW_KERNELS_convFloat32 nnfw_kernels_convFloat32;
+
+// Stores convFloat32 under the "fallback" key, overwriting any existing entry.
+void init_nnfw_kernels() {
+  nnfw_kernels_convFloat32["fallback"] = convFloat32;
+}
+
+} // namespace nn
+} // namespace android
diff --git a/src/runtime/ref/nn/common/NNFWKernels.h b/src/runtime/ref/nn/common/NNFWKernels.h
new file mode 100644
index 0000000..a75e2d0
--- /dev/null
+++ b/src/runtime/ref/nn/common/NNFWKernels.h
@@ -0,0 +1,35 @@
+#ifndef NNFW_KERNELS_H
+#define NNFW_KERNELS_H
+
+#include "CpuExecutor.h"
+#include "NeuralNetworks.h"
+#include "Operations.h"
+
+#include <map>
+#include <string>
+
+namespace android {
+namespace nn {
+
+// Declares type KERNEL_<Name>, map type NNFW_KERNELS_<Name> and extern map nnfw_kernels_<Name>.
+#define NNFW_KERNEL(Name, Ret, Params) \
+    typedef Ret (*KERNEL_##Name) Params; \
+    typedef std::map<std::string, KERNEL_##Name> NNFW_KERNELS_##Name; \
+    extern NNFW_KERNELS_##Name nnfw_kernels_##Name
+
+// TODO-NNRT: Let's make a separate list file and include file
+NNFW_KERNEL(convFloat32, bool,
+            (const float* inputData, const Shape& inputShape,
+             const float* filterData, const Shape& filterShape,
+             const float* biasData, const Shape& biasShape,
+             int32_t padding_left, int32_t padding_right, int32_t padding_top,
+             int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
+             int32_t activation, float* outputData, const Shape& outputShape));
+#undef NNFW_KERNEL
+
+// Fills the nnfw_kernels_* maps; CpuExecutor::run() calls this before executing operations.
+void init_nnfw_kernels();
+
+} // namespace nn
+} // namespace android
+#endif // NNFW_KERNELS_H
-- 
2.7.4