Apply insertFusedActivationLayer to NEON layers (#1083)
author 김정현/동작제어Lab(SR)/Senior Engineer/삼성전자 <jh0822.kim@samsung.com>
Fri, 4 May 2018 01:21:21 +0000 (10:21 +0900)
committer GitHub Enterprise <noreply-CODE@samsung.com>
Fri, 4 May 2018 01:21:21 +0000 (10:21 +0900)
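This commit applies insertFusedActivationLayer to NEON layers
such as Conv2D and Pooling: the duplicated inline ReLU setup in
convFloat32, maxPoolFloat32 and averagePoolFloat32 is replaced by a
call to util::insertFusedActivationLayer, and the asserts that limited
the activation to FUSED_NONE or FUSED_RELU are removed.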

Signed-off-by: Junghyun Kim <jh0822.kim@samsung.com>
libs/kernel/acl/src/neon/Conv2D.cpp
libs/kernel/acl/src/neon/Pooling.cpp

index 452a167..679ecfc 100644 (file)
@@ -62,8 +62,6 @@ bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
   NEUniqueTensor bias(arm_compute::TensorInfo(bias_shape, arm_compute::Format::F32));
   NEUniqueTensor filter(arm_compute::TensorInfo(filter_shape, arm_compute::Format::F32));
 
-  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
-
   std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
 
   auto conv_f = std::make_shared<arm_compute::NEConvolutionLayer>();
@@ -72,17 +70,7 @@ bool convFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
 
   fns.emplace_back(conv_f);
 
-  if (ANEURALNETWORKS_FUSED_RELU == activation)
-  {
-    auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
-
-    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-    // Do in-place update
-    relu_f->configure(output.ptr(), nullptr, relu_info);
-
-    fns.emplace_back(relu_f);
-  }
+  util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
 
   input.allocate();
   output.allocate();
index 7816c64..5c58ae0 100644 (file)
@@ -39,8 +39,6 @@ bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
   arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
   arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
 
-  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
-
   std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
 
   arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
@@ -60,18 +58,8 @@ bool maxPoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShape,
 
   fns.emplace_back(pool_f);
 
-  if (ANEURALNETWORKS_FUSED_RELU == activation)
-  {
-    auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
-
-    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
+  util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
 
-    // Do in-place update
-    relu_f->configure(output.ptr(), nullptr, relu_info);
-
-    fns.emplace_back(relu_f);
-  }
-  
   input.allocate();
   output.allocate();
 
@@ -98,8 +86,6 @@ bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShap
   arm_compute::TensorShape input_shape = util::fromNNShape(inputShape);
   arm_compute::TensorShape output_shape = util::fromNNShape(outputShape);
 
-  assert(activation == ANEURALNETWORKS_FUSED_NONE || activation == ANEURALNETWORKS_FUSED_RELU);
-
   std::vector<std::shared_ptr<arm_compute::IFunction>> fns;
 
   arm_compute::PadStrideInfo pad_info = arm_compute::PadStrideInfo(stride_width, stride_height,
@@ -119,17 +105,7 @@ bool averagePoolFloat32(const float* inputData, const nnfw::rt::Shape& inputShap
 
   fns.emplace_back(pool_f);
 
-  if (ANEURALNETWORKS_FUSED_RELU == activation)
-  {
-    auto relu_f = std::make_shared<arm_compute::NEActivationLayer>();
-
-    const arm_compute::ActivationLayerInfo relu_info{arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
-
-    // Do in-place update
-    relu_f->configure(output.ptr(), nullptr, relu_info);
-
-    fns.emplace_back(relu_f);
-  }
+  util::insertFusedActivationLayer<NEUniqueTensor, arm_compute::NEActivationLayer>(output, activation, fns);
 
   input.allocate();
   output.allocate();