[neurun] Enable {CL|NE}DepthwiseConvolutionLayer3x3 on KernelGenerator (#6008)
author: 김용섭/On-Device Lab(SR)/Engineer/삼성전자 <yons.kim@samsung.com>
Tue, 30 Jul 2019 04:40:52 +0000 (13:40 +0900)
committer: 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Tue, 30 Jul 2019 04:40:52 +0000 (13:40 +0900)
Enable {CL|NE}DepthwiseConvolutionLayer3x3 in the acl_cl and acl_neon
KernelGenerators; it is selected when the kernel is 3x3. Only
CLDepthwiseConvolutionLayer3x3 takes the internal buffer memory manager
as a constructor parameter.

Signed-off-by: Yongseop Kim <yons.kim@samsung.com>
runtimes/neurun/backend/acl_cl/KernelGenerator.cc
runtimes/neurun/backend/acl_neon/KernelGenerator.cc

index 5ae4b7a..376b256 100644 (file)
@@ -252,12 +252,25 @@ void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node)
   // TODO Use `activation` instead of `model::Activation::NONE`. See below.
   const auto act_info = acl_common::asActivationLayerInfo(model::Activation::NONE);
 
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+  if (ker_height == 3 && ker_width == 3)
+  {
+    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>(
+        _tensor_builder->acl_memory_manager()->internal_buffer_manager());
 
-  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
-                conv_info, multiplier, act_info);
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info, multiplier, act_info);
 
-  _execution_builder->append(asAclFunction(std::move(fn)));
+    _execution_builder->append(asAclFunction(std::move(fn)));
+  }
+  else
+  {
+    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+
+    _execution_builder->append(asAclFunction(std::move(fn)));
+  }
 
   // TODO Use fused activation instead of separate layer after switching to ACL version >= v19.05.
   // Prior versions had a bug due to which the fused activation did not apply in some cases.
index 40743e9..6997d25 100644 (file)
@@ -263,12 +263,24 @@ void KernelGenerator::visit(const model::operation::DepthwiseConv2DNode &node)
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+  if (ker_height == 3 && ker_width == 3)
+  {
+    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
 
-  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
-                conv_info, multiplier, act_info);
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info, multiplier, act_info);
 
-  _execution_builder->append(asAclFunction(std::move(fn)));
+    _execution_builder->append(asAclFunction(std::move(fn)));
+  }
+  else
+  {
+    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+
+    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
+                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+
+    _execution_builder->append(asAclFunction(std::move(fn)));
+  }
 }
 
 void KernelGenerator::visit(const model::operation::MaxPool2DNode &node)