Refactor: Don't include all ComputeLibrary function definitions everywhere.
author: Matthew Bentham <matthew.bentham@arm.com>
Tue, 8 Jan 2019 17:52:37 +0000 (17:52 +0000)
committer: Matthew Bentham <matthew.bentham@arm.com>
Tue, 8 Jan 2019 18:00:12 +0000 (18:00 +0000)
Just include the function definition that is specifically needed for each workload.
Also, tighten up the scope where Compute Library functions are available.

Knocks about 30 seconds off a 4m30s single-threaded compile of the Neon workloads.

Change-Id: Idac438f3bc77ff978295fbc9505cb42447def145

35 files changed:
src/backends/neon/workloads/NeonActivationWorkload.cpp
src/backends/neon/workloads/NeonActivationWorkload.hpp
src/backends/neon/workloads/NeonAdditionWorkload.cpp
src/backends/neon/workloads/NeonAdditionWorkload.hpp
src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
src/backends/neon/workloads/NeonFloorFloatWorkload.hpp
src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp
src/backends/neon/workloads/NeonMergerWorkload.cpp
src/backends/neon/workloads/NeonMergerWorkload.hpp
src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp
src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp
src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp
src/backends/neon/workloads/NeonPooling2dWorkload.cpp
src/backends/neon/workloads/NeonPooling2dWorkload.hpp
src/backends/neon/workloads/NeonReshapeWorkload.cpp
src/backends/neon/workloads/NeonReshapeWorkload.hpp
src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp
src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp
src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp
src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp
src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp
src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp
src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp
src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp
src/backends/neon/workloads/NeonWorkloadUtils.hpp

index 6e95678..c75a138 100644 (file)
@@ -4,8 +4,11 @@
 //
 
 #include "NeonActivationWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h>
+
 namespace armnn
 {
 
@@ -43,13 +46,16 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor&
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+    auto layer = std::make_unique<arm_compute::NEActivationLayer>();
+    layer->configure(&input, &output, activationLayerInfo);
+
+    m_ActivationLayer.reset(layer.release());
 }
 
 void NeonActivationWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationWorkload_Execute");
-    m_ActivationLayer.run();
+    m_ActivationLayer->run();
 }
 
 } //namespace armnn
index fc7c646..eefbfb6 100644 (file)
@@ -5,7 +5,10 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
 
 namespace armnn
 {
@@ -21,7 +24,7 @@ public:
     void Execute() const override;
 
 private:
-    mutable arm_compute::NEActivationLayer m_ActivationLayer;
+    std::unique_ptr<arm_compute::IFunction> m_ActivationLayer;
 };
 
 } //namespace armnn
index 70a3909..fa53781 100644 (file)
@@ -4,9 +4,13 @@
 //
 
 #include "NeonAdditionWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
+
 namespace armnn
 {
 
@@ -35,13 +39,15 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri
     arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
+    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    m_AddLayer.reset(layer.release());
 }
 
 void NeonAdditionWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionWorkload_Execute");
-    m_AddLayer.run();
+    m_AddLayer->run();
 }
 
 } //namespace armnn
index ca8ae8d..826fb1f 100644 (file)
@@ -5,7 +5,10 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
 
 namespace armnn
 {
@@ -21,7 +24,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEArithmeticAddition m_AddLayer;
+    std::unique_ptr<arm_compute::IFunction> m_AddLayer;
 };
 
 } //namespace armnn
index 44d5035..fc80f41 100644 (file)
@@ -4,9 +4,13 @@
 //
 
 #include "NeonBatchNormalizationWorkload.hpp"
+
+#include "NeonWorkloadUtils.hpp"
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <armnn/ArmNN.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
 
 namespace armnn
 {
@@ -68,13 +72,15 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
     m_Beta = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
 
-    m_Layer.configure(&input,
-                      &output,
-                      m_Mean.get(),
-                      m_Variance.get(),
-                      m_Beta.get(),
-                      m_Gamma.get(),
-                      m_Data.m_Parameters.m_Eps);
+    auto layer = std::make_unique<arm_compute::NEBatchNormalizationLayer>();
+    layer->configure(&input,
+                     &output,
+                     m_Mean.get(),
+                     m_Variance.get(),
+                     m_Beta.get(),
+                     m_Gamma.get(),
+                     m_Data.m_Parameters.m_Eps);
+    m_Layer.reset(layer.release());
 
     InitializeArmComputeTensorData(*m_Mean, m_Data.m_Mean);
     InitializeArmComputeTensorData(*m_Variance, m_Data.m_Variance);
@@ -83,14 +89,14 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
 
     // Force Compute Library to perform the necessary copying and reshaping, after which
     // delete all the input tensors that will no longer be needed
-    m_Layer.prepare();
+    m_Layer->prepare();
     FreeUnusedTensors();
 }
 
 void NeonBatchNormalizationWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationWorkload_Execute");
-    m_Layer.run();
+    m_Layer->run();
 }
 
 void NeonBatchNormalizationWorkload::FreeUnusedTensors()
index 52e4db7..3619ea0 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/Tensor.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -26,7 +31,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEBatchNormalizationLayer m_Layer;
+    std::unique_ptr<arm_compute::IFunction> m_Layer;
 
     std::unique_ptr<arm_compute::Tensor> m_Mean;
     std::unique_ptr<arm_compute::Tensor> m_Variance;
index 151132f..1080f32 100644 (file)
@@ -7,7 +7,9 @@
 
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <neon/NeonLayerSupport.hpp>
+#include <neon/workloads/NeonWorkloadUtils.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
 
 #include <armnn/Types.hpp>
 #include <Half.hpp>
index daf9a43..3fb408d 100644 (file)
@@ -5,12 +5,10 @@
 
 #pragma once
 
-#include <aclCommon/ArmComputeTensorUtils.hpp>
-#include <backendsCommon/CpuTensorHandle.hpp>
-#include <neon/NeonLayerSupport.hpp>
-#include <neon/workloads/NeonWorkloadUtils.hpp>
 #include <backendsCommon/Workload.hpp>
 
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/Tensor.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
 
 #include <memory>
index be26359..c915555 100644 (file)
@@ -5,11 +5,18 @@
 
 #include "NeonDepthwiseConvolutionWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <DataLayoutIndexed.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <neon/NeonLayerSupport.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <backendsCommon/WorkloadUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
+
+using namespace armnnUtils;
+
 namespace armnn
 {
 
index b5f2ae9..85932d3 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/Tensor.h>
+
+#include <memory>
 
 namespace armnn
 {
index a08ba8a..f024fef 100644 (file)
@@ -5,6 +5,12 @@
 
 #include "NeonFloorFloatWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NEFloor.h>
+
+#include <boost/polymorphic_cast.hpp>
+
 namespace armnn
 {
 NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
@@ -16,13 +22,15 @@ NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descr
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(&input, &output);
+    auto layer = std::make_unique<arm_compute::NEFloor>();
+    layer->configure(&input, &output);
+    m_Layer.reset(layer.release());
 }
 
 void NeonFloorFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute");
-    m_Layer.run();
+    m_Layer->run();
 }
 } //namespace armnn
 
index a4ce476..01b86a6 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/Tensor.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -17,7 +22,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEFloor m_Layer;
+    std::unique_ptr<arm_compute::IFunction> m_Layer;
 };
 
 } //namespace armnn
index e432a6b..7395270 100644 (file)
@@ -5,10 +5,13 @@
 
 #include "NeonFullyConnectedWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
+
 namespace armnn
 {
 using namespace armcomputetensorutils;
@@ -45,7 +48,6 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
 NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
     const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
-    , m_FullyConnectedLayer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
 
@@ -64,7 +66,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
     // Construct
     arm_compute::FullyConnectedLayerInfo fc_info;
     fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
-    m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+
+    auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
+    layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+    m_FullyConnectedLayer.reset(layer.release());
 
     // Allocate
     if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8)
@@ -90,14 +95,14 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
 
     // Force Compute Library to perform the necessary copying and reshaping, after which
     // delete all the input tensors that will no longer be needed
-    m_FullyConnectedLayer.prepare();
+    m_FullyConnectedLayer->prepare();
     FreeUnusedTensors();
 }
 
 void NeonFullyConnectedWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
-    m_FullyConnectedLayer.run();
+    m_FullyConnectedLayer->run();
 }
 
 void NeonFullyConnectedWorkload::FreeUnusedTensors()
index ec1661d..1cd8be1 100644 (file)
@@ -5,9 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
 
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
+#include <arm_compute/runtime/Tensor.h>
 
 #include <memory>
 
@@ -28,7 +31,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer;
+    std::unique_ptr<arm_compute::IFunction> m_FullyConnectedLayer;
 
     std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
     std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
index afaa700..99bbcfa 100644 (file)
@@ -4,8 +4,13 @@
 //
 
 #include "NeonL2NormalizationFloatWorkload.hpp"
+
+#include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h>
+
 namespace armnn
 {
 using namespace armcomputetensorutils;
@@ -25,7 +30,6 @@ arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
 NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
     const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
-    , m_Layer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
 
@@ -38,13 +42,15 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma
 
     unsigned int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
 
-    m_Layer.configure(&input, &output, axis);
+    auto layer = std::make_unique<arm_compute::NEL2NormalizeLayer>(memoryManager);
+    layer->configure(&input, &output, axis);
+    m_Layer.reset(layer.release());
 }
 
 void NeonL2NormalizationFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute");
-    m_Layer.run();
+    m_Layer->run();
 }
 
 } //namespace armnn
index 30058c5..2a8eb38 100644 (file)
@@ -5,7 +5,10 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
 
 #include <memory>
@@ -25,7 +28,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEL2NormalizeLayer m_Layer;
+    std::unique_ptr<arm_compute::IFunction> m_Layer;
 };
 
 } //namespace armnn
index f82e244..be096b4 100644 (file)
@@ -4,11 +4,14 @@
 //
 
 #include "NeonMergerWorkload.hpp"
-#include <armnn/ArmNN.hpp>
+
+#include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEConcatenateLayer.h>
 
 namespace armnn
 {
@@ -66,9 +69,11 @@ const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
 
     arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
 
-    m_Layer.configure(aclInputs, &output, aclAxis);
+    auto layer = std::make_unique<arm_compute::NEConcatenateLayer>();
+    layer->configure(aclInputs, &output, aclAxis);
+    m_Layer.reset(layer.release());
 
-    m_Layer.prepare();
+    m_Layer->prepare();
 }
 
 void NeonMergerWorkload::Execute() const
@@ -76,7 +81,7 @@ void NeonMergerWorkload::Execute() const
     if (m_Execute)
     {
         ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerWorkload_Execute");
-        m_Layer.run();
+        m_Layer->run();
     }
 }
 
index a4f36d1..3432c62 100644 (file)
@@ -6,7 +6,11 @@
 #pragma once
 
 #include <backendsCommon/Workload.hpp>
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -23,7 +27,7 @@ public:
     void Execute() const override;
 
 private:
-    mutable arm_compute::NEConcatenateLayer m_Layer;
+    std::unique_ptr<arm_compute::IFunction> m_Layer;
     bool m_Execute;
 
 };
index c4241ec..778e782 100644 (file)
@@ -5,6 +5,9 @@
 
 #include "NeonMultiplicationFloatWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
 
 namespace armnn
 {
@@ -41,18 +44,20 @@ NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const Multiplic
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
-    m_PixelWiseMultiplication.configure(&input1,
-                                        &input2,
-                                        &output,
-                                        1.0f,
-                                        arm_compute::ConvertPolicy::SATURATE,
-                                        arm_compute::RoundingPolicy::TO_ZERO);
+    auto layer = std::make_unique<arm_compute::NEPixelWiseMultiplication>();
+    layer->configure(&input1,
+                     &input2,
+                     &output,
+                     1.0f,
+                     arm_compute::ConvertPolicy::SATURATE,
+                     arm_compute::RoundingPolicy::TO_ZERO);
+    m_PixelWiseMultiplication.reset(layer.release());
 }
 
 void NeonMultiplicationFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute");
-    m_PixelWiseMultiplication.run();
+    m_PixelWiseMultiplication->run();
 }
 
 } //namespace armnn
index 8fa3171..a65ad4e 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -20,7 +25,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication;
+    std::unique_ptr<arm_compute::IFunction> m_PixelWiseMultiplication;
 };
 
 } //namespace armnn
index 854ecd3..92c0396 100644 (file)
@@ -4,10 +4,13 @@
 //
 
 #include "NeonNormalizationFloatWorkload.hpp"
-#include <neon/NeonLayerSupport.hpp>
+
+#include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h>
+
 using namespace armnn::armcomputetensorutils;
 
 namespace armnn
@@ -57,7 +60,6 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati
                                                    const WorkloadInfo& info,
                                                    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
-    , m_NormalizationLayer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1);
     std::string reasonIfUnsupported;
@@ -89,14 +91,15 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati
                                                           m_Data.m_Parameters.m_Beta,
                                                           m_Data.m_Parameters.m_K,
                                                           false);
-
-    m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+    auto layer = std::make_unique<arm_compute::NENormalizationLayer>(memoryManager);
+    layer->configure(&input, &output, normalizationInfo);
+    m_NormalizationLayer.reset(layer.release());
 }
 
 void NeonNormalizationFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute");
-    m_NormalizationLayer.run();
+    m_NormalizationLayer->run();
 }
 
 } //namespace armnn
index 89eba57..17bbeb4 100644 (file)
@@ -5,9 +5,14 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
 
+#include <memory>
+
 namespace armnn
 {
 
@@ -23,7 +28,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NENormalizationLayer m_NormalizationLayer;
+    std::unique_ptr<arm_compute::IFunction> m_NormalizationLayer;
 };
 
 } //namespace armnn
index 9c8f71a..75bceb1 100644 (file)
@@ -4,11 +4,15 @@
 //
 
 #include "NeonPooling2dWorkload.hpp"
-#include <neon/NeonLayerSupport.hpp>
+
+#include "NeonWorkloadUtils.hpp"
+
 #include <neon/NeonTensorHandle.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
+
 namespace armnn
 {
 using namespace armcomputetensorutils;
@@ -42,13 +46,15 @@ NeonPooling2dWorkload::NeonPooling2dWorkload(
 
     arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
 
-    m_PoolingLayer.configure(&input, &output, layerInfo);
+    auto layer = std::make_unique<arm_compute::NEPoolingLayer>();
+    layer->configure(&input, &output, layerInfo);
+    m_PoolingLayer.reset(layer.release());
 }
 
 void NeonPooling2dWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dWorkload_Execute");
-    m_PoolingLayer.run();
+    m_PoolingLayer->run();
 }
 
 } //namespace armnn
index b2379f7..b0e3aa8 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -24,7 +29,7 @@ public:
     void Execute() const override;
 
 private:
-    mutable arm_compute::NEPoolingLayer m_PoolingLayer;
+    std::unique_ptr<arm_compute::IFunction> m_PoolingLayer;
 };
 
 } //namespace armnn
index c2dcdd5..40fbef6 100644 (file)
@@ -5,6 +5,12 @@
 
 #include "NeonReshapeWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
+
+#include <boost/polymorphic_cast.hpp>
+
 namespace armnn
 {
 
@@ -17,13 +23,15 @@ NeonReshapeWorkload::NeonReshapeWorkload(const ReshapeQueueDescriptor& descripto
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(&input, &output);
+    auto layer = std::make_unique<arm_compute::NEReshapeLayer>();
+    layer->configure(&input, &output);
+    m_Layer.reset(layer.release());
 }
 
 void NeonReshapeWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeWorkload_Execute");
-    m_Layer.run();
+    m_Layer->run();
 }
 
 } //namespace armnn
index 38b6c51..2202463 100644 (file)
@@ -5,7 +5,11 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -18,7 +22,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEReshapeLayer m_Layer;
+    std::unique_ptr<arm_compute::IFunction> m_Layer;
 };
 
 } //namespace armnn
index 434de87..b229bc4 100644 (file)
@@ -7,6 +7,8 @@
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
+
 namespace armnn
 {
 
index 6e96c2d..6eecb97 100644 (file)
@@ -5,7 +5,8 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <armnn/Descriptors.hpp>
+#include <arm_compute/core/Error.h>
 
 namespace armnn
 {
index 92e5139..d9c78bb 100644 (file)
@@ -5,13 +5,16 @@
 
 #include "NeonSoftmaxFloatWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
+
 namespace armnn
 {
 
 NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor,
     const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
-    , m_SoftmaxLayer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1);
 
@@ -19,13 +22,15 @@ NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor&
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+    auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
+    layer->configure(&input, &output, m_Data.m_Parameters.m_Beta);
+    m_SoftmaxLayer.reset(layer.release());
 }
 
 void NeonSoftmaxFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute");
-    m_SoftmaxLayer.run();
+    m_SoftmaxLayer->run();
 }
 
 } //namespace armnn
index 9c11b27..77f2cc3 100644 (file)
@@ -5,7 +5,9 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
 
 #include <memory>
@@ -21,7 +23,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+    std::unique_ptr<arm_compute::IFunction> m_SoftmaxLayer;
 };
 
 } //namespace armnn
index cff869c..f780589 100644 (file)
@@ -5,6 +5,10 @@
 
 #include "NeonSoftmaxUint8Workload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
+
+#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h>
+
 namespace armnn
 {
 
@@ -12,7 +16,6 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor&
                                                    const WorkloadInfo& info,
                                                    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
-    , m_SoftmaxLayer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
 
@@ -27,14 +30,16 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor&
             "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
     }
 
-    m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+    auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager);
+    layer->configure(&input, &output, descriptor.m_Parameters.m_Beta);
+    m_SoftmaxLayer.reset(layer.release());
 }
 
 void NeonSoftmaxUint8Workload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute");
 
-    m_SoftmaxLayer.run();
+    m_SoftmaxLayer->run();
 }
 
 } //namespace armnn
index b3bcbf3..c569208 100644 (file)
@@ -5,9 +5,13 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
 
+#include <memory>
+
 namespace armnn
 {
 
@@ -19,7 +23,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+    std::unique_ptr<arm_compute::IFunction> m_SoftmaxLayer;
 };
 
 } //namespace armnn
index 1eae0a4..e39f8aa 100644 (file)
@@ -4,9 +4,13 @@
 //
 
 #include "NeonSubtractionFloatWorkload.hpp"
+
+#include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
+
 namespace armnn
 {
 
@@ -34,13 +38,15 @@ NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueu
     arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
+    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    m_SubLayer.reset(layer.release());
 }
 
 void NeonSubtractionFloatWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute");
-    m_SubLayer.run();
+    m_SubLayer->run();
 }
 
 } //namespace armnn
index 0901699..5dce112 100644 (file)
@@ -5,7 +5,12 @@
 
 #pragma once
 
-#include <neon/workloads/NeonWorkloadUtils.hpp>
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include <memory>
 
 namespace armnn
 {
@@ -21,7 +26,7 @@ public:
     virtual void Execute() const override;
 
 private:
-    mutable arm_compute::NEArithmeticSubtraction m_SubLayer;
+    std::unique_ptr<arm_compute::IFunction> m_SubLayer;
 };
 
 } //namespace armnn
index 17e14cd..22ffece 100644 (file)
@@ -9,7 +9,6 @@
 #include <neon/NeonTensorHandle.hpp>
 #include <neon/NeonTimer.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
-#include <arm_compute/runtime/NEON/NEFunctions.h>
 
 #include <Half.hpp>