MLCE-82 Add Neon Mean support and unit tests
author     Matthew Bentham <matthew.bentham@arm.com>
           Mon, 31 Dec 2018 15:49:42 +0000 (15:49 +0000)
committer  Eanna O Cathain Arm <eanna.ocathain@arm.com>
           Wed, 2 Jan 2019 11:44:02 +0000 (11:44 +0000)
Factor out new BuildArmComputeReductionCoordinates function
from CL backend into ArmComputeTensorUtils.

Update NEON LayerSupport and WorkloadFactory objects

Change-Id: Icc975ec699199bffafbdb207323df509d35e1e04
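
For context, the axis mapping performed by the factored-out BuildArmComputeReductionCoordinates helper is sketched below in isolation. This is a minimal standalone illustration of the rule documented in the diff (ACL reduction axis = original rank - ArmNN axis - 1, or every dimension in reverse order when no axes are given); the MapReductionAxes name, the shapes and the printed values are illustrative only, and the real helper returns arm_compute::Coordinates rather than std::vector.

// Standalone sketch of the reduction-axis mapping (illustrative only).
#include <algorithm>  // std::generate, std::transform
#include <cstddef>
#include <iostream>
#include <iterator>   // std::back_inserter
#include <vector>

std::vector<unsigned int> MapReductionAxes(std::size_t aclInputDimensions,
                                           unsigned int originalInputRank,
                                           const std::vector<unsigned int>& armnnAxes)
{
    std::vector<unsigned int> aclAxes;
    if (armnnAxes.empty())
    {
        // No axes given: reduce along every ACL dimension, listed in reverse order,
        // i.e. { aclInputDimensions - 1, ..., 1, 0 }.
        aclAxes.resize(aclInputDimensions);
        std::generate(aclAxes.begin(), aclAxes.end(),
                      [d = static_cast<unsigned int>(aclInputDimensions)]() mutable { return --d; });
    }
    else
    {
        // Explicit axes: ACL reduction axis = original rank - ArmNN reduction axis - 1.
        std::transform(armnnAxes.begin(), armnnAxes.end(), std::back_inserter(aclAxes),
                       [originalInputRank](unsigned int i) { return originalInputRank - i - 1; });
    }
    return aclAxes;
}

int main()
{
    // ArmNN shape { 1, 1, 3, 2 } collapses to ACL shape { 2, 3 }, i.e. 2 ACL dimensions.
    // ArmNN reduction axis 2 therefore maps to ACL axis 4 - 2 - 1 = 1.
    for (unsigned int axis : MapReductionAxes(2, 4, { 2 }))
    {
        std::cout << axis << std::endl; // prints 1
    }
    // An empty axis list reduces over all ACL dimensions, in reverse order.
    for (unsigned int axis : MapReductionAxes(2, 4, {}))
    {
        std::cout << axis << std::endl; // prints 1, then 0
    }
    return 0;
}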

src/backends/aclCommon/ArmComputeTensorUtils.cpp
src/backends/aclCommon/ArmComputeTensorUtils.hpp
src/backends/cl/workloads/ClMeanWorkload.cpp
src/backends/neon/NeonLayerSupport.cpp
src/backends/neon/NeonWorkloadFactory.cpp
src/backends/neon/backend.mk
src/backends/neon/test/NeonLayerTests.cpp
src/backends/neon/workloads/CMakeLists.txt
src/backends/neon/workloads/NeonMeanWorkload.cpp [new file with mode: 0644]
src/backends/neon/workloads/NeonMeanWorkload.hpp [new file with mode: 0644]
src/backends/neon/workloads/NeonWorkloads.hpp

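Before the per-file diffs, a hedged usage sketch of the new NeonMeanWorkloadValidate entry point follows, mirroring how FORWARD_WORKLOAD_VALIDATE_FUNC drives it from NeonLayerSupport::IsMeanSupported further down. The tensor shapes, include paths and standalone build setup are assumptions made for illustration; only the function signature and the MeanDescriptor fields (m_Axis, m_KeepDims) come from the commit itself.

// Hypothetical support check for Mean on the Neon backend (illustrative only).
#include <armnn/Descriptors.hpp>   // MeanDescriptor
#include <armnn/Tensor.hpp>        // TensorInfo, DataType
#include "NeonMeanWorkload.hpp"    // include path assumed; not a public ArmNN header

#include <iostream>

int main()
{
    // Mean over axis 2 of a { 1, 1, 3, 2 } tensor, keeping the reduced dimension.
    const unsigned int inputShape[]  = { 1, 1, 3, 2 };
    const unsigned int outputShape[] = { 1, 1, 1, 2 };
    const armnn::TensorInfo input(4, inputShape, armnn::DataType::Float32);
    const armnn::TensorInfo output(4, outputShape, armnn::DataType::Float32);

    armnn::MeanDescriptor desc;
    desc.m_Axis     = { 2 };
    desc.m_KeepDims = true;

    const arm_compute::Status status = armnn::NeonMeanWorkloadValidate(input, output, desc);
    if (status.error_code() == arm_compute::ErrorCode::OK)
    {
        std::cout << "Mean is supported on Neon for this configuration" << std::endl;
    }
    else
    {
        std::cout << "Not supported: " << status.error_description() << std::endl;
    }
    return 0;
}
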
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 6b55948..a2d7d8c 100644
@@ -31,6 +31,48 @@ arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType)
     }
 }
 
+arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
+                                                             unsigned int originalInputRank,
+                                                             const std::vector<unsigned int>& armnnAxes)
+{
+    arm_compute::Coordinates outAclCoords;
+
+    if (armnnAxes.empty())
+    {
+        // If no reduction axes were provided, then the input must be reduced along all dimensions.
+        // Since Compute Library does not accept an empty vector as the reduction dimensions, we then
+        // manually create a vector including all the input dimensions (in reversed order) as:
+        //
+        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
+        //
+        outAclCoords.set_num_dimensions(inputDimensions);
+        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
+    }
+    else
+    {
+        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
+        //
+        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
+        // dimension correction).
+        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
+        // new value for that reduction axis should be 1.
+        //
+        // Example:
+        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
+        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
+        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
+        //
+        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
+        //
+        outAclCoords.set_num_dimensions(armnnAxes.size());
+        std::transform(armnnAxes.begin(), armnnAxes.end(),
+                       outAclCoords.begin(),
+                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
+    }
+
+    return outAclCoords;
+}
+
 arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
 {
     arm_compute::TensorShape shape;
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index 2a14d65..fbd850c 100644
@@ -24,6 +24,11 @@ namespace armcomputetensorutils
 /// Utility function to map an armnn::DataType to corresponding arm_compute::DataType.
 arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType);
 
+/// Utility function used to set up an arm_compute::Coordinates from a vector of ArmNN Axes for reduction functions
+arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
+                                                             unsigned int originalInputRank,
+                                                             const std::vector<unsigned int>& armnnAxes);
+
 /// Utility function used to setup an arm_compute::TensorShape object from an armnn::TensorShape.
 arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape);
 
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 960fca2..470b6a8 100644
 
 #include "ClWorkloadUtils.hpp"
 
-namespace
-{
-
-void ConvertArmnnAxesToAclCoordinates(size_t inputDimensions,
-                                      unsigned int originalInputRank,
-                                      const std::vector<unsigned int>& armnnAxes,
-                                      arm_compute::Coordinates& outAclCoords)
-{
-    if (armnnAxes.empty())
-    {
-        // If no reduction axes were provided, then the input must be reduced along all dimensions.
-        // Since arm_compute::CLReduceMean does not accept an empty vector as the reduction dimensions, we then
-        // manually create a vector including all the input dimensions (in reversed order) as:
-        //
-        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
-        //
-        outAclCoords.set_num_dimensions(inputDimensions);
-        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
-    }
-    else
-    {
-        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
-        //
-        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
-        // dimension correction).
-        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
-        // new value for that reduction axis should be 1.
-        //
-        // Example:
-        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
-        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
-        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
-        //
-        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
-        //
-        outAclCoords.set_num_dimensions(armnnAxes.size());
-        std::transform(armnnAxes.begin(), armnnAxes.end(),
-                       outAclCoords.begin(),
-                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
-    }
-}
-
-} // anonymous namespace
-
 namespace armnn
 {
 using namespace armcomputetensorutils;
@@ -65,11 +21,9 @@ arm_compute::Status ClMeanValidate(const TensorInfo& input,
     const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
-    arm_compute::Coordinates coords;
-    ConvertArmnnAxesToAclCoordinates(aclInputInfo.num_dimensions(),
-                                     input.GetNumDimensions(),
-                                     desc.m_Axis,
-                                     coords);
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
+                                                                          input.GetNumDimensions(),
+                                                                          desc.m_Axis);
 
     return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
 }
@@ -82,11 +36,9 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const Work
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    arm_compute::Coordinates coords;
-    ConvertArmnnAxesToAclCoordinates(input.info()->num_dimensions(),
-                                     info.m_InputTensorInfos[0].GetNumDimensions(),
-                                     m_Data.m_Parameters.m_Axis,
-                                     coords);
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
+                                                                          info.m_InputTensorInfos[0].GetNumDimensions(),
+                                                                          m_Data.m_Parameters.m_Axis);
 
     m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
 }
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 7efdf15..93c1123 100644
@@ -24,6 +24,7 @@
 #include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
 #include "workloads/NeonL2NormalizationFloatWorkload.hpp"
 #include "workloads/NeonMaximumWorkload.hpp"
+#include "workloads/NeonMeanWorkload.hpp"
 #include "workloads/NeonMergerWorkload.hpp"
 #include "workloads/NeonMultiplicationFloatWorkload.hpp"
 #include "workloads/NeonNormalizationFloatWorkload.hpp"
@@ -364,11 +365,11 @@ bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
                                        const MeanDescriptor& descriptor,
                                        Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(input);
-    ignore_unused(output);
-    ignore_unused(descriptor);
-    ignore_unused(reasonIfUnsupported);
-    return false;
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMeanWorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
 }
 
 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 85e5768..e635f0c 100644
@@ -273,7 +273,7 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMaximum(const MaximumQueue
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
-    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
+    return std::make_unique<NeonMeanWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index fdfd696..d4f414e 100644
@@ -26,6 +26,7 @@ BACKEND_SOURCES := \
         workloads/NeonL2NormalizationFloatWorkload.cpp \
         workloads/NeonLstmFloatWorkload.cpp \
         workloads/NeonMaximumWorkload.cpp \
+        workloads/NeonMeanWorkload.cpp \
         workloads/NeonMergerWorkload.cpp \
         workloads/NeonMultiplicationFloatWorkload.cpp \
         workloads/NeonNormalizationFloatWorkload.cpp \
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 37933e0..5b83b2b 100644
@@ -399,6 +399,21 @@ ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgNoPeepholeNoProjection,
 ARMNN_AUTO_TEST_CASE(LstmLayerFloat32NoCifgWithPeepholeWithProjection,
                      LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest)
 
+// Mean
+ARMNN_AUTO_TEST_CASE(MeanUint8Simple, MeanUint8SimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8SimpleAxis, MeanUint8SimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8KeepDims, MeanUint8KeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8MultipleDims, MeanUint8MultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsUint8, MeanVtsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(MeanFloatSimple, MeanFloatSimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatSimpleAxis, MeanFloatSimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatKeepDims, MeanFloatKeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatMultipleDims, MeanFloatMultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat1, MeanVtsFloat1Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat3, MeanVtsFloat3Test)
+
 // Max
 ARMNN_AUTO_TEST_CASE(SimpleMaximum, MaximumSimpleTest)
 ARMNN_AUTO_TEST_CASE(MaximumBroadcast1Element, MaximumBroadcast1ElementTest)
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index 7b0251c..b7dfc3f 100644
@@ -30,6 +30,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
     NeonLstmFloatWorkload.hpp
     NeonMaximumWorkload.cpp
     NeonMaximumWorkload.hpp
+    NeonMeanWorkload.cpp
+    NeonMeanWorkload.hpp
     NeonMergerWorkload.cpp
     NeonMergerWorkload.hpp
     NeonMultiplicationFloatWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.cpp b/src/backends/neon/workloads/NeonMeanWorkload.cpp
new file mode 100644
index 0000000..d736e42
--- /dev/null
+++ b/src/backends/neon/workloads/NeonMeanWorkload.cpp
@@ -0,0 +1,53 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonMeanWorkload.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <neon/NeonTensorHandle.hpp>
+
+#include "NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
+                                             const TensorInfo& output,
+                                             const MeanDescriptor& desc)
+{
+    const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
+                                                                          input.GetNumDimensions(),
+                                                                          desc.m_Axis);
+
+    return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+}
+
+NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonMeanWorkload", 1, 1);
+
+    arm_compute::ITensor& input  = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
+                                                                          info.m_InputTensorInfos[0].GetNumDimensions(),
+                                                                          m_Data.m_Parameters.m_Axis);
+
+    m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+}
+
+void NeonMeanWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMeanWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.hpp b/src/backends/neon/workloads/NeonMeanWorkload.hpp
new file mode 100644
index 0000000..055b52a
--- /dev/null
+++ b/src/backends/neon/workloads/NeonMeanWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEReduceMean.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
+                                             const TensorInfo& output,
+                                             const MeanDescriptor& desc);
+
+class NeonMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
+{
+public:
+    NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::NEReduceMean m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 1f08d03..a5ef0dc 100644
@@ -17,6 +17,7 @@
 #include "NeonL2NormalizationFloatWorkload.hpp"
 #include "NeonLstmFloatWorkload.hpp"
 #include "NeonMaximumWorkload.hpp"
+#include "NeonMeanWorkload.hpp"
 #include "NeonMergerWorkload.hpp"
 #include "NeonMultiplicationFloatWorkload.hpp"
 #include "NeonNormalizationFloatWorkload.hpp"