IVGCVSW-3355 Add NeonSpaceToDepthWorkload
author: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>
Wed, 26 Jun 2019 15:40:36 +0000 (16:40 +0100)
committer: James Conroy <james.conroy@arm.com>
Tue, 9 Jul 2019 13:29:58 +0000 (13:29 +0000)
 * Add Neon backend support for SpaceToDepth
 * Enabled Neon Unit, CreateWorkload and
   EndToEnd tests for SpaceToDepth.
 * Added QSymm16 support to NeonTensorHandle.

Signed-off-by: Ellen Norris-Thompson <ellen.norris-thompson@arm.com>
Change-Id: Ibbb0bce5ad77dace2bf4c4c111006d2fadf844e8

13 files changed:
src/backends/neon/NeonLayerSupport.cpp
src/backends/neon/NeonLayerSupport.hpp
src/backends/neon/NeonTensorHandle.hpp
src/backends/neon/NeonWorkloadFactory.cpp
src/backends/neon/NeonWorkloadFactory.hpp
src/backends/neon/backend.mk
src/backends/neon/test/NeonCreateWorkloadTests.cpp
src/backends/neon/test/NeonEndToEndTests.cpp
src/backends/neon/test/NeonLayerTests.cpp
src/backends/neon/workloads/CMakeLists.txt
src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp [new file with mode: 0644]
src/backends/neon/workloads/NeonSpaceToDepthWorkload.hpp [new file with mode: 0644]
src/backends/neon/workloads/NeonWorkloads.hpp

index c05456b..4fee53f 100644 (file)
@@ -40,6 +40,7 @@
 #include "workloads/NeonQuantizeWorkload.hpp"
 #include "workloads/NeonResizeBilinearWorkload.hpp"
 #include "workloads/NeonSoftmaxBaseWorkload.hpp"
+#include "workloads/NeonSpaceToDepthWorkload.hpp"
 #include "workloads/NeonSplitterWorkload.hpp"
 #include "workloads/NeonSubtractionWorkload.hpp"
 #endif
@@ -514,6 +515,18 @@ bool NeonLayerSupport::IsSoftmaxSupported(const TensorInfo& input,
     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
+bool NeonLayerSupport::IsSpaceToDepthSupported(const TensorInfo& input,
+                                              const TensorInfo& output,
+                                              const SpaceToDepthDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSpaceToDepthWorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
+}
+
 bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
                                            const ViewsDescriptor& descriptor,
                                            Optional<std::string&> reasonIfUnsupported) const
index 344ce84..315248c 100644 (file)
@@ -177,6 +177,11 @@ public:
                             const SoftmaxDescriptor& descriptor,
                             Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsSpaceToDepthSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const SpaceToDepthDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     ARMNN_DEPRECATED_MSG("Use IsSplitterSupported with outputs instead")
     bool IsSplitterSupported(const TensorInfo& input,
                              const ViewsDescriptor& descriptor,
index 3bbba78..9077f34 100644 (file)
@@ -91,6 +91,11 @@ private:
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<uint8_t*>(memory));
                 break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<int16_t*>(memory));
+                break;
             default:
             {
                 throw armnn::UnimplementedException();
@@ -112,6 +117,11 @@ private:
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                  this->GetTensor());
                 break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
+                                                                 this->GetTensor());
+                break;
             default:
             {
                 throw armnn::UnimplementedException();
@@ -180,6 +190,11 @@ private:
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<uint8_t*>(memory));
                 break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+                                                                 static_cast<int16_t*>(memory));
+                break;
             default:
             {
                 throw armnn::UnimplementedException();
@@ -201,6 +216,11 @@ private:
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                  this->GetTensor());
                 break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
+                                                                 this->GetTensor());
+                break;
             default:
             {
                 throw armnn::UnimplementedException();
index 1802459..eadd636 100644 (file)
@@ -294,6 +294,12 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const Space
     return nullptr;
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToDepth(const armnn::SpaceToDepthQueueDescriptor& descriptor,
+                                                                  const armnn::WorkloadInfo& info) const
+{
+    return std::make_unique<NeonSpaceToDepthWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
     const WorkloadInfo& info) const
 {
index 2341673..4fd9bf2 100644 (file)
@@ -123,6 +123,9 @@ public:
     std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
                                            const WorkloadInfo& info) const override;
 
index e5fafa3..9b0c188 100644 (file)
@@ -49,6 +49,7 @@ BACKEND_SOURCES := \
         workloads/NeonSoftmaxBaseWorkload.cpp \
         workloads/NeonSoftmaxFloatWorkload.cpp \
         workloads/NeonSoftmaxUint8Workload.cpp \
+        workloads/NeonSpaceToDepthWorkload.cpp \
         workloads/NeonSplitterWorkload.cpp \
         workloads/NeonSubtractionWorkload.cpp
 
index 19b520d..4968d0e 100644 (file)
@@ -522,6 +522,43 @@ BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
     NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float32>();
 }
 
+template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
+static void NeonSpaceToDepthWorkloadTest()
+{
+    Graph graph;
+    NeonWorkloadFactory factory =
+            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
+
+    auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
+
+    SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
+}
+
+BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
+{
+    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
+{
+    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
+{
+    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedAsymm8>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
+{
+    NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
+}
+
 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
 {
     Graph graph;
index 7a22b45..66cace6 100644 (file)
@@ -8,6 +8,7 @@
 #include <backendsCommon/test/ArithmeticTestImpl.hpp>
 #include <backendsCommon/test/ConcatTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
+#include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 
 #include <boost/test/unit_test.hpp>
@@ -135,6 +136,26 @@ BOOST_AUTO_TEST_CASE(DequantizeEndToEndOffsetTest)
     DequantizeEndToEndOffset<armnn::DataType::QuantisedAsymm8>(defaultBackends);
 }
 
+BOOST_AUTO_TEST_CASE(NeonSpaceToDepthNHWCEndToEndTest1)
+{
+    SpaceToDepthNHWCEndToEndTest1(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(NeonSpaceToDepthNCHWEndToEndTest1)
+{
+    SpaceToDepthNCHWEndToEndTest1(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(NeonSpaceToDepthNHWCEndToEndTest2)
+{
+    SpaceToDepthNHWCEndToEndTest2(defaultBackends);
+}
+
+BOOST_AUTO_TEST_CASE(NeonSpaceToDepthNCHWEndToEndTest2)
+{
+    SpaceToDepthNCHWEndToEndTest2(defaultBackends);
+}
+
 BOOST_AUTO_TEST_CASE(NeonSplitter1dEndToEndTest)
 {
     Splitter1dEndToEnd<armnn::DataType::Float32>(defaultBackends);
index 078016a..162aaad 100644 (file)
@@ -302,6 +302,19 @@ ARMNN_AUTO_TEST_CASE(Simple3dSoftmaxBeta1Uint8, Simple3dSoftmaxUint8Test, 1.0f)
 ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1, Simple4dSoftmaxTest, 1.0f)
 ARMNN_AUTO_TEST_CASE(Simple4dSoftmaxBeta1Uint8, Simple4dSoftmaxUint8Test, 1.0f)
 
+// SpaceToDepth
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWAsymmQ8, SpaceToDepthNCHWAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCAsymmQ8, SpaceToDepthNHWCAsymmQ8Test)
+
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC1Float32, SpaceToDepthNHWCFloat32Test1)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW1Float32, SpaceToDepthNCHWFloat32Test1)
+
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWC2Float32, SpaceToDepthNHWCFloat32Test2)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHW2Float32, SpaceToDepthNCHWFloat32Test2)
+
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCQSymm16, SpaceToDepthNHWCQSymm16Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWQSymm16, SpaceToDepthNCHWQSymm16Test)
+
 // Splitter
 ARMNN_AUTO_TEST_CASE(SimpleSplitter, SplitterTest)
 ARMNN_AUTO_TEST_CASE(SimpleSplitterUint8, SplitterUint8Test)
index 8f9b7d5..0ad961a 100644 (file)
@@ -64,6 +64,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
     NeonSoftmaxFloatWorkload.hpp
     NeonSoftmaxUint8Workload.cpp
     NeonSoftmaxUint8Workload.hpp
+    NeonSpaceToDepthWorkload.cpp
+    NeonSpaceToDepthWorkload.hpp
     NeonSplitterWorkload.cpp
     NeonSplitterWorkload.hpp
     NeonSubtractionWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
new file mode 100644 (file)
index 0000000..a4204b2
--- /dev/null
@@ -0,0 +1,58 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSpaceToDepthWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+#include <ResolveType.hpp>
+
+namespace armnn
+{
+
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonSpaceToDepthWorkloadValidate(const TensorInfo& input,
+                                                     const TensorInfo& output,
+                                                     const SpaceToDepthDescriptor& descriptor)
+{
+    DataLayout dataLayout = descriptor.m_DataLayout;
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, dataLayout);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, dataLayout);
+
+    int32_t blockSize  = boost::numeric_cast<int32_t>(descriptor.m_BlockSize);
+
+    return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
+}
+
+NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+                                                   const WorkloadInfo& info)
+    : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+{
+    m_Data.ValidateInputsOutputs("NeonSpaceToDepthWorkload", 1, 1);
+
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    input.info()->set_data_layout(aclDataLayout);
+
+    int32_t blockSize = boost::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+
+    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    output.info()->set_data_layout(aclDataLayout);
+
+    m_Layer.reset(new arm_compute::NESpaceToDepthLayer());
+    m_Layer->configure(&input, &output, blockSize);
+    m_Layer->prepare();
+}
+
+void NeonSpaceToDepthWorkload::Execute() const
+{
+    if (m_Layer)
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToDepthWorkload_Execute");
+        m_Layer->run();
+    }
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.hpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.hpp
new file mode 100644 (file)
index 0000000..f2de26b
--- /dev/null
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/TypesUtils.hpp>
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonSpaceToDepthWorkloadValidate(const TensorInfo& input,
+                                                     const TensorInfo& output,
+                                                     const SpaceToDepthDescriptor& descriptor);
+
+class NeonSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
+{
+public:
+    using BaseWorkload<SpaceToDepthQueueDescriptor>::BaseWorkload;
+    NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+private:
+    mutable std::unique_ptr<arm_compute::NESpaceToDepthLayer> m_Layer;
+};
+
+} //namespace armnn
\ No newline at end of file
index ebea879..a9604a1 100644 (file)
@@ -33,5 +33,6 @@
 #include "NeonResizeBilinearWorkload.hpp"
 #include "NeonSoftmaxFloatWorkload.hpp"
 #include "NeonSoftmaxUint8Workload.hpp"
+#include "NeonSpaceToDepthWorkload.hpp"
 #include "NeonSplitterWorkload.hpp"
 #include "NeonSubtractionWorkload.hpp"