#include "workloads/NeonSoftmaxBaseWorkload.hpp"
#include "workloads/NeonSpaceToDepthWorkload.hpp"
#include "workloads/NeonSplitterWorkload.hpp"
+#include "workloads/NeonStackWorkload.hpp"
#include "workloads/NeonSubtractionWorkload.hpp"
#endif
return true;
}
+// Reports whether a Stack layer with the given inputs, output and descriptor can run on this backend.
+// The arguments are handed to NeonStackWorkloadValidate through FORWARD_WORKLOAD_VALIDATE_FUNC.
+// NOTE(review): the macro is defined outside this view; presumably it returns the bool result and
+// fills reasonIfUnsupported when validation fails - confirm against its definition.
+bool NeonLayerSupport::IsStackSupported(const std::vector<const TensorInfo*>& inputs,
+ const TensorInfo& output,
+ const StackDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonStackWorkloadValidate,
+ reasonIfUnsupported,
+ inputs,
+ output,
+ descriptor);
+}
+
bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
const ViewsDescriptor& descriptor,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ // Support query for the Stack layer: takes one TensorInfo pointer per input, the output TensorInfo
+ // and the StackDescriptor. reasonIfUnsupported is optional and defaults to EmptyOptional().
+ bool IsStackSupported(const std::vector<const TensorInfo*>& inputs,
+ const TensorInfo& output,
+ const StackDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsSubtractionSupported(const TensorInfo& input0,
const TensorInfo& input1,
const TensorInfo& output,
return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
}
+// Builds the Neon workload for a Stack layer from the queue descriptor and workload info.
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const
+{
+    // Hold the concrete workload through the interface type the factory hands out.
+    std::unique_ptr<IWorkload> stackWorkload = std::make_unique<NeonStackWorkload>(descriptor, info);
+    return stackWorkload;
+}
+
} // namespace armnn
std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ // Creates the workload for a Stack layer described by the given queue descriptor and workload info.
+ std::unique_ptr<IWorkload> CreateStack(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
private:
mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
};
workloads/NeonSoftmaxUint8Workload.cpp \
workloads/NeonSpaceToDepthWorkload.cpp \
workloads/NeonSplitterWorkload.cpp \
+ workloads/NeonStackWorkload.cpp \
workloads/NeonSubtractionWorkload.cpp
else
NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
}
+// Shared body for the Neon Stack workload creation tests.
+// Creates a NeonStackWorkload through CreateStackWorkloadTest using a Neon factory, then checks that
+// each of the numInputs input handles matches inputShape/DataType and that the single output handle
+// matches outputShape/DataType.
+//   inputShape  - shape shared by every input tensor
+//   outputShape - expected shape of the stacked output
+//   axis        - stack axis forwarded to CreateStackWorkloadTest
+//   numInputs   - number of input tensors to stack
+template <armnn::DataType DataType>
+static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
+ const std::initializer_list<unsigned int>& outputShape,
+ unsigned int axis,
+ unsigned int numInputs)
+{
+ armnn::Graph graph;
+ NeonWorkloadFactory factory =
+ NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
+
+ auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
+ graph,
+ TensorShape(inputShape),
+ TensorShape(outputShape),
+ axis,
+ numInputs);
+
+ // Check inputs and output are as expected
+ StackQueueDescriptor queueDescriptor = workload->GetData();
+ for (unsigned int i = 0; i < numInputs; ++i)
+ {
+ // Every input is expected to carry the same shape and data type
+ auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
+ BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
+ }
+ // Only the first output handle is inspected
+ auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+ BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
+}
+
+// Float32 case: two { 3, 4, 5 } inputs stacked on axis 2, expecting a { 3, 4, 2, 5 } output.
+BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
+{
+ NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
+}
+
+// QuantisedAsymm8 case: two { 3, 4, 5 } inputs stacked on axis 2, expecting a { 3, 4, 2, 5 } output.
+BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
+{
+ NeonCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
+}
+
BOOST_AUTO_TEST_SUITE_END()
ARMNN_AUTO_TEST_CASE(PreluFloat32, PreluTest<armnn::DataType::Float32>)
ARMNN_AUTO_TEST_CASE(PreluUint8, PreluTest<armnn::DataType::QuantisedAsymm8>)
+// Stack
+// Registers the shared Stack layer tests for this backend; only the Float32 instantiations are listed.
+ARMNN_AUTO_TEST_CASE(Stack0Axis, Stack0AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput1Axis, Stack4dOutput1AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput2Axis, Stack4dOutput2AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack4dOutput3Axis, Stack4dOutput3AxisTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(Stack3dOutput1Axis3Input, Stack3dOutput1Axis3InputTest<armnn::DataType::Float32>)
+
// ============================================================================
// COMPARE tests
NeonSpaceToDepthWorkload.hpp
NeonSplitterWorkload.cpp
NeonSplitterWorkload.hpp
+ NeonStackWorkload.cpp
+ NeonStackWorkload.hpp
NeonSubtractionWorkload.cpp
NeonSubtractionWorkload.hpp
NeonWorkloads.hpp
--- /dev/null
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "NeonStackWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <neon/NeonTensorHandle.hpp>
+
+#include <boost/numeric/conversion/cast.hpp>
+#include <boost/polymorphic_pointer_cast.hpp>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+namespace
+{
+// Returns inputDimensions minus axis as a signed int.
+// Both operands go through boost::numeric_cast<int> before the subtraction, so the result
+// is computed in signed arithmetic rather than wrapping as unsigned.
+// NOTE(review): presumably this maps the ArmNN stack axis onto ACL's reversed dimension order - confirm.
+int CalcAxis(const unsigned int axis, const unsigned int inputDimensions)
+{
+    const int signedAxis = boost::numeric_cast<int>(axis);
+    const int signedDimensions = boost::numeric_cast<int>(inputDimensions);
+    return signedDimensions - signedAxis;
+}
+} //namespace
+
+// Checks a Stack configuration against arm_compute::NEStackLayer::validate.
+//   inputs     - one TensorInfo pointer per tensor to be stacked (each pointer is dereferenced)
+//   output     - TensorInfo of the stacked result
+//   descriptor - supplies the stack axis and the input shape used to derive the ACL axis
+// Returns the arm_compute::Status produced by the ACL validate call.
+arm_compute::Status NeonStackWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+                                              const TensorInfo& output,
+                                              const StackDescriptor& descriptor)
+{
+    // Translate each ArmNN input description into an ACL TensorInfo (built with the NCHW layout).
+    std::vector<arm_compute::TensorInfo> aclInputInfos;
+    for (const TensorInfo* inputInfo : inputs)
+    {
+        aclInputInfos.emplace_back(BuildArmComputeTensorInfo(*inputInfo, armnn::DataLayout::NCHW));
+    }
+
+    // ACL expects a vector of pointers. Addresses are collected only once the vector above is
+    // fully populated, so no later growth can invalidate them.
+    std::vector<arm_compute::ITensorInfo*> aclInputInfoPtrs;
+    for (arm_compute::TensorInfo& aclInputInfo : aclInputInfos)
+    {
+        aclInputInfoPtrs.push_back(&aclInputInfo);
+    }
+
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+    const int aclAxis = CalcAxis(descriptor.m_Axis, descriptor.m_InputShape.GetNumDimensions());
+
+    return arm_compute::NEStackLayer::validate(aclInputInfoPtrs, aclAxis, &aclOutputInfo);
+}
+
+// Constructs the workload: gathers the ACL tensors behind every input handle and the first output
+// handle, creates the NEStackLayer and configures it with the axis derived by CalcAxis.
+NeonStackWorkload::NeonStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<StackQueueDescriptor>(descriptor, info)
+{
+    // Collect the underlying ACL tensor of each input, in input order.
+    std::vector<arm_compute::ITensor*> aclInputTensors;
+    for (auto inputHandle : m_Data.m_Inputs)
+    {
+        auto aclInputHandle = boost::polymorphic_pointer_downcast<IAclTensorHandle>(inputHandle);
+        arm_compute::ITensor& aclInputTensor = aclInputHandle->GetTensor();
+        aclInputTensors.push_back(&aclInputTensor);
+    }
+
+    auto aclOutputHandle = boost::polymorphic_pointer_downcast<IAclTensorHandle>(m_Data.m_Outputs[0]);
+    arm_compute::ITensor& aclOutputTensor = aclOutputHandle->GetTensor();
+
+    m_Layer = std::make_unique<arm_compute::NEStackLayer>();
+
+    const auto& parameters = descriptor.m_Parameters;
+    const int aclAxis = CalcAxis(parameters.m_Axis, parameters.m_InputShape.GetNumDimensions());
+    m_Layer->configure(aclInputTensors, aclAxis, &aclOutputTensor);
+}
+
+// Runs the configured stack layer inside a Neon profiling event; does nothing when no layer is set.
+void NeonStackWorkload::Execute() const
+{
+    if (!m_Layer)
+    {
+        return;
+    }
+
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStackWorkload_Execute");
+    m_Layer->run();
+}
+
+} //namespace armnn
--- /dev/null
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEStackLayer.h>
+
+namespace armnn
+{
+// Validates a Stack configuration for the Neon backend.
+//   inputs     - one TensorInfo pointer per input tensor
+//   output     - TensorInfo of the output tensor
+//   descriptor - Stack layer parameters
+// Returns an arm_compute::Status describing the outcome.
+arm_compute::Status NeonStackWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+ const TensorInfo& output,
+ const StackDescriptor& descriptor);
+
+// Workload for the Stack layer, built on BaseWorkload<StackQueueDescriptor> and holding an
+// arm_compute::NEStackLayer.
+class NeonStackWorkload : public BaseWorkload<StackQueueDescriptor>
+{
+public:
+ // Builds the workload from the queue descriptor and workload info.
+ NeonStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ // Executes the workload.
+ void Execute() const override;
+
+private:
+ // Owned ACL stack function; declared mutable so it stays usable from const member functions.
+ mutable std::unique_ptr<arm_compute::NEStackLayer> m_Layer;
+};
+
+} //namespace armnn
#include "NeonSoftmaxUint8Workload.hpp"
#include "NeonSpaceToDepthWorkload.hpp"
#include "NeonSplitterWorkload.hpp"
+#include "NeonStackWorkload.hpp"
#include "NeonSubtractionWorkload.hpp"