2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
5 #include "NeonWorkloadFactory.hpp"
6 #include "armnn/Utils.hpp"
7 #include "CpuTensorHandle.hpp"
11 #ifdef ARMCOMPUTENEON_ENABLED
12 #include "MemCopyWorkload.hpp"
13 #include "NeonTensorHandle.hpp"
14 #include "NeonWorkloadUtils.hpp"
15 #include "NeonWorkloads.hpp"
18 #include "MakeWorkloadHelper.hpp"
20 #include <boost/polymorphic_cast.hpp>
// Reports whether `layer` is supported for `dataType` on this backend.
// Forwards to IWorkloadFactory::IsLayerSupported with the compute device fixed
// to Compute::CpuAcc; `outReasonIfUnsupported` is handed through unchanged.
// NOTE(review): presumably the callee fills outReasonIfUnsupported on failure — confirm.
25 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported)
27 return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported);
30 #ifdef ARMCOMPUTENEON_ENABLED
// Creates a NeonSubTensorHandle over the tensor owned by `parent`.
//
// parent          - handle whose GetType() is asserted to be ITensorHandle::Neon;
//                   it is downcast to INeonTensorHandle to reach its tensor.
// subTensorShape  - shape of the sub-tensor; converted to an arm_compute::TensorShape.
// subTensorOrigin - array indexed from 0 to subTensorShape.GetNumDimensions() - 1;
//                   each entry is converted to int with boost::numeric_cast.
//
// NOTE(review): the return statement continues past the last line shown here, so
// the remaining NeonSubTensorHandle constructor arguments are not documented.
32 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
33 TensorShape const& subTensorShape,
34 unsigned int const* subTensorOrigin) const
// The downcast below relies on the parent really being a Neon handle.
36 BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon);
38 const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
// Build the origin coordinates with one entry per sub-tensor dimension.
40 arm_compute::Coordinates coords;
41 coords.set_num_dimensions(subTensorShape.GetNumDimensions());
42 for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
44 // arm compute indexes tensor coords in reverse order
// so coordinate i is taken from origin entry (numDimensions - i - 1).
45 unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
46 coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
49 return std::make_unique<NeonSubTensorHandle>(boost::polymorphic_downcast<INeonTensorHandle*>(&parent)->GetTensor(),
// Creates a new NeonTensorHandle constructed from `tensorInfo` and returns it
// as an owning pointer to the ITensorHandle interface.
53 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
55 return std::make_unique<NeonTensorHandle>(tensorInfo);
// Builds the workload for an InputQueueDescriptor by delegating to MakeWorkload
// with the CopyFromCpuToNeon Float32 and Uint8 workload types; `descriptor` and
// `info` are forwarded as-is.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
58 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
59 const WorkloadInfo& info) const
61 return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
// Builds the workload for an OutputQueueDescriptor by delegating to MakeWorkload
// with the CopyFromNeonToCpu Float32 and Uint8 workload types; `descriptor` and
// `info` are forwarded as-is.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
64 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
65 const WorkloadInfo& info) const
67 return MakeWorkload<CopyFromNeonToCpuFloat32Workload, CopyFromNeonToCpuUint8Workload>(descriptor, info);
// Builds the workload for an ActivationQueueDescriptor by delegating to
// MakeWorkload with NeonActivationFloat32Workload and NeonActivationUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
70 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
71 const WorkloadInfo& info) const
73 return MakeWorkload<NeonActivationFloat32Workload, NeonActivationUint8Workload>(descriptor, info);
// Builds the workload for a SoftmaxQueueDescriptor by delegating to
// MakeWorkload with NeonSoftmaxFloat32Workload and NeonSoftmaxUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
76 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
77 const WorkloadInfo& info) const
79 return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info);
// Builds the workload for a SplitterQueueDescriptor by delegating to
// MakeWorkload with NeonSplitterFloat32Workload and NeonSplitterUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
82 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
83 const WorkloadInfo& info) const
85 return MakeWorkload<NeonSplitterFloat32Workload, NeonSplitterUint8Workload>(descriptor, info);
// Builds the workload for a MergerQueueDescriptor by delegating to
// MakeWorkload with NeonMergerFloat32Workload and NeonMergerUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
88 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
89 const WorkloadInfo& info) const
91 return MakeWorkload<NeonMergerFloat32Workload, NeonMergerUint8Workload>(descriptor, info);
// Builds the workload for a FullyConnectedQueueDescriptor by delegating to
// MakeWorkload with NeonFullyConnectedFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm
// against MakeWorkloadHelper.hpp.
94 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
95 const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
97 return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info);
// Builds the workload for a PermuteQueueDescriptor by delegating to
// MakeWorkload with NeonPermuteFloat32Workload and NeonPermuteUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
100 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
101 const WorkloadInfo& info) const
103 return MakeWorkload<NeonPermuteFloat32Workload, NeonPermuteUint8Workload>(descriptor, info);
// Builds the workload for a Pooling2dQueueDescriptor by delegating to
// MakeWorkload with NeonPooling2dFloat32Workload and NeonPooling2dUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
106 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
107 const WorkloadInfo& info) const
109 return MakeWorkload<NeonPooling2dFloat32Workload, NeonPooling2dUint8Workload>(descriptor, info);
// Builds the workload for a Convolution2dQueueDescriptor by delegating to
// MakeWorkload with NeonConvolution2dFloat32Workload and NeonConvolution2dUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
112 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
113 const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
115 return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info);
// Builds the workload for a DepthwiseConvolution2dQueueDescriptor by delegating to
// MakeWorkload with NeonDepthwiseConvolutionFloat32Workload and
// NeonDepthwiseConvolutionUint8Workload.
// NOTE(review): the call's argument list continues past the last line shown here;
// presumably it forwards `descriptor` and `info` like the sibling factories — confirm.
118 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
119 const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
121 return MakeWorkload<NeonDepthwiseConvolutionFloat32Workload, NeonDepthwiseConvolutionUint8Workload>(
// Builds the workload for a NormalizationQueueDescriptor by delegating to
// MakeWorkload with NeonNormalizationFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
125 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
126 const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
128 return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info);
// Builds the workload for an AdditionQueueDescriptor by delegating to
// MakeWorkload with NeonAdditionFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
131 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
132 const WorkloadInfo& info) const
134 return MakeWorkload<NeonAdditionFloat32Workload, NullWorkload>(descriptor, info);
// Builds the workload for a MultiplicationQueueDescriptor by delegating to
// MakeWorkload with NeonMultiplicationFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
137 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
138 const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
140 return MakeWorkload<NeonMultiplicationFloat32Workload, NullWorkload>(descriptor, info);
// Builds the workload for a BatchNormalizationQueueDescriptor by delegating to
// MakeWorkload with NeonBatchNormalizationFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
143 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
144 const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
146 return MakeWorkload<NeonBatchNormalizationFloat32Workload, NullWorkload>(descriptor, info);
// Builds a copy-into-Neon workload for a MemCopyQueueDescriptor, chosen from the
// handle type of the first input (descriptor.m_Inputs[0]->GetType()):
//   ITensorHandle::Cpu -> CopyFromCpuToNeon{Float32,Uint8}Workload
//   ITensorHandle::CL  -> CopyFromClToNeon{Float32,Uint8}Workload (only when the
//                         ARMCOMPUTECL_ENABLED preprocessor condition holds)
// Throws InvalidArgumentException when m_Inputs is empty or its first entry is null.
// NOTE(review): the final throw is presumably the switch's default case; the
// `default:` label and the matching `#endif` are not shown here — confirm.
149 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
150 const WorkloadInfo& info) const
// Guard: the switch below dereferences m_Inputs[0].
152 if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
154 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
157 // Create a workload that will copy tensor data from the inputs, which can have a number of different formats,
159 switch (descriptor.m_Inputs[0]->GetType())
161 case ITensorHandle::Cpu:
162 return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
// NOTE(review): this uses `#if` whereas the Neon guard in this file uses `#ifdef`;
// the two behave differently if the macro is defined without a value — confirm intent.
163 #if ARMCOMPUTECL_ENABLED
164 case ITensorHandle::CL:
166 return MakeWorkload<CopyFromClToNeonFloat32Workload, CopyFromClToNeonUint8Workload>(descriptor, info);
// NOTE(review): the switch inspects the *input* handle type, yet the message
// says "Destination type" — confirm whether the wording is intended.
170 throw InvalidArgumentException("NeonWorkloadFactory: Destination type not supported for MemCopy Workload.");
// CreateResizeBilinear as defined inside the ARMCOMPUTENEON_ENABLED branch.
// Takes a ResizeBilinearQueueDescriptor and a WorkloadInfo and returns an owning IWorkload pointer.
// NOTE(review): no statement of the body is shown here, so what it returns cannot be
// stated; presumably no Neon workload is created for this layer — confirm.
174 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
175 const ResizeBilinearQueueDescriptor& descriptor,
176 const WorkloadInfo& info) const
// CreateFakeQuantization as defined inside the ARMCOMPUTENEON_ENABLED branch.
// Takes a FakeQuantizationQueueDescriptor and a WorkloadInfo and returns an owning IWorkload pointer.
// NOTE(review): no statement of the body is shown here, so what it returns cannot be
// stated; presumably no Neon workload is created for this layer — confirm.
181 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
182 const FakeQuantizationQueueDescriptor& descriptor,
183 const WorkloadInfo& info) const
// Builds the workload for an L2NormalizationQueueDescriptor by delegating to
// MakeWorkload with NeonL2NormalizationFloat32Workload as the first template
// argument and NullWorkload as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
188 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
189 const WorkloadInfo& info) const
191 return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info);
// Builds the workload for a ConstantQueueDescriptor by delegating to
// MakeWorkload with NeonConstantFloat32Workload and NeonConstantUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
194 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
195 const WorkloadInfo& info) const
197 return MakeWorkload<NeonConstantFloat32Workload, NeonConstantUint8Workload>(descriptor, info);
// Builds the workload for a ReshapeQueueDescriptor by delegating to
// MakeWorkload with NeonReshapeFloat32Workload and NeonReshapeUint8Workload.
// NOTE(review): presumably MakeWorkload picks the variant from the data type in `info` — confirm.
200 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
201 const WorkloadInfo& info) const
203 return MakeWorkload<NeonReshapeFloat32Workload, NeonReshapeUint8Workload>(descriptor, info);
// Builds the workload for a FloorQueueDescriptor by delegating to MakeWorkload
// with NeonFloorFloat32Workload as the first template argument and NullWorkload
// as the second.
// NOTE(review): NullWorkload presumably means no Uint8 implementation is offered here — confirm.
206 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
207 const WorkloadInfo& info) const
209 return MakeWorkload<NeonFloorFloat32Workload, NullWorkload>(descriptor, info);
212 #else // Compiled without ArmCompute libs
// CreateSubTensorHandle for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no handle — confirm.
214 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
215 TensorShape const& subTensorShape,
216 unsigned int const* subTensorOrigin) const
// CreateTensorHandle for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no handle — confirm.
221 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
// CreateInput for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
226 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
227 const WorkloadInfo& info) const
// CreateOutput for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
232 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
233 const WorkloadInfo& info) const
// CreateActivation for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
238 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
239 const WorkloadInfo& info) const
// CreateSoftmax for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
244 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
245 const WorkloadInfo& info) const
// CreateSplitter for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
250 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
251 const WorkloadInfo& info) const
// CreateMerger for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
256 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
257 const WorkloadInfo& info) const
// CreateFullyConnected for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
262 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
263 const WorkloadInfo& info) const
// CreatePermute for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
268 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
269 const WorkloadInfo& info) const
// CreatePooling2d for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
274 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
275 const WorkloadInfo& info) const
// CreateConvolution2d for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
280 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
281 const WorkloadInfo& info) const
// CreateDepthwiseConvolution2d for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
286 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
287 const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
// CreateNormalization for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
292 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
293 const WorkloadInfo& info) const
// CreateAddition for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
298 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
299 const WorkloadInfo& info) const
// CreateBatchNormalization for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries. The descriptor parameter is named `data` here.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
304 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
305 const WorkloadInfo& info) const
// CreateMultiplication for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries. The descriptor parameter is named `data` here.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
310 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
311 const WorkloadInfo& info) const
// CreateMemCopy for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
316 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
317 const WorkloadInfo& info) const
// CreateResizeBilinear for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
322 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
323 const WorkloadInfo& info) const
// CreateFakeQuantization for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
328 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
329 const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
// CreateL2Normalization for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
334 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
335 const WorkloadInfo& info) const
// CreateConstant for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
340 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
341 const WorkloadInfo& info) const
// CreateReshape for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
346 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
347 const WorkloadInfo& info) const
// CreateFloor for the #else branch of ARMCOMPUTENEON_ENABLED, i.e. builds
// compiled without the Arm Compute libraries.
// NOTE(review): the body is not shown here; presumably it creates no workload — confirm.
352 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
353 const WorkloadInfo& info) const