2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
5 #include "NeonWorkloadFactory.hpp"
6 #include "NeonBackendId.hpp"
7 #include <armnn/Utils.hpp>
8 #include <backendsCommon/CpuTensorHandle.hpp>
11 #ifdef ARMCOMPUTENEON_ENABLED
12 #include <arm_compute/runtime/Allocator.h>
14 #include <backendsCommon/MemCopyWorkload.hpp>
15 #include "NeonTensorHandle.hpp"
16 #include "workloads/NeonWorkloadUtils.hpp"
17 #include "workloads/NeonWorkloads.hpp"
19 #include <aclCommon/memory/IPoolManager.hpp>
22 #include <backendsCommon/MakeWorkloadHelper.hpp>
24 #include <boost/polymorphic_cast.hpp>
// File-local backend identifier, initialised once from NeonBackendId().
31 static const BackendId s_Id{NeonBackendId()};
// Reports whether `layer` is supported by this backend.
// Delegates to IWorkloadFactory::IsLayerSupported, passing the file-local
// backend id (s_Id) together with the caller's layer, optional data type and
// the output string that receives the reason when the layer is unsupported.
34 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
35 Optional<DataType> dataType,
36 std::string& outReasonIfUnsupported)
38 return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
// Accessor for the backend id associated with this factory.
// NOTE(review): the body is not shown here; presumably it returns s_Id - confirm.
41 const BackendId& NeonWorkloadFactory::GetBackendId() const
46 #ifdef ARMCOMPUTENEON_ENABLED
// Default constructor.
// Initialises m_MemoryManager with a freshly created arm_compute::Allocator and
// selects BaseMemoryManager::MemoryAffinity::Offset as the affinity mode.
48 NeonWorkloadFactory::NeonWorkloadFactory()
49 : m_MemoryManager(std::make_unique<arm_compute::Allocator>(), BaseMemoryManager::MemoryAffinity::Offset)
// Creates a NeonSubTensorHandle describing a region of `parent`.
//   parent          - handle to take the sub-tensor from; asserted to be of type ITensorHandle::Neon.
//   subTensorShape  - shape of the sub-tensor; converted to an arm_compute::TensorShape.
//   subTensorOrigin - start coordinates; read at indices [0, subTensorShape.GetNumDimensions()).
// Returns the new handle as a std::unique_ptr<ITensorHandle>.
// NOTE(review): subTensorOrigin is dereferenced without a null check - callers
// presumably guarantee it points at one entry per dimension; confirm.
53 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
54 TensorShape const& subTensorShape,
55 unsigned int const* subTensorOrigin) const
// Guard for the downcast to INeonTensorHandle* performed when building the result.
57 BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon);
59 const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
61 arm_compute::Coordinates coords;
62 coords.set_num_dimensions(subTensorShape.GetNumDimensions());
// Copy the origin into coords, reversing the dimension order on the way.
63 for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
65 // Arm compute indexes tensor coords in reverse order.
66 unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
// Origin values are unsigned; numeric_cast converts them to the int that coords.set takes.
67 coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
70 return std::make_unique<NeonSubTensorHandle>(
71 boost::polymorphic_downcast<INeonTensorHandle*>(&parent), shape, coords);
// Creates a NeonTensorHandle for `tensorInfo` and assigns it the memory
// manager's inter-layer memory group.
// NOTE(review): the return statement is not shown here; presumably the
// function returns tensorHandle - confirm.
74 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
76 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
77 tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
// Overload of CreateTensorHandle that also forwards `dataLayout` to the
// NeonTensorHandle constructor. The handle is assigned the memory manager's
// inter-layer memory group.
// NOTE(review): the return statement is not shown here; presumably the
// function returns tensorHandle - confirm.
82 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
83 DataLayout dataLayout) const
85 auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
86 tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
// Creates the workload for an input layer via MakeWorkloadHelper.
// CopyMemGenericWorkload is supplied for both template arguments.
// NOTE(review): the two arguments are presumably the float and uint8 workload
// types chosen by MakeWorkloadHelper - confirm against its definition.
91 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
92 const WorkloadInfo& info) const
94 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
// Creates the workload for an output layer via MakeWorkloadHelper.
// CopyMemGenericWorkload is supplied for both template arguments.
// NOTE(review): the two arguments are presumably the float and uint8 workload
// types chosen by MakeWorkloadHelper - confirm against its definition.
97 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
98 const WorkloadInfo& info) const
100 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
// Creates a NeonActivationWorkload directly from the descriptor and workload info.
103 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
104 const WorkloadInfo& info) const
106 return std::make_unique<NeonActivationWorkload>(descriptor, info);
// Creates a softmax workload via MakeWorkloadHelper, offering
// NeonSoftmaxFloatWorkload and NeonSoftmaxUint8Workload as candidates.
// The memory manager's intra-layer manager is passed on to the workload.
// NOTE(review): selection between the two types presumably follows the tensor
// data type in `info` - confirm against MakeWorkloadHelper.
109 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
110 const WorkloadInfo& info) const
112 return MakeWorkloadHelper<NeonSoftmaxFloatWorkload, NeonSoftmaxUint8Workload>(descriptor, info,
113 m_MemoryManager.GetIntraLayerManager());
// Creates a NeonSplitterWorkload directly from the descriptor and workload info.
116 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
117 const WorkloadInfo& info) const
119 return std::make_unique<NeonSplitterWorkload>(descriptor, info);
// Creates a NeonMergerWorkload directly from the descriptor and workload info.
122 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
123 const WorkloadInfo& info) const
125 return std::make_unique<NeonMergerWorkload>(descriptor, info);
// Creates a fully-connected workload via MakeWorkloadHelper.
// NeonFullyConnectedWorkload is supplied for both template arguments, and the
// memory manager's intra-layer manager is passed on to the workload.
128 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
129 const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
131 return MakeWorkloadHelper<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload>(descriptor, info,
132 m_MemoryManager.GetIntraLayerManager());
// Creates a NeonPermuteWorkload directly from the descriptor and workload info.
135 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
136 const WorkloadInfo& info) const
138 return std::make_unique<NeonPermuteWorkload>(descriptor, info);
// Creates a NeonPooling2dWorkload directly from the descriptor and workload info.
141 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
142 const WorkloadInfo& info) const
144 return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
// Creates a NeonConvolution2dWorkload, handing it the memory manager's
// intra-layer manager in addition to the descriptor and workload info.
147 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
148 const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
150 return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
151 m_MemoryManager.GetIntraLayerManager());
// Creates a NeonDepthwiseConvolutionWorkload directly from the descriptor and workload info.
154 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
155 const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
157 return std::make_unique<NeonDepthwiseConvolutionWorkload>(descriptor, info);
// Creates a normalization workload via MakeWorkloadHelper with
// NeonNormalizationFloatWorkload as the first candidate and NullWorkload as the
// second; the memory manager's intra-layer manager is passed on.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
160 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
161 const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
163 return MakeWorkloadHelper<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
164 m_MemoryManager.GetIntraLayerManager());
// Creates an addition workload via MakeWorkloadHelper with
// NeonAdditionFloatWorkload as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
167 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
168 const WorkloadInfo& info) const
170 return MakeWorkloadHelper<NeonAdditionFloatWorkload, NullWorkload>(descriptor, info);
// Creates a multiplication workload via MakeWorkloadHelper with
// NeonMultiplicationFloatWorkload as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
173 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
174 const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
176 return MakeWorkloadHelper<NeonMultiplicationFloatWorkload, NullWorkload>(descriptor, info);
// Division: MakeWorkloadHelper is invoked with NullWorkload for both template arguments.
// NOTE(review): this presumably means no Neon division workload is available
// and the helper yields no workload - confirm against MakeWorkloadHelper.
179 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
180 const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
182 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
// Creates a subtraction workload via MakeWorkloadHelper with
// NeonSubtractionFloatWorkload as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
185 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
186 const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
188 return MakeWorkloadHelper<NeonSubtractionFloatWorkload, NullWorkload>(descriptor, info);
// Creates a batch-normalization workload via MakeWorkloadHelper with
// NeonBatchNormalizationFloatWorkload as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
191 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
192 const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
194 return MakeWorkloadHelper<NeonBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info);
// Creates a memory-copy workload (CopyMemGenericWorkload for both template
// arguments of MakeWorkloadHelper).
// Throws InvalidArgumentException when the descriptor has no inputs or when
// its first input is null.
197 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
198 const WorkloadInfo& info) const
// Validate the first input before building the workload; only m_Inputs[0] is checked here.
200 if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
202 throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
205 return MakeWorkloadHelper<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
// Factory entry point for resize-bilinear workloads.
// NOTE(review): the body is not shown here; presumably no Neon workload is
// created and nullptr is returned - confirm.
208 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
209 const ResizeBilinearQueueDescriptor& descriptor,
210 const WorkloadInfo& info) const
// Factory entry point for fake-quantization workloads.
// NOTE(review): the body is not shown here; presumably no Neon workload is
// created and nullptr is returned - confirm.
215 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
216 const FakeQuantizationQueueDescriptor& descriptor,
217 const WorkloadInfo& info) const
// Creates an L2-normalization workload via MakeWorkloadHelper with
// NeonL2NormalizationFloatWorkload as the first candidate and NullWorkload as
// the second; the memory manager's intra-layer manager is passed on.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
222 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
223 const WorkloadInfo& info) const
225 return MakeWorkloadHelper<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
226 m_MemoryManager.GetIntraLayerManager());
// Creates a NeonConstantWorkload directly from the descriptor and workload info.
229 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
230 const WorkloadInfo& info) const
232 return std::make_unique<NeonConstantWorkload>(descriptor, info);
// Creates a NeonReshapeWorkload directly from the descriptor and workload info.
235 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
236 const WorkloadInfo& info) const
238 return std::make_unique<NeonReshapeWorkload>(descriptor, info);
// Factory entry point for space-to-batch-ND workloads.
// NOTE(review): the body is not shown here; presumably no Neon workload is
// created and nullptr is returned - confirm.
241 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
242 const WorkloadInfo& info) const
// Creates a floor workload via MakeWorkloadHelper with NeonFloorFloatWorkload
// as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
247 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
248 const WorkloadInfo& info) const
250 return MakeWorkloadHelper<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
// Creates an LSTM workload via MakeWorkloadHelper with NeonLstmFloatWorkload
// as the first candidate and NullWorkload as the second.
// NOTE(review): NullWorkload in the second slot presumably means there is no
// uint8 implementation - confirm against MakeWorkloadHelper.
253 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
254 const WorkloadInfo& info) const
256 return MakeWorkloadHelper<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
// Creates a NeonConvertFp16ToFp32Workload directly from the descriptor and workload info.
259 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
260 const ConvertFp16ToFp32QueueDescriptor& descriptor,
261 const WorkloadInfo& info) const
263 return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
// Creates a NeonConvertFp32ToFp16Workload directly from the descriptor and workload info.
266 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
267 const ConvertFp32ToFp16QueueDescriptor& descriptor,
268 const WorkloadInfo& info) const
270 return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
// Mean: MakeWorkloadHelper is invoked with NullWorkload for both template arguments.
// NOTE(review): this presumably means no Neon mean workload is available and
// the helper yields no workload - confirm against MakeWorkloadHelper.
273 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
274 const WorkloadInfo& info) const
276 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
// Pad: MakeWorkloadHelper is invoked with NullWorkload for both template arguments.
// NOTE(review): this presumably means no Neon pad workload is available and
// the helper yields no workload - confirm against MakeWorkloadHelper.
279 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
280 const WorkloadInfo& info) const
282 return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
// Forwards to m_MemoryManager.Finalize().
285 void NeonWorkloadFactory::Finalize()
287 m_MemoryManager.Finalize();
// Forwards to m_MemoryManager.Release().
290 void NeonWorkloadFactory::Release()
292 m_MemoryManager.Release();
// Forwards to m_MemoryManager.Acquire().
295 void NeonWorkloadFactory::Acquire()
297 m_MemoryManager.Acquire();
300 #else // Compiled without ArmCompute libs
// Constructor for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it is empty - confirm.
302 NeonWorkloadFactory::NeonWorkloadFactory()
// CreateSubTensorHandle for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
306 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
307 TensorShape const& subTensorShape,
308 unsigned int const* subTensorOrigin) const
// CreateTensorHandle for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
313 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
// CreateTensorHandle (data-layout overload) for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
318 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
319 DataLayout dataLayout) const
// CreateInput for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
324 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
325 const WorkloadInfo& info) const
// CreateOutput for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
330 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
331 const WorkloadInfo& info) const
// CreateActivation for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
336 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
337 const WorkloadInfo& info) const
// CreateSoftmax for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
342 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
343 const WorkloadInfo& info) const
// CreateSplitter for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
348 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
349 const WorkloadInfo& info) const
// CreateMerger for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
354 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
355 const WorkloadInfo& info) const
// CreateFullyConnected for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
360 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
361 const WorkloadInfo& info) const
// CreatePermute for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
366 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
367 const WorkloadInfo& info) const
// CreatePooling2d for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
372 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
373 const WorkloadInfo& info) const
// CreateConvolution2d for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
378 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
379 const WorkloadInfo& info) const
// CreateDepthwiseConvolution2d for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
384 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
385 const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
// CreateNormalization for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
390 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
391 const WorkloadInfo& info) const
// CreateAddition for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
396 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
397 const WorkloadInfo& info) const
// CreateBatchNormalization for the `#else` branch (compiled without ArmCompute libs).
// The descriptor parameter is named `data` in this definition.
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
402 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
403 const WorkloadInfo& info) const
// CreateMultiplication for the `#else` branch (compiled without ArmCompute libs).
// The descriptor parameter is named `data` in this definition.
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
408 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
409 const WorkloadInfo& info) const
// CreateMemCopy for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
414 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
415 const WorkloadInfo& info) const
// CreateResizeBilinear for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
420 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
421 const WorkloadInfo& info) const
// CreateFakeQuantization for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
426 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
427 const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
// CreateL2Normalization for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
432 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
433 const WorkloadInfo& info) const
// CreateConstant for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
438 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
439 const WorkloadInfo& info) const
// CreateReshape for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
444 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
445 const WorkloadInfo& info) const
// CreateSpaceToBatchNd for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
450 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
451 const WorkloadInfo& info) const
// CreateFloor for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
456 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
457 const WorkloadInfo& info) const
// CreateLstm for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
462 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
463 const WorkloadInfo& info) const
// CreateConvertFp16ToFp32 for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
468 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
469 const ConvertFp16ToFp32QueueDescriptor& descriptor,
470 const WorkloadInfo& info) const
// CreateConvertFp32ToFp16 for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
475 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
476 const ConvertFp32ToFp16QueueDescriptor& descriptor,
477 const WorkloadInfo& info) const
// CreateDivision for the `#else` branch (compiled without ArmCompute libs).
// The descriptor parameter is named `data` in this definition.
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
482 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data,
483 const WorkloadInfo& info) const
// CreateSubtraction for the `#else` branch (compiled without ArmCompute libs).
// The descriptor parameter is named `data` in this definition.
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
488 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data,
489 const WorkloadInfo& info) const
// CreateMean for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
494 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
495 const WorkloadInfo& info) const
// CreatePad for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it returns nullptr - confirm.
500 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
501 const WorkloadInfo& info) const
// Finalize for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it is a no-op - confirm.
506 void NeonWorkloadFactory::Finalize()
// Release for the `#else` branch (compiled without ArmCompute libs).
// NOTE(review): the body is not shown here; presumably it is a no-op - confirm.
509 void NeonWorkloadFactory::Release()
512 void NeonWorkloadFactory::Acquire()