2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
5 #include "NeonWorkloadFactory.hpp"
6 #include "armnn/Utils.hpp"
7 #include "CpuTensorHandle.hpp"
10 #ifdef ARMCOMPUTENEON_ENABLED
11 #include "arm_compute/runtime/Allocator.h"
12 #include "MemCopyWorkload.hpp"
13 #include "NeonTensorHandle.hpp"
14 #include "NeonWorkloadUtils.hpp"
15 #include "NeonWorkloads.hpp"
18 #include "MakeWorkloadHelper.hpp"
20 #include <boost/polymorphic_cast.hpp>
25 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported)
27 return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported);
30 #ifdef ARMCOMPUTENEON_ENABLED
32 NeonWorkloadFactory::NeonWorkloadFactory()
33 : m_MemoryManager(std::make_unique<arm_compute::Allocator>())
// Creates a non-owning sub-tensor view into a parent NEON tensor, so that layers
// addressing a slice of a larger tensor can do so without copying data.
// subTensorOrigin gives the slice's starting coordinates, in armnn dimension order.
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const
// Only NEON-backed parents can be sub-viewed.
BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon);
const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
arm_compute::Coordinates coords;
coords.set_num_dimensions(subTensorShape.GetNumDimensions());
for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
// arm compute indexes tensor coords in reverse order
unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
// NOTE(review): this extract is truncated mid-statement below — the constructor call is
// missing its trailing arguments (presumably `shape, coords);`). Confirm against the
// full file; brace-only lines also appear to have been stripped from this extract.
return std::make_unique<NeonSubTensorHandle>(boost::polymorphic_downcast<INeonTensorHandle*>(&parent)->GetTensor(),
58 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
60 return std::make_unique<NeonTensorHandle>(tensorInfo);
63 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
64 const WorkloadInfo& info) const
66 return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
69 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
70 const WorkloadInfo& info) const
72 return MakeWorkload<CopyFromNeonToCpuFloat32Workload, CopyFromNeonToCpuUint8Workload>(descriptor, info);
75 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
76 const WorkloadInfo& info) const
78 return MakeWorkload<NeonActivationFloat32Workload, NeonActivationUint8Workload>(descriptor, info);
81 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
82 const WorkloadInfo& info) const
84 return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info,
85 m_MemoryManager.Get());
88 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
89 const WorkloadInfo& info) const
91 return MakeWorkload<NeonSplitterFloat32Workload, NeonSplitterUint8Workload>(descriptor, info);
94 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
95 const WorkloadInfo& info) const
97 return MakeWorkload<NeonMergerFloat32Workload, NeonMergerUint8Workload>(descriptor, info);
100 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
101 const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
103 return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
106 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
107 const WorkloadInfo& info) const
109 return MakeWorkload<NeonPermuteFloat32Workload, NeonPermuteUint8Workload>(descriptor, info);
112 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
113 const WorkloadInfo& info) const
115 return MakeWorkload<NeonPooling2dFloat32Workload, NeonPooling2dUint8Workload>(descriptor, info);
118 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
119 const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
121 return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info,
122 m_MemoryManager.Get());
// Creates the NEON depthwise convolution workload (Float32 or Uint8 variant).
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
// NOTE(review): this extract is truncated mid-call — the argument list on the line
// below (presumably `descriptor, info);`) is missing. Confirm against the full file.
return MakeWorkload<NeonDepthwiseConvolutionFloat32Workload, NeonDepthwiseConvolutionUint8Workload>(
132 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
133 const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
135 return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
138 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
139 const WorkloadInfo& info) const
141 return MakeWorkload<NeonAdditionFloat32Workload, NullWorkload>(descriptor, info);
144 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
145 const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
147 return MakeWorkload<NeonMultiplicationFloat32Workload, NullWorkload>(descriptor, info);
150 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
151 const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
153 return MakeWorkload<NeonBatchNormalizationFloat32Workload, NullWorkload>(descriptor, info);
// Creates a workload copying tensor data into NEON from whichever backend owns
// the source handle (CPU always; CL only when compiled with CL support).
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const
// A MemCopy workload requires at least one non-null source handle.
if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
// Create a workload that will copy tensor data from the inputs, which can have a number of different formats,
// Dispatch on the backend type of the first source handle.
switch (descriptor.m_Inputs[0]->GetType())
case ITensorHandle::Cpu:
return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
// NOTE(review): `#if` here vs `#ifdef ARMCOMPUTENEON_ENABLED` elsewhere in this file —
// behaves the same when the macro is undefined or 0/1, but worth unifying.
#if ARMCOMPUTECL_ENABLED
case ITensorHandle::CL:
return MakeWorkload<CopyFromClToNeonFloat32Workload, CopyFromClToNeonUint8Workload>(descriptor, info);
// NOTE(review): lines are missing from this extract around here (likely the matching
// `#endif` and a `default:` label preceding the throw below). Confirm against the full file.
throw InvalidArgumentException("NeonWorkloadFactory: Destination type not supported for MemCopy Workload.");
// ResizeBilinear factory entry point.
// NOTE(review): the function body is not visible in this extract (only the signature
// survived); presumably it returns nullptr, i.e. no NEON implementation — confirm
// against the full file.
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
const ResizeBilinearQueueDescriptor& descriptor,
const WorkloadInfo& info) const
// FakeQuantization factory entry point.
// NOTE(review): the function body is not visible in this extract (only the signature
// survived); presumably it returns nullptr, i.e. no NEON implementation — confirm
// against the full file.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
const FakeQuantizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
195 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
196 const WorkloadInfo& info) const
198 return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
201 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
202 const WorkloadInfo& info) const
204 return MakeWorkload<NeonConstantFloat32Workload, NeonConstantUint8Workload>(descriptor, info);
207 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
208 const WorkloadInfo& info) const
210 return MakeWorkload<NeonReshapeFloat32Workload, NeonReshapeUint8Workload>(descriptor, info);
213 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
214 const WorkloadInfo& info) const
216 return MakeWorkload<NeonFloorFloat32Workload, NullWorkload>(descriptor, info);
219 void NeonWorkloadFactory::Finalize()
221 m_MemoryManager.Finalize();
224 #else // Compiled without ArmCompute libs
// ----------------------------------------------------------------------------
// Stub implementations, compiled when ArmCompute NEON support is unavailable
// (the #else branch of ARMCOMPUTENEON_ENABLED).
// NOTE(review): the function bodies are not visible in this extract — only the
// signatures survived (brace/body lines appear stripped). Presumably each stub
// is empty or returns nullptr; confirm against the full file before relying on
// this documentation.
// ----------------------------------------------------------------------------
NeonWorkloadFactory::NeonWorkloadFactory()
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
TensorShape const& subTensorShape,
unsigned int const* subTensorOrigin) const
std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
// NOTE(review): the two stubs below name their descriptor parameter `data`, unlike
// the `descriptor` used everywhere else — harmless, but inconsistent.
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const
// NOTE(review): Finalize's body (and any trailing #endif / namespace close) lies
// beyond the visible extract.
void NeonWorkloadFactory::Finalize()