Release 18.05.01
[platform/upstream/armnn.git] / src / armnn / backends / NeonWorkloadFactory.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // See LICENSE file in the project root for full license information.
4 //
5 #include "NeonWorkloadFactory.hpp"
6 #include "armnn/Utils.hpp"
7 #include "CpuTensorHandle.hpp"
8 #include "Layer.hpp"
9
10 #ifdef ARMCOMPUTENEON_ENABLED
11 #include "arm_compute/runtime/Allocator.h"
12 #include "MemCopyWorkload.hpp"
13 #include "NeonTensorHandle.hpp"
14 #include "NeonWorkloadUtils.hpp"
15 #include "NeonWorkloads.hpp"
16 #endif
17
18 #include "MakeWorkloadHelper.hpp"
19
20 #include <boost/polymorphic_cast.hpp>
21
22 namespace armnn
23 {
24
25 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported)
26 {
27     return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported);
28 }
29
30 #ifdef ARMCOMPUTENEON_ENABLED
31
// Constructs the factory with a memory manager backed by an arm_compute::Allocator;
// the manager is handed to workloads that take it (softmax, convolution, etc.).
NeonWorkloadFactory::NeonWorkloadFactory()
: m_MemoryManager(std::make_unique<arm_compute::Allocator>())
{
}
36
37 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
38     TensorShape const& subTensorShape,
39     unsigned int const* subTensorOrigin) const
40 {
41     BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon);
42
43     const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
44
45     arm_compute::Coordinates coords;
46     coords.set_num_dimensions(subTensorShape.GetNumDimensions());
47     for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
48     {
49         // arm compute indexes tensor coords in reverse order
50         unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
51         coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
52     }
53
54     return std::make_unique<NeonSubTensorHandle>(boost::polymorphic_downcast<INeonTensorHandle*>(&parent)->GetTensor(),
55         shape, coords);
56 }
57
58 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
59 {
60     return std::make_unique<NeonTensorHandle>(tensorInfo);
61 }
62
63 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
64                                                             const WorkloadInfo&        info) const
65 {
66     return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
67 }
68
69 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
70                                                              const WorkloadInfo&        info) const
71 {
72     return MakeWorkload<CopyFromNeonToCpuFloat32Workload, CopyFromNeonToCpuUint8Workload>(descriptor, info);
73 }
74
75 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
76                                                                  const WorkloadInfo&              info) const
77 {
78     return MakeWorkload<NeonActivationFloat32Workload, NeonActivationUint8Workload>(descriptor, info);
79 }
80
81 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
82                                                               const WorkloadInfo&           info) const
83 {
84     return MakeWorkload<NeonSoftmaxFloat32Workload, NeonSoftmaxUint8Workload>(descriptor, info,
85                                                                               m_MemoryManager.Get());
86 }
87
88 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
89                                                                const WorkloadInfo&            info) const
90 {
91     return MakeWorkload<NeonSplitterFloat32Workload, NeonSplitterUint8Workload>(descriptor, info);
92 }
93
94 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
95                                                                     const WorkloadInfo&          info) const
96 {
97     return MakeWorkload<NeonMergerFloat32Workload, NeonMergerUint8Workload>(descriptor, info);
98 }
99
100 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
101     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
102 {
103     return MakeWorkload<NeonFullyConnectedFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
104 }
105
106 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
107                                                                      const WorkloadInfo&           info) const
108 {
109     return MakeWorkload<NeonPermuteFloat32Workload, NeonPermuteUint8Workload>(descriptor, info);
110 }
111
112 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
113                                                                        const WorkloadInfo&           info) const
114 {
115     return MakeWorkload<NeonPooling2dFloat32Workload, NeonPooling2dUint8Workload>(descriptor, info);
116 }
117
118 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
119     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
120 {
121     return MakeWorkload<NeonConvolution2dFloat32Workload, NeonConvolution2dUint8Workload>(descriptor, info,
122                                                                                           m_MemoryManager.Get());
123 }
124
125 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
126     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
127 {
128     return MakeWorkload<NeonDepthwiseConvolutionFloat32Workload, NeonDepthwiseConvolutionUint8Workload>(
129         descriptor, info);
130 }
131
132 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
133     const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
134 {
135     return MakeWorkload<NeonNormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
136 }
137
138 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
139                                                                       const WorkloadInfo&            info) const
140 {
141     return MakeWorkload<NeonAdditionFloat32Workload, NullWorkload>(descriptor, info);
142 }
143
144 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
145     const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
146 {
147     return MakeWorkload<NeonMultiplicationFloat32Workload, NullWorkload>(descriptor, info);
148 }
149
150 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
151     const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
152 {
153     return MakeWorkload<NeonBatchNormalizationFloat32Workload, NullWorkload>(descriptor, info);
154 }
155
156 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
157                                                                      const WorkloadInfo&        info) const
158 {
159     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
160     {
161         throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
162     }
163
164     // Create a workload that will copy tensor data from the inputs, which can have a number of different formats,
165     // to Neon tensors.
166     switch (descriptor.m_Inputs[0]->GetType())
167     {
168     case ITensorHandle::Cpu:
169         return MakeWorkload<CopyFromCpuToNeonFloat32Workload, CopyFromCpuToNeonUint8Workload>(descriptor, info);
170 #if ARMCOMPUTECL_ENABLED
171     case ITensorHandle::CL:
172     {
173         return MakeWorkload<CopyFromClToNeonFloat32Workload, CopyFromClToNeonUint8Workload>(descriptor, info);
174     }
175 #endif
176     default:
177         throw InvalidArgumentException("NeonWorkloadFactory: Destination type not supported for MemCopy Workload.");
178     }
179 }
180
181 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
182     const ResizeBilinearQueueDescriptor& descriptor,
183     const WorkloadInfo& info) const
184 {
185     return nullptr;
186 }
187
188 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
189     const FakeQuantizationQueueDescriptor& descriptor,
190     const WorkloadInfo& info) const
191 {
192     return nullptr;
193 }
194
195 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
196     const WorkloadInfo& info) const
197 {
198     return MakeWorkload<NeonL2NormalizationFloat32Workload, NullWorkload>(descriptor, info, m_MemoryManager.Get());
199 }
200
201 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
202     const WorkloadInfo& info) const
203 {
204     return MakeWorkload<NeonConstantFloat32Workload, NeonConstantUint8Workload>(descriptor, info);
205 }
206
207 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
208     const WorkloadInfo& info) const
209 {
210     return MakeWorkload<NeonReshapeFloat32Workload, NeonReshapeUint8Workload>(descriptor, info);
211 }
212
213 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
214     const WorkloadInfo& info) const
215 {
216     return MakeWorkload<NeonFloorFloat32Workload, NullWorkload>(descriptor, info);
217 }
218
// Finalizes the internal memory manager once all workloads have been created.
void NeonWorkloadFactory::Finalize()
{
    m_MemoryManager.Finalize();
}
223
224 #else // Compiled without ArmCompute libs
225
// Built without Arm Compute NEON support: there is no memory manager to set up.
NeonWorkloadFactory::NeonWorkloadFactory()
{
}
229
230 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
231     TensorShape const& subTensorShape,
232     unsigned int const* subTensorOrigin) const
233 {
234     return nullptr;
235 }
236
237 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
238 {
239     return nullptr;
240 }
241
242 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
243                                                             const WorkloadInfo&        info) const
244 {
245     return nullptr;
246 }
247
248 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
249                                                              const WorkloadInfo&        info) const
250 {
251     return nullptr;
252 }
253
254 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
255                                                                  const WorkloadInfo&              info) const
256 {
257     return nullptr;
258 }
259
260 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
261                                                               const WorkloadInfo&           info) const
262 {
263     return nullptr;
264 }
265
266 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
267                                                                const WorkloadInfo&            info) const
268 {
269     return nullptr;
270 }
271
272 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
273                                                              const WorkloadInfo&          info) const
274 {
275     return nullptr;
276 }
277
278 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
279                                                                      const WorkloadInfo&                  info) const
280 {
281     return nullptr;
282 }
283
284 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
285                                                                      const WorkloadInfo&           info) const
286 {
287     return nullptr;
288 }
289
290 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
291                                                                 const WorkloadInfo&           info) const
292 {
293     return nullptr;
294 }
295
296 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
297                                                                     const WorkloadInfo&               info) const
298 {
299     return nullptr;
300 }
301
302 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
303     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
304 {
305     return nullptr;
306 }
307
308 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
309                                                                     const WorkloadInfo&                 info) const
310 {
311     return nullptr;
312 }
313
314 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
315                                                                const WorkloadInfo&            info) const
316 {
317     return nullptr;
318 }
319
320 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
321                                                                          const WorkloadInfo& info) const
322 {
323     return nullptr;
324 }
325
326 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
327                                                                      const WorkloadInfo&                  info) const
328 {
329     return nullptr;
330 }
331
332 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
333                                                               const WorkloadInfo&        info) const
334 {
335     return nullptr;
336 }
337
338 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
339                                                                      const WorkloadInfo& info) const
340 {
341     return nullptr;
342 }
343
344 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
345         const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
346 {
347     return nullptr;
348 }
349
350 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
351     const WorkloadInfo& info) const
352 {
353     return nullptr;
354 }
355
356 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
357     const WorkloadInfo& info) const
358 {
359     return nullptr;
360 }
361
362 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
363     const WorkloadInfo&           info) const
364 {
365     return nullptr;
366 }
367
368 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
369     const WorkloadInfo& info) const
370 {
371     return nullptr;
372 }
373
// No-op: there is no memory manager when built without Arm Compute NEON.
void NeonWorkloadFactory::Finalize()
{}
376
377 #endif
378
379 } //namespace armnn