IVGCVSW-1951 Remove type templating from NeonActivationWorkload
[platform/upstream/armnn.git] / src / backends / neon / NeonWorkloadFactory.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #include "NeonWorkloadFactory.hpp"
6 #include <armnn/Utils.hpp>
7 #include <backends/CpuTensorHandle.hpp>
8 #include <Layer.hpp>
9
10 #ifdef ARMCOMPUTENEON_ENABLED
11 #include <arm_compute/runtime/Allocator.h>
12
13 #include <backends/MemCopyWorkload.hpp>
14 #include "NeonTensorHandle.hpp"
15 #include "workloads/NeonWorkloadUtils.hpp"
16 #include "workloads/NeonWorkloads.hpp"
17
18 #include <memory/IPoolManager.hpp>
19 #endif
20
21 #include <backends/MakeWorkloadHelper.hpp>
22
23 #include <boost/polymorphic_cast.hpp>
24
25 namespace armnn
26 {
27
28 bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType,
29                                            std::string& outReasonIfUnsupported)
30 {
31     return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported);
32 }
33
34 #ifdef ARMCOMPUTENEON_ENABLED
35
36 NeonWorkloadFactory::NeonWorkloadFactory()
37     : m_MemoryManager(std::make_unique<arm_compute::Allocator>(), BaseMemoryManager::MemoryAffinity::Offset)
38 {
39 }
40
41 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
42     TensorShape const& subTensorShape,
43     unsigned int const* subTensorOrigin) const
44 {
45     BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon);
46
47     const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
48
49     arm_compute::Coordinates coords;
50     coords.set_num_dimensions(subTensorShape.GetNumDimensions());
51     for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++)
52     {
53         // Arm compute indexes tensor coords in reverse order.
54         unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1;
55         coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex]));
56     }
57
58     return std::make_unique<NeonSubTensorHandle>(
59         boost::polymorphic_downcast<INeonTensorHandle*>(&parent), shape, coords);
60 }
61
62 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
63 {
64     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo);
65     tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
66
67     return tensorHandle;
68 }
69
70 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
71                                                                        DataLayout dataLayout) const
72 {
73     auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout);
74     tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup());
75
76     return tensorHandle;
77 }
78
79 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
80                                                             const WorkloadInfo&        info) const
81 {
82     return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
83 }
84
85 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
86                                                              const WorkloadInfo&        info) const
87 {
88     return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
89 }
90
91 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
92                                                                  const WorkloadInfo&              info) const
93 {
94     return std::make_unique<NeonActivationWorkload>(descriptor, info);
95 }
96
97 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
98                                                               const WorkloadInfo&           info) const
99 {
100     return MakeWorkload<NeonSoftmaxFloatWorkload, NeonSoftmaxUint8Workload>(descriptor, info,
101                                                                               m_MemoryManager.GetIntraLayerManager());
102 }
103
104 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
105                                                                const WorkloadInfo&            info) const
106 {
107     return MakeWorkload<NeonSplitterFloatWorkload, NeonSplitterUint8Workload>(descriptor, info);
108 }
109
110 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
111                                                                     const WorkloadInfo&          info) const
112 {
113     return MakeWorkload<NeonMergerFloatWorkload, NeonMergerUint8Workload>(descriptor, info);
114 }
115
116 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected(
117     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const
118 {
119     return MakeWorkload<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload>(descriptor, info,
120                                                                                 m_MemoryManager.GetIntraLayerManager());
121 }
122
123 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
124                                                                      const WorkloadInfo&           info) const
125 {
126     return MakeWorkload<NeonPermuteFloatWorkload, NeonPermuteUint8Workload>(descriptor, info);
127 }
128
129 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
130                                                                        const WorkloadInfo&           info) const
131 {
132     return MakeWorkload<NeonPooling2dFloatWorkload, NeonPooling2dUint8Workload>(descriptor, info);
133 }
134
135 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
136     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
137 {
138     return MakeWorkload<NeonConvolution2dFloatWorkload, NeonConvolution2dUint8Workload>(descriptor, info,
139                                                                               m_MemoryManager.GetIntraLayerManager());
140 }
141
142 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
143     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
144 {
145     return MakeWorkload<NeonDepthwiseConvolutionFloatWorkload, NeonDepthwiseConvolutionUint8Workload>(
146         descriptor, info);
147 }
148
149 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization(
150     const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
151 {
152     return MakeWorkload<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info,
153                                                                         m_MemoryManager.GetIntraLayerManager());
154 }
155
156 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
157                                                                       const WorkloadInfo&            info) const
158 {
159     return MakeWorkload<NeonAdditionFloatWorkload, NullWorkload>(descriptor, info);
160 }
161
162 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication(
163     const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const
164 {
165     return MakeWorkload<NeonMultiplicationFloatWorkload, NullWorkload>(descriptor, info);
166 }
167
168 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision(
169     const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const
170 {
171     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
172 }
173
174 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction(
175     const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const
176 {
177     return MakeWorkload<NeonSubtractionFloatWorkload, NullWorkload>(descriptor, info);
178 }
179
180 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization(
181     const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
182 {
183     return MakeWorkload<NeonBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info);
184 }
185
186 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
187                                                                      const WorkloadInfo&        info) const
188 {
189     if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0])
190     {
191         throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload");
192     }
193
194     return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info);
195 }
196
197 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear(
198     const ResizeBilinearQueueDescriptor& descriptor,
199     const WorkloadInfo& info) const
200 {
201     return nullptr;
202 }
203
204 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
205     const FakeQuantizationQueueDescriptor& descriptor,
206     const WorkloadInfo& info) const
207 {
208     return nullptr;
209 }
210
211 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
212     const WorkloadInfo& info) const
213 {
214     return MakeWorkload<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info,
215                                                                           m_MemoryManager.GetIntraLayerManager());
216 }
217
218 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
219     const WorkloadInfo& info) const
220 {
221     return MakeWorkload<NeonConstantFloatWorkload, NeonConstantUint8Workload>(descriptor, info);
222 }
223
224 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
225     const WorkloadInfo& info) const
226 {
227     return MakeWorkload<NeonReshapeFloatWorkload, NeonReshapeUint8Workload>(descriptor, info);
228 }
229
230 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
231     const WorkloadInfo& info) const
232 {
233     return MakeWorkload<NeonFloorFloatWorkload, NullWorkload>(descriptor, info);
234 }
235
236 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
237     const WorkloadInfo& info) const
238 {
239     return MakeWorkload<NeonLstmFloatWorkload, NullWorkload>(descriptor, info);
240 }
241
242 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
243     const ConvertFp16ToFp32QueueDescriptor& descriptor,
244     const WorkloadInfo& info) const
245 {
246     return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info);
247 }
248
249 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
250     const ConvertFp32ToFp16QueueDescriptor& descriptor,
251     const WorkloadInfo& info) const
252 {
253     return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info);
254 }
255
256 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
257                                                            const WorkloadInfo& info) const
258 {
259     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
260 }
261
262 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
263                                                           const WorkloadInfo& info) const
264 {
265     return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
266 }
267
268 void NeonWorkloadFactory::Finalize()
269 {
270     m_MemoryManager.Finalize();
271 }
272
273 void NeonWorkloadFactory::Release()
274 {
275     m_MemoryManager.Release();
276 }
277
278 void NeonWorkloadFactory::Acquire()
279 {
280     m_MemoryManager.Acquire();
281 }
282
283 #else // Compiled without ArmCompute libs
284
285 NeonWorkloadFactory::NeonWorkloadFactory()
286 {
287 }
288
289 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent,
290     TensorShape const& subTensorShape,
291     unsigned int const* subTensorOrigin) const
292 {
293     return nullptr;
294 }
295
296 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const
297 {
298     return nullptr;
299 }
300
301 std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
302                                                                        DataLayout dataLayout) const
303 {
304     return nullptr;
305 }
306
307 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor,
308                                                             const WorkloadInfo&        info) const
309 {
310     return nullptr;
311 }
312
313 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
314                                                              const WorkloadInfo&        info) const
315 {
316     return nullptr;
317 }
318
319 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
320                                                                  const WorkloadInfo&              info) const
321 {
322     return nullptr;
323 }
324
325 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
326                                                               const WorkloadInfo&           info) const
327 {
328     return nullptr;
329 }
330
331 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
332                                                                const WorkloadInfo&            info) const
333 {
334     return nullptr;
335 }
336
337 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor,
338                                                              const WorkloadInfo&          info) const
339 {
340     return nullptr;
341 }
342
343 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
344                                                                      const WorkloadInfo&                  info) const
345 {
346     return nullptr;
347 }
348
349 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
350                                                                      const WorkloadInfo&           info) const
351 {
352     return nullptr;
353 }
354
355 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
356                                                                 const WorkloadInfo&           info) const
357 {
358     return nullptr;
359 }
360
361 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
362                                                                     const WorkloadInfo&               info) const
363 {
364     return nullptr;
365 }
366
367 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d(
368     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
369 {
370     return nullptr;
371 }
372
373 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
374                                                                     const WorkloadInfo&                 info) const
375 {
376     return nullptr;
377 }
378
379 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
380                                                                const WorkloadInfo&            info) const
381 {
382     return nullptr;
383 }
384
385 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data,
386                                                                          const WorkloadInfo& info) const
387 {
388     return nullptr;
389 }
390
391 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data,
392                                                                      const WorkloadInfo&                  info) const
393 {
394     return nullptr;
395 }
396
397 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
398                                                               const WorkloadInfo&        info) const
399 {
400     return nullptr;
401 }
402
403 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
404                                                                      const WorkloadInfo& info) const
405 {
406     return nullptr;
407 }
408
409 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization(
410         const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
411 {
412     return nullptr;
413 }
414
415 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
416     const WorkloadInfo& info) const
417 {
418     return nullptr;
419 }
420
421 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
422     const WorkloadInfo& info) const
423 {
424     return nullptr;
425 }
426
427 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
428     const WorkloadInfo&           info) const
429 {
430     return nullptr;
431 }
432
433 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
434     const WorkloadInfo& info) const
435 {
436     return nullptr;
437 }
438
439 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
440     const WorkloadInfo& info) const
441 {
442     return nullptr;
443 }
444
445 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32(
446     const ConvertFp16ToFp32QueueDescriptor& descriptor,
447     const WorkloadInfo& info) const
448 {
449     return nullptr;
450 }
451
452 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
453     const ConvertFp32ToFp16QueueDescriptor& descriptor,
454     const WorkloadInfo& info) const
455 {
456     return nullptr;
457 }
458
459 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data,
460                                                                const WorkloadInfo& info) const
461 {
462     return nullptr;
463 }
464
465 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data,
466                                                                   const WorkloadInfo& info) const
467 {
468     return nullptr;
469 }
470
471 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
472                                                            const WorkloadInfo& info) const
473 {
474     return nullptr;
475 }
476
477 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
478                                                           const WorkloadInfo& info) const
479 {
480     return nullptr;
481 }
482
483 void NeonWorkloadFactory::Finalize()
484 {}
485
486 void NeonWorkloadFactory::Release()
487 {}
488
489 void NeonWorkloadFactory::Acquire()
490 {}
491
492 #endif
493
494 } //namespace armnn