// IVGCVSW-1946: Remove armnn/src from the include paths
// File: src/backends/neon/NeonLayerSupport.cpp (platform/upstream/armnn.git)
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonLayerSupport.hpp"
7 #include "NeonBackendId.hpp"
8
9 #include <armnn/Descriptors.hpp>
10 #include <InternalTypes.hpp>
11 #include <LayerSupportCommon.hpp>
12 #include <armnn/Tensor.hpp>
13 #include <armnn/Types.hpp>
14
15 #include <backendsCommon/LayerSupportRegistry.hpp>
16
17 #include <boost/core/ignore_unused.hpp>
18
19 #ifdef ARMCOMPUTENEON_ENABLED
20 #include "workloads/NeonAdditionFloatWorkload.hpp"
21 #include "workloads/NeonActivationWorkload.hpp"
22 #include "workloads/NeonBatchNormalizationFloatWorkload.hpp"
23 #include "workloads/NeonConvolution2dWorkload.hpp"
24 #include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
25 #include "workloads/NeonL2NormalizationFloatWorkload.hpp"
26 #include "workloads/NeonMultiplicationFloatWorkload.hpp"
27 #include "workloads/NeonNormalizationFloatWorkload.hpp"
28 #include "workloads/NeonFullyConnectedWorkload.hpp"
29 #include "workloads/NeonPermuteWorkload.hpp"
30 #include "workloads/NeonPooling2dWorkload.hpp"
31 #include "workloads/NeonSoftmaxBaseWorkload.hpp"
32 #include "workloads/NeonSubtractionFloatWorkload.hpp"
33 #endif
34
35 using namespace boost;
36
37 namespace armnn
38 {
39
namespace
{

// Lazily constructs the shared NeonLayerSupport singleton on first use and
// hands out the same shared pointer on every subsequent call.
ILayerSupportSharedPtr GetLayerSupportPointer()
{
    static ILayerSupportSharedPtr instance{new NeonLayerSupport};
    return instance;
}

// Registers the Neon layer-support factory with the global LayerSupportRegistry
// under the Neon backend id. Runs at static-initialization time of this
// translation unit; the factory ignores its EmptyInitializer argument.
static StaticRegistryInitializer<LayerSupportRegistry> g_RegisterHelper{
    LayerSupportRegistryInstance(),
    NeonBackendId(),
    [](const EmptyInitializer&)
    {
        return GetLayerSupportPointer();
    }
};

// Returns true when armnn was built with NEON support (ARMCOMPUTENEON_ENABLED).
// Otherwise returns false and, when the caller supplied a reason slot, fills it
// with an explanatory message.
bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported)
{
#if ARMCOMPUTENEON_ENABLED
    return true;
#else
    if (reasonIfUnsupported)
    {
        reasonIfUnsupported.value() = "The armnn library has been built without NEON support";
    }
    return false;
#endif
}

// Checks the Neon backend is available, then dispatches on dataType via
// IsSupportedForDataTypeGeneric. Note floatFuncPtr is passed twice: it is used
// for both the Float16 and Float32 slots of the generic dispatcher.
template<typename FloatFunc, typename Uint8Func, typename ... Params>
bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported,
                                DataType dataType,
                                FloatFunc floatFuncPtr,
                                Uint8Func uint8FuncPtr,
                                Params&&... params)
{
    return IsNeonBackendSupported(reasonIfUnsupported) &&
        IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                         dataType,
                                         floatFuncPtr,
                                         floatFuncPtr,
                                         uint8FuncPtr,
                                         std::forward<Params>(params)...);
}

#if ARMCOMPUTENEON_ENABLED
// Invokes an arm_compute workload validation function and converts its
// arm_compute::Status result into a bool, copying the error description into
// reasonIfUnsupported on failure.
template<class FuncType, class... Args>
inline bool IsWorkloadSupported(FuncType& func, Optional<std::string&> reasonIfUnsupported, Args&&... args)
{
    arm_compute::Status aclStatus = func(std::forward<Args>(args)...);
    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        reasonIfUnsupported.value() = aclStatus.error_description();
    }
    return supported;
}

// NOTE: this macro expands to a *return* statement, so any code after it in a
// caller is unreachable. Without NEON it ignores every argument except
// reasonIfUnsupported and simply reports the backend as unavailable.
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__);
#else
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsNeonBackendSupported(reasonIfUnsupported);
#endif

} // anonymous namespace
108
// Forwards to NeonActivationWorkloadValidate; returns false (with a reason)
// when armnn was built without NEON.
bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input,
                                             const TensorInfo& output,
                                             const ActivationDescriptor& descriptor,
                                             Optional<std::string&> reasonIfUnsupported) const
{
    // NOTE(review): descriptor is also passed to the macro below, so this call
    // looks redundant in the NEON build; presumably it silences an
    // unused-parameter warning in the non-NEON build — confirm before removing.
    ignore_unused(descriptor);
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}
121
// Forwards to NeonAdditionWorkloadValidate; returns false (with a reason)
// when armnn was built without NEON.
bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0,
                                           const TensorInfo& input1,
                                           const TensorInfo& output,
                                           Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}
133
// Forwards all batch-norm tensors (mean/var/beta/gamma) and the descriptor to
// NeonBatchNormalizationValidate; false (with a reason) without NEON.
bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
                                                     const TensorInfo& output,
                                                     const TensorInfo& mean,
                                                     const TensorInfo& var,
                                                     const TensorInfo& beta,
                                                     const TensorInfo& gamma,
                                                     const BatchNormalizationDescriptor& descriptor,
                                                     Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   mean,
                                   var,
                                   beta,
                                   gamma,
                                   descriptor);
}
153
154 bool NeonLayerSupport::IsConstantSupported(const TensorInfo& output,
155                                            Optional<std::string&> reasonIfUnsupported) const
156 {
157     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
158                                       output.GetDataType(),
159                                       &TrueFunc<>,
160                                       &TrueFunc<>);
161 }
162
163 bool NeonLayerSupport::IsConvertFp16ToFp32Supported(const TensorInfo& input,
164                                                     const TensorInfo& output,
165                                                     Optional<std::string&> reasonIfUnsupported) const
166 {
167     ignore_unused(input);
168     ignore_unused(output);
169     ignore_unused(reasonIfUnsupported);
170     return true;
171 }
172
173 bool NeonLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input,
174                                                     const TensorInfo& output,
175                                                     Optional<std::string&> reasonIfUnsupported) const
176 {
177     ignore_unused(input);
178     ignore_unused(output);
179     ignore_unused(reasonIfUnsupported);
180     return true;
181 }
182
// Forwards to NeonConvolution2dWorkloadValidate, including optional biases;
// false (with a reason) without NEON.
bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
                                                const TensorInfo& output,
                                                const Convolution2dDescriptor& descriptor,
                                                const TensorInfo& weights,
                                                const Optional<TensorInfo>& biases,
                                                Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor,
                                   weights,
                                   biases);
}
198
// Forwards to NeonDepthwiseConvolutionWorkloadValidate, including optional
// biases; false (with a reason) without NEON.
bool NeonLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                                       const TensorInfo& output,
                                                       const DepthwiseConvolution2dDescriptor& descriptor,
                                                       const TensorInfo& weights,
                                                       const Optional<TensorInfo>& biases,
                                                       Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor,
                                   weights,
                                   biases);
}
214
215 bool NeonLayerSupport::IsDivisionSupported(const TensorInfo& input0,
216                                            const TensorInfo& input1,
217                                            const TensorInfo& output,
218                                            Optional<std::string&> reasonIfUnsupported) const
219 {
220     ignore_unused(input0);
221     ignore_unused(input1);
222     ignore_unused(output);
223     ignore_unused(reasonIfUnsupported);
224     return false;
225 }
226
227 bool NeonLayerSupport::IsFakeQuantizationSupported(const TensorInfo& input,
228                                                    const FakeQuantizationDescriptor& descriptor,
229                                                    Optional<std::string&> reasonIfUnsupported) const
230 {
231     ignore_unused(input);
232     ignore_unused(descriptor);
233     ignore_unused(reasonIfUnsupported);
234     return false;
235 }
236
237 bool NeonLayerSupport::IsFloorSupported(const TensorInfo& input,
238                                         const TensorInfo& output,
239                                         Optional<std::string&> reasonIfUnsupported) const
240 {
241     ignore_unused(output);
242     return IsNeonBackendSupported(reasonIfUnsupported) &&
243            IsSupportedForDataTypeGeneric(reasonIfUnsupported,
244                                          input.GetDataType(),
245                                          &FalseFuncF16<>,
246                                          &TrueFunc<>,
247                                          &FalseFuncU8<>);
248 }
249
// Forwards to NeonFullyConnectedWorkloadValidate; false (with a reason)
// without NEON.
bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
                                                 const TensorInfo& output,
                                                 const TensorInfo& weights,
                                                 const TensorInfo& biases,
                                                 const FullyConnectedDescriptor& descriptor,
                                                 Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   weights,
                                   biases,
                                   descriptor);
}
265
266 bool NeonLayerSupport::IsInputSupported(const TensorInfo& input,
267                                         Optional<std::string&> reasonIfUnsupported) const
268 {
269     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
270                                       input.GetDataType(),
271                                       &TrueFunc<>,
272                                       &TrueFunc<>);
273 }
274
// Forwards to NeonL2NormalizationWorkloadValidate; false (with a reason)
// without NEON.
bool NeonLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
                                                  const TensorInfo& output,
                                                  const L2NormalizationDescriptor& descriptor,
                                                  Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
282
283 bool NeonLayerSupport::IsLstmSupported(const TensorInfo& input,
284                                        const TensorInfo& outputStateIn,
285                                        const TensorInfo& cellStateIn,
286                                        const TensorInfo& scratchBuffer,
287                                        const TensorInfo& outputStateOut,
288                                        const TensorInfo& cellStateOut,
289                                        const TensorInfo& output,
290                                        const LstmDescriptor& descriptor,
291                                        const TensorInfo& inputToForgetWeights,
292                                        const TensorInfo& inputToCellWeights,
293                                        const TensorInfo& inputToOutputWeights,
294                                        const TensorInfo& recurrentToForgetWeights,
295                                        const TensorInfo& recurrentToCellWeights,
296                                        const TensorInfo& recurrentToOutputWeights,
297                                        const TensorInfo& forgetGateBias,
298                                        const TensorInfo& cellBias,
299                                        const TensorInfo& outputGateBias,
300                                        const TensorInfo* inputToInputWeights,
301                                        const TensorInfo* recurrentToInputWeights,
302                                        const TensorInfo* cellToInputWeights,
303                                        const TensorInfo* inputGateBias,
304                                        const TensorInfo* projectionWeights,
305                                        const TensorInfo* projectionBias,
306                                        const TensorInfo* cellToForgetWeights,
307                                        const TensorInfo* cellToOutputWeights,
308                                        Optional<std::string&> reasonIfUnsupported) const
309 {
310     ignore_unused(input);
311     ignore_unused(outputStateIn);
312     ignore_unused(cellStateIn);
313     ignore_unused(scratchBuffer);
314     ignore_unused(outputStateOut);
315     ignore_unused(cellStateOut);
316     ignore_unused(output);
317     ignore_unused(descriptor);
318     ignore_unused(inputToForgetWeights);
319     ignore_unused(inputToCellWeights);
320     ignore_unused(inputToOutputWeights);
321     ignore_unused(recurrentToForgetWeights);
322     ignore_unused(recurrentToCellWeights);
323     ignore_unused(recurrentToOutputWeights);
324     ignore_unused(forgetGateBias);
325     ignore_unused(cellBias);
326     ignore_unused(outputGateBias);
327     ignore_unused(inputToInputWeights);
328     ignore_unused(recurrentToInputWeights);
329     ignore_unused(cellToInputWeights);
330     ignore_unused(inputGateBias);
331     ignore_unused(projectionWeights);
332     ignore_unused(projectionBias);
333     ignore_unused(cellToForgetWeights);
334     ignore_unused(cellToOutputWeights);
335     ignore_unused(reasonIfUnsupported);
336     return false;
337 }
338
339 bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
340                                        const TensorInfo& output,
341                                        const MeanDescriptor& descriptor,
342                                        Optional<std::string&> reasonIfUnsupported) const
343 {
344     ignore_unused(input);
345     ignore_unused(output);
346     ignore_unused(descriptor);
347     ignore_unused(reasonIfUnsupported);
348     return false;
349 }
350
351 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
352                                          const OriginsDescriptor& descriptor,
353                                          Optional<std::string&> reasonIfUnsupported) const
354 {
355     ignore_unused(descriptor);
356     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
357                                       inputs[0]->GetDataType(),
358                                       &TrueFunc<>,
359                                       &TrueFunc<>);
360 }
361
// Forwards to NeonMultiplicationWorkloadValidate; false (with a reason)
// without NEON.
bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
                                                 const TensorInfo& input1,
                                                 const TensorInfo& output,
                                                 Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}
373
// Forwards to NeonNormalizationWorkloadValidate; false (with a reason)
// without NEON.
bool NeonLayerSupport::IsNormalizationSupported(const TensorInfo& input,
                                                const TensorInfo& output,
                                                const NormalizationDescriptor& descriptor,
                                                Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}
385
386 bool NeonLayerSupport::IsOutputSupported(const TensorInfo& output,
387                                          Optional<std::string&> reasonIfUnsupported) const
388 {
389     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
390                                       output.GetDataType(),
391                                       &TrueFunc<>,
392                                       &TrueFunc<>);
393 }
394
395 bool NeonLayerSupport::IsPadSupported(const TensorInfo& input,
396                                       const TensorInfo& output,
397                                       const PadDescriptor& descriptor,
398                                       Optional<std::string&> reasonIfUnsupported) const
399 {
400     ignore_unused(input);
401     ignore_unused(output);
402     ignore_unused(descriptor);
403     ignore_unused(reasonIfUnsupported);
404     return false;
405 }
406
// Forwards to NeonPermuteWorkloadValidate; false (with a reason) without NEON.
bool NeonLayerSupport::IsPermuteSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const PermuteDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
414
// Forwards to NeonPooling2dWorkloadValidate; false (with a reason) without NEON.
bool NeonLayerSupport::IsPooling2dSupported(const TensorInfo& input,
                                            const TensorInfo& output,
                                            const Pooling2dDescriptor& descriptor,
                                            Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
422
423 bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
424                                           Optional<std::string&> reasonIfUnsupported) const
425 {
426     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
427                                       input.GetDataType(),
428                                       &TrueFunc<>,
429                                       &TrueFunc<>);
430 }
431
432 bool NeonLayerSupport::IsResizeBilinearSupported(const TensorInfo& input,
433                                                  Optional<std::string&> reasonIfUnsupported) const
434 {
435     ignore_unused(input);
436     ignore_unused(reasonIfUnsupported);
437     return false;
438 }
439
// Forwards to NeonSoftmaxWorkloadValidate; false (with a reason) without NEON.
bool NeonLayerSupport::IsSoftmaxSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const SoftmaxDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
}
447
448 bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
449                                            const ViewsDescriptor& descriptor,
450                                            Optional<std::string&> reasonIfUnsupported) const
451 {
452     ignore_unused(descriptor);
453     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
454                                       input.GetDataType(),
455                                       &TrueFunc<>,
456                                       &TrueFunc<>);
457 }
458
// Forwards to NeonSubtractionWorkloadValidate; false (with a reason)
// without NEON.
bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
                                              const TensorInfo& input1,
                                              const TensorInfo& output,
                                              Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}
470
471 bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc)
472 {
473     // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases,
474     // and complement with NEDirectConvolutionLayerKernel::configure() implementation.
475
476     // Only 1x1 is using direct convolution. Performance results and details are in:
477     //    https://jira.arm.com/browse/IVGCVSW-1003
478     // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc
479
480     const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32);
481
482     // Strides: 1|2|3
483     const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) &&
484                                  (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3);
485
486     auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value)
487     {
488         return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value ||
489                conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value;
490     };
491
492     // Supported sizes and padding.
493     const bool sizeAndPaddingSupported =
494         // Pad > 0 not supported for 1x1 weights.
495         (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u));
496
497     const bool preferDirectConvolution = dataTypeSupported &&
498                                          strideSupported &&
499                                          sizeAndPaddingSupported &&
500                                          // NEDirectConvolutionLayerKernel doesn't support NULL bias.
501                                          desc.m_BiasEnabled;
502     return preferDirectConvolution;
503 }
504
505 } // namespace armnn