IVGCVSW-4374 Add NEON backend support for SpaceToBatchNd
[platform/upstream/armnn.git] / src / backends / neon / NeonLayerSupport.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonLayerSupport.hpp"
7 #include "NeonBackendId.hpp"
8
9 #include <armnn/Descriptors.hpp>
10 #include <armnn/Tensor.hpp>
11 #include <armnn/Types.hpp>
12 #include <armnn/BackendRegistry.hpp>
13
14 #include <InternalTypes.hpp>
15 #include <LayerSupportCommon.hpp>
16
17 #include <boost/core/ignore_unused.hpp>
18
19 #if defined(ARMCOMPUTENEON_ENABLED)
20 #include <aclCommon/ArmComputeUtils.hpp>
21 #include <aclCommon/ArmComputeTensorUtils.hpp>
22 #include "workloads/NeonAbsWorkload.hpp"
23 #include "workloads/NeonAdditionWorkload.hpp"
24 #include "workloads/NeonActivationWorkload.hpp"
25 #include "workloads/NeonArgMinMaxWorkload.hpp"
26 #include "workloads/NeonBatchNormalizationWorkload.hpp"
27 #include "workloads/NeonConvolution2dWorkload.hpp"
28 #include "workloads/NeonDepthToSpaceWorkload.hpp"
29 #include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
30 #include "workloads/NeonDequantizeWorkload.hpp"
31 #include "workloads/NeonDetectionPostProcessWorkload.hpp"
32 #include "workloads/NeonGreaterWorkload.hpp"
33 #include "workloads/NeonInstanceNormalizationWorkload.hpp"
34 #include "workloads/NeonL2NormalizationFloatWorkload.hpp"
35 #include "workloads/NeonLstmFloatWorkload.hpp"
36 #include "workloads/NeonMaximumWorkload.hpp"
37 #include "workloads/NeonMeanWorkload.hpp"
38 #include "workloads/NeonConcatWorkload.hpp"
39 #include "workloads/NeonMinimumWorkload.hpp"
40 #include "workloads/NeonMultiplicationWorkload.hpp"
41 #include "workloads/NeonNormalizationFloatWorkload.hpp"
42 #include "workloads/NeonFullyConnectedWorkload.hpp"
43 #include "workloads/NeonPadWorkload.hpp"
44 #include "workloads/NeonPermuteWorkload.hpp"
45 #include "workloads/NeonPooling2dWorkload.hpp"
46 #include "workloads/NeonPreluWorkload.hpp"
47 #include "workloads/NeonQuantizeWorkload.hpp"
48 #include "workloads/NeonQuantizedLstmWorkload.hpp"
49 #include "workloads/NeonResizeWorkload.hpp"
50 #include "workloads/NeonRsqrtWorkload.hpp"
51 #include "workloads/NeonSliceWorkload.hpp"
52 #include "workloads/NeonSoftmaxBaseWorkload.hpp"
53 #include "workloads/NeonSpaceToBatchNdWorkload.hpp"
54 #include "workloads/NeonSpaceToDepthWorkload.hpp"
55 #include "workloads/NeonSplitterWorkload.hpp"
56 #include "workloads/NeonStackWorkload.hpp"
57 #include "workloads/NeonStridedSliceWorkload.hpp"
58 #include "workloads/NeonSubtractionWorkload.hpp"
59 #include "workloads/NeonTransposeConvolution2dWorkload.hpp"
60 #endif
61
62 using namespace boost;
63
64 namespace armnn
65 {
66
67 namespace
68 {
69
70 template< typename ... Args>
71 bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported, Args... args)
72 {
73     boost::ignore_unused(reasonIfUnsupported, (args)...);
74 #if defined(ARMCOMPUTENEON_ENABLED)
75     return true;
76 #else
77     SetValueChecked(reasonIfUnsupported, "The armnn library has been built without NEON support");
78     return false;
79 #endif
80 }
81
82 template<typename FloatFunc, typename Uint8Func, typename ... Params>
83 bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported,
84                                 DataType dataType,
85                                 FloatFunc floatFuncPtr,
86                                 Uint8Func uint8FuncPtr,
87                                 Params&&... params)
88 {
89     return IsNeonBackendSupported(reasonIfUnsupported) &&
90         IsSupportedForDataTypeGeneric(reasonIfUnsupported,
91                                          dataType,
92                                          floatFuncPtr,
93                                          floatFuncPtr,
94                                          uint8FuncPtr,
95                                          &FalseFunc<>,
96                                          &FalseFunc<>,
97                                          std::forward<Params>(params)...);
98 }
99
100 #if defined(ARMCOMPUTENEON_ENABLED)
101 template<class FuncType, class... Args>
102 inline bool IsWorkloadSupported(FuncType& func, Optional<std::string&> reasonIfUnsupported, Args&&... args)
103 {
104     arm_compute::Status aclStatus = func(std::forward<Args>(args)...);
105     const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
106     if (!supported && reasonIfUnsupported)
107     {
108         reasonIfUnsupported.value() = aclStatus.error_description();
109     }
110     return supported;
111 }
112
113 #define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
114     return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__);
115 #else
116 #define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
117     return IsNeonBackendSupported(reasonIfUnsupported, __VA_ARGS__);
118 #endif
119
120 #if defined(ARMCOMPUTENEON_ENABLED)
121 #define IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights) \
122 armcomputetensorutils::IsQuantMultiplierSupported(input, output, weights)
123 #else
124 #define IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights) true
125 #endif
126
127 } // anonymous namespace
128
129 bool NeonLayerSupport::IsAbsSupported(const TensorInfo& input,
130                                       const TensorInfo& output,
131                                       Optional<std::string&> reasonIfUnsupported) const
132 {
133     ElementwiseUnaryDescriptor descriptor(UnaryOperation::Abs);
134     return IsElementwiseUnarySupported(input, output, descriptor, reasonIfUnsupported);
135 }
136
137 bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input,
138                                              const TensorInfo& output,
139                                              const ActivationDescriptor& descriptor,
140                                              Optional<std::string&> reasonIfUnsupported) const
141 {
142     ignore_unused(descriptor);
143     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate,
144                                    reasonIfUnsupported,
145                                    input,
146                                    output,
147                                    descriptor);
148 }
149
150 bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0,
151                                            const TensorInfo& input1,
152                                            const TensorInfo& output,
153                                            Optional<std::string&> reasonIfUnsupported) const
154 {
155     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
156                                    reasonIfUnsupported,
157                                    input0,
158                                    input1,
159                                    output);
160 }
161
162 bool NeonLayerSupport::IsArgMinMaxSupported(const TensorInfo& input,
163                                             const TensorInfo& output,
164                                             const ArgMinMaxDescriptor& descriptor,
165                                             Optional<std::string&> reasonIfUnsupported) const
166 {
167     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonArgMinMaxWorkloadValidate,
168                                    reasonIfUnsupported,
169                                    input,
170                                    output,
171                                    descriptor);
172 }
173
174 bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
175                                                      const TensorInfo& output,
176                                                      const TensorInfo& mean,
177                                                      const TensorInfo& var,
178                                                      const TensorInfo& beta,
179                                                      const TensorInfo& gamma,
180                                                      const BatchNormalizationDescriptor& descriptor,
181                                                      Optional<std::string&> reasonIfUnsupported) const
182 {
183     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate,
184                                    reasonIfUnsupported,
185                                    input,
186                                    output,
187                                    mean,
188                                    var,
189                                    beta,
190                                    gamma,
191                                    descriptor);
192 }
193
194 bool NeonLayerSupport::IsComparisonSupported(const TensorInfo& input0,
195                                              const TensorInfo& input1,
196                                              const TensorInfo& output,
197                                              const ComparisonDescriptor& descriptor,
198                                              Optional<std::string&> reasonIfUnsupported) const
199 {
200     if (descriptor.m_Operation == ComparisonOperation::Greater)
201     {
202         FORWARD_WORKLOAD_VALIDATE_FUNC(NeonGreaterWorkloadValidate,
203                                        reasonIfUnsupported,
204                                        input0,
205                                        input1,
206                                        output);
207     }
208
209     return false;
210 }
211
212 bool NeonLayerSupport::IsConcatSupported(const std::vector<const TensorInfo*> inputs,
213                                          const TensorInfo& output,
214                                          const ConcatDescriptor& descriptor,
215                                          Optional<std::string&> reasonIfUnsupported) const
216 {
217     if (descriptor.GetNumDimensions() <= descriptor.GetConcatAxis())
218     {
219         SetValueChecked(reasonIfUnsupported, "Neon Concat: Concat axis > Number of dimensions.");
220         return false;
221     }
222
223     unsigned int concatInnerAxis = (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
224     if(concatInnerAxis < 3) // Width, height, or channels
225     {
226         FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConcatWorkloadValidate,
227                                        reasonIfUnsupported,
228                                        inputs,
229                                        output,
230                                        descriptor);
231     }
232     else if (concatInnerAxis == 3)
233     {
234         for (auto& input : inputs)
235         {
236             if (input && !output.IsTypeSpaceMatch(*input)) // Cannot use sub-tensors if the types are not same space
237             {
238                 SetValueChecked(reasonIfUnsupported, "Neon Concat: Types and quantization parameters must match.");
239                 return false;
240             }
241         }
242         return true; // Sub-tensors support concat along batch
243     }
244     else // > 4 dimensions not supported.
245     {
246         SetValueChecked(reasonIfUnsupported, "Neon Concat: Maximum of 4 dimensions supported.");
247         return false;
248     }
249 }
250
251 bool NeonLayerSupport::IsConstantSupported(const TensorInfo& output,
252                                            Optional<std::string&> reasonIfUnsupported) const
253 {
254     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
255                                       output.GetDataType(),
256                                       &TrueFunc<>,
257                                       &TrueFunc<>);
258 }
259
260 bool NeonLayerSupport::IsConvertFp16ToFp32Supported(const TensorInfo& input,
261                                                     const TensorInfo& output,
262                                                     Optional<std::string&> reasonIfUnsupported) const
263 {
264     ignore_unused(input);
265     ignore_unused(output);
266     ignore_unused(reasonIfUnsupported);
267     return true;
268 }
269
270 bool NeonLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input,
271                                                     const TensorInfo& output,
272                                                     Optional<std::string&> reasonIfUnsupported) const
273 {
274     ignore_unused(input);
275     ignore_unused(output);
276     ignore_unused(reasonIfUnsupported);
277     return true;
278 }
279
280 bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
281                                                 const TensorInfo& output,
282                                                 const Convolution2dDescriptor& descriptor,
283                                                 const TensorInfo& weights,
284                                                 const Optional<TensorInfo>& biases,
285                                                 Optional<std::string&> reasonIfUnsupported) const
286 {
287     if (!IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights))
288     {
289         return false;
290     }
291
292     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
293                                    reasonIfUnsupported,
294                                    input,
295                                    output,
296                                    descriptor,
297                                    weights,
298                                    biases);
299 }
300
301 bool NeonLayerSupport::IsDepthToSpaceSupported(const TensorInfo& input,
302                                                const TensorInfo& output,
303                                                const DepthToSpaceDescriptor& descriptor,
304                                                Optional<std::string&> reasonIfUnsupported) const
305 {
306     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthToSpaceWorkloadValidate,
307                                    reasonIfUnsupported,
308                                    input,
309                                    output,
310                                    descriptor);
311 }
312
313 bool NeonLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
314                                                        const TensorInfo& output,
315                                                        const DepthwiseConvolution2dDescriptor& descriptor,
316                                                        const TensorInfo& weights,
317                                                        const Optional<TensorInfo>& biases,
318                                                        Optional<std::string&> reasonIfUnsupported) const
319 {
320     if (!IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights))
321     {
322         return false;
323     }
324
325     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
326                                    reasonIfUnsupported,
327                                    input,
328                                    output,
329                                    descriptor,
330                                    weights,
331                                    biases);
332 }
333
334 bool NeonLayerSupport::IsDequantizeSupported(const TensorInfo& input,
335                                              const TensorInfo& output,
336                                              Optional<std::string&> reasonIfUnsupported) const
337 {
338     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDequantizeWorkloadValidate,
339                                    reasonIfUnsupported,
340                                    input,
341                                    output);
342 }
343
344 bool NeonLayerSupport::IsDetectionPostProcessSupported(const TensorInfo& boxEncodings,
345                                                        const TensorInfo& scores,
346                                                        const TensorInfo& anchors,
347                                                        const TensorInfo& detectionBoxes,
348                                                        const TensorInfo& detectionClasses,
349                                                        const TensorInfo& detectionScores,
350                                                        const TensorInfo& numDetections,
351                                                        const DetectionPostProcessDescriptor& descriptor,
352                                                        Optional<std::string&> reasonIfUnsupported) const
353 {
354     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDetectionPostProcessValidate,
355                                    reasonIfUnsupported,
356                                    boxEncodings,
357                                    scores,
358                                    anchors,
359                                    detectionBoxes,
360                                    detectionClasses,
361                                    detectionScores,
362                                    numDetections,
363                                    descriptor);
364 }
365
366
367 bool NeonLayerSupport::IsDilatedDepthwiseConvolutionSupported(const TensorInfo& input,
368                                                               const TensorInfo& output,
369                                                               const DepthwiseConvolution2dDescriptor& descriptor,
370                                                               const TensorInfo& weights,
371                                                               const Optional<TensorInfo>& biases,
372                                                               Optional<std::string&> reasonIfUnsupported) const
373 {
374     if (!IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights))
375     {
376         return false;
377     }
378
379     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
380                                    reasonIfUnsupported,
381                                    input,
382                                    output,
383                                    descriptor,
384                                    weights,
385                                    biases);
386 }
387
388 bool NeonLayerSupport::IsElementwiseUnarySupported(const TensorInfo& input,
389                                                    const TensorInfo& output,
390                                                    const ElementwiseUnaryDescriptor& descriptor,
391                                                    Optional<std::string&> reasonIfUnsupported) const
392 {
393     if (descriptor.m_Operation == UnaryOperation::Abs)
394     {
395         FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAbsWorkloadValidate,
396                                        reasonIfUnsupported,
397                                        input,
398                                        output);
399     }
400     else if (descriptor.m_Operation == UnaryOperation::Rsqrt)
401     {
402         FORWARD_WORKLOAD_VALIDATE_FUNC(NeonRsqrtWorkloadValidate,
403                                        reasonIfUnsupported,
404                                        input,
405                                        output);
406     }
407
408     return false;
409 }
410
411 bool NeonLayerSupport::IsFloorSupported(const TensorInfo& input,
412                                         const TensorInfo& output,
413                                         Optional<std::string&> reasonIfUnsupported) const
414 {
415     ignore_unused(output);
416     return IsNeonBackendSupported(reasonIfUnsupported) &&
417            IsSupportedForDataTypeGeneric(reasonIfUnsupported,
418                                          input.GetDataType(),
419                                          &FalseFuncF16<>,
420                                          &TrueFunc<>,
421                                          &FalseFuncU8<>,
422                                          &FalseFuncI32<>,
423                                          &FalseFuncU8<>);
424 }
425
426 bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
427                                                  const TensorInfo& output,
428                                                  const TensorInfo& weights,
429                                                  const TensorInfo& biases,
430                                                  const FullyConnectedDescriptor& descriptor,
431                                                  Optional<std::string&> reasonIfUnsupported) const
432 {
433     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate,
434                                    reasonIfUnsupported,
435                                    input,
436                                    output,
437                                    weights,
438                                    biases,
439                                    descriptor);
440 }
441
442 bool NeonLayerSupport::IsGreaterSupported(const armnn::TensorInfo& input0,
443                                           const armnn::TensorInfo& input1,
444                                           const armnn::TensorInfo& output,
445                                           armnn::Optional<std::string&> reasonIfUnsupported) const
446 {
447     ComparisonDescriptor descriptor(ComparisonOperation::Greater);
448     return IsComparisonSupported(input0, input1, output, descriptor, reasonIfUnsupported);
449 }
450
451 bool NeonLayerSupport::IsInputSupported(const TensorInfo& input,
452                                         Optional<std::string&> reasonIfUnsupported) const
453 {
454     return IsNeonBackendSupported(reasonIfUnsupported, input);
455 }
456
457 bool NeonLayerSupport::IsInstanceNormalizationSupported(const TensorInfo& input,
458                                                         const TensorInfo& output,
459                                                         const InstanceNormalizationDescriptor& descriptor,
460                                                         Optional<std::string&> reasonIfUnsupported) const
461 {
462     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonInstanceNormalizationWorkloadValidate,
463                                    reasonIfUnsupported,
464                                    input,
465                                    output,
466                                    descriptor);
467 }
468
469 bool NeonLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
470                                                   const TensorInfo& output,
471                                                   const L2NormalizationDescriptor& descriptor,
472                                                   Optional<std::string&> reasonIfUnsupported) const
473 {
474     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
475 }
476
477 bool NeonLayerSupport::IsLstmSupported(const TensorInfo& input,
478                                        const TensorInfo& outputStateIn,
479                                        const TensorInfo& cellStateIn,
480                                        const TensorInfo& scratchBuffer,
481                                        const TensorInfo& outputStateOut,
482                                        const TensorInfo& cellStateOut,
483                                        const TensorInfo& output,
484                                        const LstmDescriptor& descriptor,
485                                        const LstmInputParamsInfo& paramsInfo,
486                                        Optional<std::string&> reasonIfUnsupported) const
487 {
488     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonLstmFloatWorkloadValidate,
489                                    reasonIfUnsupported,
490                                    input,
491                                    outputStateIn,
492                                    cellStateIn,
493                                    scratchBuffer,
494                                    outputStateOut,
495                                    cellStateOut,
496                                    output,
497                                    descriptor,
498                                    paramsInfo);
499 }
500
501 bool NeonLayerSupport::IsMaximumSupported(const TensorInfo& input0,
502                                           const TensorInfo& input1,
503                                           const TensorInfo& output,
504                                           Optional<std::string&> reasonIfUnsupported) const
505 {
506     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
507                                    reasonIfUnsupported,
508                                    input0,
509                                    input1,
510                                    output);
511 }
512
513 bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
514                                        const TensorInfo& output,
515                                        const MeanDescriptor& descriptor,
516                                        Optional<std::string&> reasonIfUnsupported) const
517 {
518     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMeanWorkloadValidate,
519                                    reasonIfUnsupported,
520                                    input,
521                                    output,
522                                    descriptor);
523 }
524
525 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
526                                          const TensorInfo& output,
527                                          const MergerDescriptor& descriptor,
528                                          Optional<std::string&> reasonIfUnsupported) const
529 {
530      return IsConcatSupported(inputs, output, descriptor, reasonIfUnsupported);
531 }
532
533 bool NeonLayerSupport::IsMinimumSupported(const TensorInfo& input0,
534                                           const TensorInfo& input1,
535                                           const TensorInfo& output,
536                                           Optional<std::string&> reasonIfUnsupported) const
537 {
538     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
539                                    reasonIfUnsupported,
540                                    input0,
541                                    input1,
542                                    output);
543 }
544
545 bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
546                                                  const TensorInfo& input1,
547                                                  const TensorInfo& output,
548                                                  Optional<std::string&> reasonIfUnsupported) const
549 {
550     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
551                                    reasonIfUnsupported,
552                                    input0,
553                                    input1,
554                                    output);
555 }
556
557 bool NeonLayerSupport::IsNormalizationSupported(const TensorInfo& input,
558                                                 const TensorInfo& output,
559                                                 const NormalizationDescriptor& descriptor,
560                                                 Optional<std::string&> reasonIfUnsupported) const
561 {
562     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate,
563                                    reasonIfUnsupported,
564                                    input,
565                                    output,
566                                    descriptor);
567 }
568
569 bool NeonLayerSupport::IsOutputSupported(const TensorInfo& output,
570                                          Optional<std::string&> reasonIfUnsupported) const
571 {
572     return IsNeonBackendSupported(reasonIfUnsupported, output);
573 }
574
575 bool NeonLayerSupport::IsPadSupported(const TensorInfo& input,
576                                       const TensorInfo& output,
577                                       const PadDescriptor& descriptor,
578                                       Optional<std::string&> reasonIfUnsupported) const
579 {
580     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPadWorkloadValidate,
581                                    reasonIfUnsupported,
582                                    input,
583                                    output,
584                                    descriptor);
585 }
586
587 bool NeonLayerSupport::IsPermuteSupported(const TensorInfo& input,
588                                           const TensorInfo& output,
589                                           const PermuteDescriptor& descriptor,
590                                           Optional<std::string&> reasonIfUnsupported) const
591 {
592     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
593 }
594
595 bool NeonLayerSupport::IsPooling2dSupported(const TensorInfo& input,
596                                             const TensorInfo& output,
597                                             const Pooling2dDescriptor& descriptor,
598                                             Optional<std::string&> reasonIfUnsupported) const
599 {
600     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
601 }
602
603 bool NeonLayerSupport::IsPreluSupported(const armnn::TensorInfo &input,
604                                         const armnn::TensorInfo &alpha,
605                                         const armnn::TensorInfo &output,
606                                         armnn::Optional<std::string &> reasonIfUnsupported) const
607 {
608     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPreluWorkloadValidate, reasonIfUnsupported, input, alpha, output);
609 }
610
611 bool NeonLayerSupport::IsQuantizeSupported(const TensorInfo& input,
612                                            const TensorInfo& output,
613                                            Optional<std::string&> reasonIfUnsupported) const
614 {
615     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonQuantizeWorkloadValidate,
616                                    reasonIfUnsupported,
617                                    input,
618                                    output);
619 }
620
621 bool NeonLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input,
622                                                 const TensorInfo& cellStateIn,
623                                                 const TensorInfo& outputStateIn,
624                                                 const TensorInfo& cellStateOut,
625                                                 const TensorInfo& outputStateOut,
626                                                 const QuantizedLstmInputParamsInfo& paramsInfo,
627                                                 Optional<std::string&> reasonIfUnsupported) const
628 {
629     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonQuantizedLstmWorkloadValidate,
630                                    reasonIfUnsupported,
631                                    input,
632                                    cellStateIn,
633                                    outputStateIn,
634                                    cellStateOut,
635                                    outputStateOut,
636                                    paramsInfo);
637 }
638
639 bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
640                                           const ReshapeDescriptor& descriptor,
641                                           Optional<std::string&> reasonIfUnsupported) const
642 {
643     ignore_unused(descriptor);
644     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
645                                       input.GetDataType(),
646                                       &TrueFunc<>,
647                                       &TrueFunc<>);
648 }
649
650 bool NeonLayerSupport::IsResizeSupported(const TensorInfo& input,
651                                          const TensorInfo& output,
652                                          const ResizeDescriptor& descriptor,
653                                          Optional<std::string&> reasonIfUnsupported) const
654 {
655     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonResizeWorkloadValidate,
656                                    reasonIfUnsupported,
657                                    input,
658                                    output,
659                                    descriptor);
660 }
661
662 bool NeonLayerSupport::IsResizeBilinearSupported(const TensorInfo& input,
663                                                  const TensorInfo& output,
664                                                  Optional<std::string&> reasonIfUnsupported) const
665 {
666     ResizeDescriptor descriptor;
667     descriptor.m_Method     = ResizeMethod::Bilinear;
668     descriptor.m_DataLayout = DataLayout::NCHW;
669
670     const TensorShape& outputShape = output.GetShape();
671     descriptor.m_TargetHeight = outputShape[2];
672     descriptor.m_TargetWidth  = outputShape[3];
673
674     return IsResizeSupported(input, output, descriptor, reasonIfUnsupported);
675 }
676
677 bool NeonLayerSupport::IsRsqrtSupported(const TensorInfo& input,
678                                         const TensorInfo& output,
679                                         Optional<std::string&> reasonIfUnsupported) const
680 {
681     ElementwiseUnaryDescriptor descriptor(UnaryOperation::Rsqrt);
682     return IsElementwiseUnarySupported(input, output, descriptor, reasonIfUnsupported);
683 }
684
685 bool NeonLayerSupport::IsSliceSupported(const TensorInfo& input,
686                                         const TensorInfo& output,
687                                         const SliceDescriptor& descriptor,
688                                         Optional<std::string&> reasonIfUnsupported) const
689 {
690     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSliceWorkloadValidate,
691                                    reasonIfUnsupported,
692                                    input,
693                                    output,
694                                    descriptor);
695 }
696
697 bool NeonLayerSupport::IsSoftmaxSupported(const TensorInfo& input,
698                                           const TensorInfo& output,
699                                           const SoftmaxDescriptor& descriptor,
700                                           Optional<std::string&> reasonIfUnsupported) const
701 {
702     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
703 }
704
705 bool NeonLayerSupport::IsSpaceToBatchNdSupported(const TensorInfo& input,
706                                                  const TensorInfo& output,
707                                                  const SpaceToBatchNdDescriptor& descriptor,
708                                                  Optional<std::string&> reasonIfUnsupported) const
709 {
710     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSpaceToBatchNdWorkloadValidate,
711                                    reasonIfUnsupported,
712                                    input,
713                                    output,
714                                    descriptor);
715 }
716
717 bool NeonLayerSupport::IsSpaceToDepthSupported(const TensorInfo& input,
718                                                const TensorInfo& output,
719                                                const SpaceToDepthDescriptor& descriptor,
720                                                Optional<std::string&> reasonIfUnsupported) const
721 {
722     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSpaceToDepthWorkloadValidate,
723                                    reasonIfUnsupported,
724                                    input,
725                                    output,
726                                    descriptor);
727 }
728
729 bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
730                                            const ViewsDescriptor& descriptor,
731                                            Optional<std::string&> reasonIfUnsupported) const
732 {
733     ignore_unused(descriptor);
734     return IsSupportedForDataTypeNeon(reasonIfUnsupported,
735                                       input.GetDataType(),
736                                       &TrueFunc<>,
737                                       &TrueFunc<>);
738 }
739
bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
                                           const std::vector<std::reference_wrapper<TensorInfo>>& outputs,
                                           const ViewsDescriptor& descriptor,
                                           Optional<std::string&> reasonIfUnsupported) const
{
#if defined(ARMCOMPUTENEON_ENABLED)
    // Split along the last dimension, cannot use sub-tensors
    // as width and height of the sub-tensors do not match
    // the width and height of the parent tensor
    // in case of input with more than 2D.
    std::set<unsigned int> splitAxis = ComputeSplitAxis(descriptor, input.GetShape());
    if (descriptor.GetNumDimensions() > 2 && splitAxis.size() == 1 &&
        *splitAxis.begin() == descriptor.GetNumDimensions() - 1 )
    {
        // The macro returns from this function with the ACL validation result,
        // so execution only continues past this point for other split shapes.
        FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSplitterWorkloadValidate,
                                       reasonIfUnsupported,
                                       input,
                                       outputs,
                                       *splitAxis.begin());
    }
#endif
    // Fallback (ACL disabled, or a split that can be done with sub-tensors):
    // each output must share the input's type and quantization space.
    boost::ignore_unused(descriptor);
    for (auto output : outputs)
    {
        if (!input.IsTypeSpaceMatch(output)) // Cannot use sub-tensors if the types are not same space
        {
            SetValueChecked(reasonIfUnsupported, "Neon Splitter: Types and quantization parameters must match.");
            return false;
        }
    }
    return true;
}
772
773 bool NeonLayerSupport::IsStackSupported(const std::vector<const TensorInfo*>& inputs,
774                                         const TensorInfo& output,
775                                         const StackDescriptor& descriptor,
776                                         Optional<std::string&> reasonIfUnsupported) const
777 {
778     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonStackWorkloadValidate,
779                                    reasonIfUnsupported,
780                                    inputs,
781                                    output,
782                                    descriptor);
783 }
784
785 bool NeonLayerSupport::IsStridedSliceSupported(const TensorInfo& input,
786                                                const TensorInfo& output,
787                                                const StridedSliceDescriptor& descriptor,
788                                                Optional<std::string&> reasonIfUnsupported) const
789 {
790     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonStridedSliceWorkloadValidate,
791                                    reasonIfUnsupported,
792                                    input,
793                                    output,
794                                    descriptor);
795 }
796
797 bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
798                                               const TensorInfo& input1,
799                                               const TensorInfo& output,
800                                               Optional<std::string&> reasonIfUnsupported) const
801 {
802     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
803                                    reasonIfUnsupported,
804                                    input0,
805                                    input1,
806                                    output);
807 }
808
bool NeonLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input,
                                                         const TensorInfo& output,
                                                         const TransposeConvolution2dDescriptor& descriptor,
                                                         const TensorInfo& weights,
                                                         const Optional<TensorInfo>& biases,
                                                         Optional<std::string&> reasonIfUnsupported) const
{
    // Guard: project macro checking the quantization multiplier derived from
    // input/output/weights scales. NOTE(review): presumably it also populates
    // reasonIfUnsupported on failure — confirm against the macro definition.
    if (!IS_QUANT_MULTIPLIER_SUPPORTED(input, output, weights))
    {
        return false;
    }

    // Defer the remaining checks to the ACL-backed workload validator.
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonTransposeConvolution2dWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor,
                                   weights,
                                   biases);
}
829
830 } // namespace armnn