2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
6 #include "NeonLayerSupport.hpp"
7 #include "NeonBackendId.hpp"
9 #include <armnn/Descriptors.hpp>
10 #include <InternalTypes.hpp>
11 #include <LayerSupportCommon.hpp>
12 #include <armnn/Tensor.hpp>
13 #include <armnn/Types.hpp>
15 #include <backendsCommon/LayerSupportRegistry.hpp>
17 #include <boost/core/ignore_unused.hpp>
19 #ifdef ARMCOMPUTENEON_ENABLED
20 #include "workloads/NeonAdditionFloatWorkload.hpp"
21 #include "workloads/NeonActivationWorkload.hpp"
22 #include "workloads/NeonBatchNormalizationFloatWorkload.hpp"
23 #include "workloads/NeonConvolution2dWorkload.hpp"
24 #include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
25 #include "workloads/NeonL2NormalizationFloatWorkload.hpp"
26 #include "workloads/NeonMultiplicationFloatWorkload.hpp"
27 #include "workloads/NeonNormalizationFloatWorkload.hpp"
28 #include "workloads/NeonFullyConnectedWorkload.hpp"
29 #include "workloads/NeonPermuteWorkload.hpp"
30 #include "workloads/NeonPooling2dWorkload.hpp"
31 #include "workloads/NeonSoftmaxBaseWorkload.hpp"
32 #include "workloads/NeonSubtractionFloatWorkload.hpp"
35 using namespace boost;
43 ILayerSupportSharedPtr GetLayerSupportPointer()
45 static ILayerSupportSharedPtr instance{new NeonLayerSupport};
49 static StaticRegistryInitializer<LayerSupportRegistry> g_RegisterHelper{
50 LayerSupportRegistryInstance(),
52 [](const EmptyInitializer&)
54 return GetLayerSupportPointer();
58 bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported)
60 #if ARMCOMPUTENEON_ENABLED
63 if (reasonIfUnsupported)
65 reasonIfUnsupported.value() = "The armnn library has been built without NEON support";
71 template<typename FloatFunc, typename Uint8Func, typename ... Params>
72 bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported,
74 FloatFunc floatFuncPtr,
75 Uint8Func uint8FuncPtr,
78 return IsNeonBackendSupported(reasonIfUnsupported) &&
79 IsSupportedForDataTypeGeneric(reasonIfUnsupported,
84 std::forward<Params>(params)...);
#if ARMCOMPUTENEON_ENABLED
// Runs an arm_compute *Validate function and converts its Status into a bool,
// copying the ACL error description into reasonIfUnsupported on failure.
template<class FuncType, class... Args>
inline bool IsWorkloadSupported(FuncType& func, Optional<std::string&> reasonIfUnsupported, Args&&... args)
{
    arm_compute::Status aclStatus = func(std::forward<Args>(args)...);
    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        reasonIfUnsupported.value() = aclStatus.error_description();
    }
    return supported;
}

#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__);
#else
// Without NEON the validate functions don't exist; just report backend absence.
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsNeonBackendSupported(reasonIfUnsupported);
#endif
107 } // anonymous namespace
109 bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input,
110 const TensorInfo& output,
111 const ActivationDescriptor& descriptor,
112 Optional<std::string&> reasonIfUnsupported) const
114 ignore_unused(descriptor);
115 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate,
122 bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0,
123 const TensorInfo& input1,
124 const TensorInfo& output,
125 Optional<std::string&> reasonIfUnsupported) const
127 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
134 bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
135 const TensorInfo& output,
136 const TensorInfo& mean,
137 const TensorInfo& var,
138 const TensorInfo& beta,
139 const TensorInfo& gamma,
140 const BatchNormalizationDescriptor& descriptor,
141 Optional<std::string&> reasonIfUnsupported) const
143 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate,
154 bool NeonLayerSupport::IsConstantSupported(const TensorInfo& output,
155 Optional<std::string&> reasonIfUnsupported) const
157 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
158 output.GetDataType(),
163 bool NeonLayerSupport::IsConvertFp16ToFp32Supported(const TensorInfo& input,
164 const TensorInfo& output,
165 Optional<std::string&> reasonIfUnsupported) const
167 ignore_unused(input);
168 ignore_unused(output);
169 ignore_unused(reasonIfUnsupported);
173 bool NeonLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input,
174 const TensorInfo& output,
175 Optional<std::string&> reasonIfUnsupported) const
177 ignore_unused(input);
178 ignore_unused(output);
179 ignore_unused(reasonIfUnsupported);
183 bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
184 const TensorInfo& output,
185 const Convolution2dDescriptor& descriptor,
186 const TensorInfo& weights,
187 const Optional<TensorInfo>& biases,
188 Optional<std::string&> reasonIfUnsupported) const
190 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
199 bool NeonLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
200 const TensorInfo& output,
201 const DepthwiseConvolution2dDescriptor& descriptor,
202 const TensorInfo& weights,
203 const Optional<TensorInfo>& biases,
204 Optional<std::string&> reasonIfUnsupported) const
206 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
215 bool NeonLayerSupport::IsDivisionSupported(const TensorInfo& input0,
216 const TensorInfo& input1,
217 const TensorInfo& output,
218 Optional<std::string&> reasonIfUnsupported) const
220 ignore_unused(input0);
221 ignore_unused(input1);
222 ignore_unused(output);
223 ignore_unused(reasonIfUnsupported);
227 bool NeonLayerSupport::IsFakeQuantizationSupported(const TensorInfo& input,
228 const FakeQuantizationDescriptor& descriptor,
229 Optional<std::string&> reasonIfUnsupported) const
231 ignore_unused(input);
232 ignore_unused(descriptor);
233 ignore_unused(reasonIfUnsupported);
237 bool NeonLayerSupport::IsFloorSupported(const TensorInfo& input,
238 const TensorInfo& output,
239 Optional<std::string&> reasonIfUnsupported) const
241 ignore_unused(output);
242 return IsNeonBackendSupported(reasonIfUnsupported) &&
243 IsSupportedForDataTypeGeneric(reasonIfUnsupported,
250 bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
251 const TensorInfo& output,
252 const TensorInfo& weights,
253 const TensorInfo& biases,
254 const FullyConnectedDescriptor& descriptor,
255 Optional<std::string&> reasonIfUnsupported) const
257 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate,
266 bool NeonLayerSupport::IsInputSupported(const TensorInfo& input,
267 Optional<std::string&> reasonIfUnsupported) const
269 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
275 bool NeonLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
276 const TensorInfo& output,
277 const L2NormalizationDescriptor& descriptor,
278 Optional<std::string&> reasonIfUnsupported) const
280 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
283 bool NeonLayerSupport::IsLstmSupported(const TensorInfo& input,
284 const TensorInfo& outputStateIn,
285 const TensorInfo& cellStateIn,
286 const TensorInfo& scratchBuffer,
287 const TensorInfo& outputStateOut,
288 const TensorInfo& cellStateOut,
289 const TensorInfo& output,
290 const LstmDescriptor& descriptor,
291 const TensorInfo& inputToForgetWeights,
292 const TensorInfo& inputToCellWeights,
293 const TensorInfo& inputToOutputWeights,
294 const TensorInfo& recurrentToForgetWeights,
295 const TensorInfo& recurrentToCellWeights,
296 const TensorInfo& recurrentToOutputWeights,
297 const TensorInfo& forgetGateBias,
298 const TensorInfo& cellBias,
299 const TensorInfo& outputGateBias,
300 const TensorInfo* inputToInputWeights,
301 const TensorInfo* recurrentToInputWeights,
302 const TensorInfo* cellToInputWeights,
303 const TensorInfo* inputGateBias,
304 const TensorInfo* projectionWeights,
305 const TensorInfo* projectionBias,
306 const TensorInfo* cellToForgetWeights,
307 const TensorInfo* cellToOutputWeights,
308 Optional<std::string&> reasonIfUnsupported) const
310 ignore_unused(input);
311 ignore_unused(outputStateIn);
312 ignore_unused(cellStateIn);
313 ignore_unused(scratchBuffer);
314 ignore_unused(outputStateOut);
315 ignore_unused(cellStateOut);
316 ignore_unused(output);
317 ignore_unused(descriptor);
318 ignore_unused(inputToForgetWeights);
319 ignore_unused(inputToCellWeights);
320 ignore_unused(inputToOutputWeights);
321 ignore_unused(recurrentToForgetWeights);
322 ignore_unused(recurrentToCellWeights);
323 ignore_unused(recurrentToOutputWeights);
324 ignore_unused(forgetGateBias);
325 ignore_unused(cellBias);
326 ignore_unused(outputGateBias);
327 ignore_unused(inputToInputWeights);
328 ignore_unused(recurrentToInputWeights);
329 ignore_unused(cellToInputWeights);
330 ignore_unused(inputGateBias);
331 ignore_unused(projectionWeights);
332 ignore_unused(projectionBias);
333 ignore_unused(cellToForgetWeights);
334 ignore_unused(cellToOutputWeights);
335 ignore_unused(reasonIfUnsupported);
339 bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
340 const TensorInfo& output,
341 const MeanDescriptor& descriptor,
342 Optional<std::string&> reasonIfUnsupported) const
344 ignore_unused(input);
345 ignore_unused(output);
346 ignore_unused(descriptor);
347 ignore_unused(reasonIfUnsupported);
351 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
352 const OriginsDescriptor& descriptor,
353 Optional<std::string&> reasonIfUnsupported) const
355 ignore_unused(descriptor);
356 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
357 inputs[0]->GetDataType(),
362 bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
363 const TensorInfo& input1,
364 const TensorInfo& output,
365 Optional<std::string&> reasonIfUnsupported) const
367 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
374 bool NeonLayerSupport::IsNormalizationSupported(const TensorInfo& input,
375 const TensorInfo& output,
376 const NormalizationDescriptor& descriptor,
377 Optional<std::string&> reasonIfUnsupported) const
379 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate,
386 bool NeonLayerSupport::IsOutputSupported(const TensorInfo& output,
387 Optional<std::string&> reasonIfUnsupported) const
389 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
390 output.GetDataType(),
395 bool NeonLayerSupport::IsPadSupported(const TensorInfo& input,
396 const TensorInfo& output,
397 const PadDescriptor& descriptor,
398 Optional<std::string&> reasonIfUnsupported) const
400 ignore_unused(input);
401 ignore_unused(output);
402 ignore_unused(descriptor);
403 ignore_unused(reasonIfUnsupported);
407 bool NeonLayerSupport::IsPermuteSupported(const TensorInfo& input,
408 const TensorInfo& output,
409 const PermuteDescriptor& descriptor,
410 Optional<std::string&> reasonIfUnsupported) const
412 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
415 bool NeonLayerSupport::IsPooling2dSupported(const TensorInfo& input,
416 const TensorInfo& output,
417 const Pooling2dDescriptor& descriptor,
418 Optional<std::string&> reasonIfUnsupported) const
420 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
423 bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
424 Optional<std::string&> reasonIfUnsupported) const
426 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
432 bool NeonLayerSupport::IsResizeBilinearSupported(const TensorInfo& input,
433 Optional<std::string&> reasonIfUnsupported) const
435 ignore_unused(input);
436 ignore_unused(reasonIfUnsupported);
440 bool NeonLayerSupport::IsSoftmaxSupported(const TensorInfo& input,
441 const TensorInfo& output,
442 const SoftmaxDescriptor& descriptor,
443 Optional<std::string&> reasonIfUnsupported) const
445 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
448 bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
449 const ViewsDescriptor& descriptor,
450 Optional<std::string&> reasonIfUnsupported) const
452 ignore_unused(descriptor);
453 return IsSupportedForDataTypeNeon(reasonIfUnsupported,
459 bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
460 const TensorInfo& input1,
461 const TensorInfo& output,
462 Optional<std::string&> reasonIfUnsupported) const
464 FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
471 bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc)
473 // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases,
474 // and complement with NEDirectConvolutionLayerKernel::configure() implementation.
476 // Only 1x1 is using direct convolution. Performance results and details are in:
477 // https://jira.arm.com/browse/IVGCVSW-1003
478 // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc
480 const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32);
483 const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) &&
484 (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3);
486 auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value)
488 return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value ||
489 conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value;
492 // Supported sizes and padding.
493 const bool sizeAndPaddingSupported =
494 // Pad > 0 not supported for 1x1 weights.
495 (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u));
497 const bool preferDirectConvolution = dataTypeSupported &&
499 sizeAndPaddingSupported &&
500 // NEDirectConvolutionLayerKernel doesn't support NULL bias.
502 return preferDirectConvolution;