2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
6 #include "NeonWorkloadFactoryHelper.hpp"
8 #include <aclCommon/ArmComputeTensorUtils.hpp>
9 #include <backendsCommon/MemCopyWorkload.hpp>
11 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
13 #include <neon/NeonWorkloadFactory.hpp>
14 #include <neon/NeonTensorHandle.hpp>
15 #include <neon/workloads/NeonWorkloadUtils.hpp>
16 #include <neon/workloads/NeonWorkloads.hpp>
18 BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)
23 boost::test_tools::predicate_result CompareIAclTensorHandleShape(IAclTensorHandle* tensorHandle,
24 std::initializer_list<unsigned int> expectedDimensions)
26 return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
29 bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
31 using namespace armnn::armcomputetensorutils;
33 const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
34 const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
36 if (handleInfo->data_type() != expectedAclInfo.data_type())
41 if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
46 if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
51 for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
53 if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
64 template <typename armnn::DataType DataType>
65 static void NeonCreateActivationWorkloadTest()
68 NeonWorkloadFactory factory =
69 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
71 auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
73 // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
74 ActivationQueueDescriptor queueDescriptor = workload->GetData();
75 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
76 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
77 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
78 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
81 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
82 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
84 NeonCreateActivationWorkloadTest<DataType::Float16>();
88 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
90 NeonCreateActivationWorkloadTest<DataType::Float32>();
93 template <typename WorkloadType,
94 typename DescriptorType,
96 armnn::DataType DataType>
97 static void NeonCreateElementwiseWorkloadTest()
100 NeonWorkloadFactory factory =
101 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
103 auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
105 DescriptorType queueDescriptor = workload->GetData();
106 auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
107 auto inputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
108 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
109 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
110 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
111 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
114 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
115 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
117 NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
118 AdditionQueueDescriptor,
120 DataType::Float16>();
124 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
126 NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
127 AdditionQueueDescriptor,
129 DataType::Float32>();
132 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
133 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
135 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
136 SubtractionQueueDescriptor,
138 DataType::Float16>();
142 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
144 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
145 SubtractionQueueDescriptor,
147 DataType::Float32>();
150 BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
152 NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
153 SubtractionQueueDescriptor,
155 DataType::QuantisedAsymm8>();
158 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
159 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
161 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
162 MultiplicationQueueDescriptor,
164 DataType::Float16>();
168 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
170 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
171 MultiplicationQueueDescriptor,
173 DataType::Float32>();
176 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
178 NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
179 MultiplicationQueueDescriptor,
181 DataType::QuantisedAsymm8>();
184 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
185 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
188 NeonWorkloadFactory factory =
189 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
191 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
192 (factory, graph, dataLayout);
194 // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
195 BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
196 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
197 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
199 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
200 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
202 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
203 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
206 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
207 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
209 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
212 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
214 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
218 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
220 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
223 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
225 NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
228 template <typename armnn::DataType DataType>
229 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
232 NeonWorkloadFactory factory =
233 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
235 auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);
237 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
238 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
240 // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
241 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
242 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
243 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
244 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
245 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
248 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
249 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
251 NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
254 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
256 NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
260 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
262 NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
265 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
267 NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
270 template <typename armnn::DataType DataType>
271 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
274 NeonWorkloadFactory factory =
275 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
277 auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
278 DataType>(factory, graph, dataLayout);
280 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
281 DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
282 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
283 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
285 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
286 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
287 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
288 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
290 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
291 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
294 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
296 NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
299 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
300 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
302 NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
306 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
307 static void NeonCreateFullyConnectedWorkloadTest()
310 NeonWorkloadFactory factory =
311 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
313 auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
315 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
316 FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
317 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
318 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
319 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
320 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
323 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
324 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
326 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
330 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
332 NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
335 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
336 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
339 NeonWorkloadFactory factory =
340 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
342 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
344 // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
345 NormalizationQueueDescriptor queueDescriptor = workload->GetData();
346 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
347 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
349 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
350 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
352 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
353 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
356 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
357 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
359 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
362 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
364 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
368 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
370 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
373 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
375 NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
379 template <typename armnn::DataType DataType>
380 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
383 NeonWorkloadFactory factory =
384 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
386 auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);
388 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
389 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
391 // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
392 Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
393 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
394 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
395 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
396 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
399 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
400 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
402 NeonCreatePooling2dWorkloadTest<DataType::Float16>();
406 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
408 NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
411 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
413 NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
416 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
418 NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NCHW);
421 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
423 NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NHWC);
426 static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
427 const armnn::TensorShape& alphaShape,
428 const armnn::TensorShape& outputShape,
429 armnn::DataType dataType)
432 NeonWorkloadFactory factory =
433 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
435 auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
442 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
443 PreluQueueDescriptor queueDescriptor = workload->GetData();
444 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
445 auto alphaHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
446 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
447 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
448 BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
449 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
452 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
453 BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
455 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
459 BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
461 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
464 BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
466 NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
469 template <typename armnn::DataType DataType>
470 static void NeonCreateReshapeWorkloadTest()
473 NeonWorkloadFactory factory =
474 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
476 auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
478 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
479 ReshapeQueueDescriptor queueDescriptor = workload->GetData();
480 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
481 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
482 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
483 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
486 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
487 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
489 NeonCreateReshapeWorkloadTest<DataType::Float16>();
493 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
495 NeonCreateReshapeWorkloadTest<DataType::Float32>();
498 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
500 NeonCreateReshapeWorkloadTest<DataType::QuantisedAsymm8>();
503 template <typename ResizeWorkloadType, armnn::DataType DataType>
504 static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
507 NeonWorkloadFactory factory =
508 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
509 auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
511 auto queueDescriptor = workload->GetData();
513 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
514 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
518 case DataLayout::NHWC:
519 BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
520 BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
522 case DataLayout::NCHW:
524 BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
525 BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
529 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
531 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
534 BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
536 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
539 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
541 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
544 BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
546 NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
549 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
550 static void NeonCreateSoftmaxWorkloadTest()
553 NeonWorkloadFactory factory =
554 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
556 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
558 // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
559 SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
560 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
561 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
562 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
563 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
566 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
567 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
569 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float16>();
573 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
575 NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float32>();
578 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
579 static void NeonSpaceToDepthWorkloadTest()
582 NeonWorkloadFactory factory =
583 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
585 auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
587 SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
588 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
589 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
591 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
592 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
595 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
597 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
600 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
602 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
605 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
607 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedAsymm8>();
610 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
612 NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
615 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
618 NeonWorkloadFactory factory =
619 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
621 auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
623 // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
624 SplitterQueueDescriptor queueDescriptor = workload->GetData();
625 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
626 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
628 auto outputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
629 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
631 auto outputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
632 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
634 auto outputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
635 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
638 BOOST_AUTO_TEST_CASE(CreateSplitterConcat)
640 // Tests that it is possible to decide which output of the splitter layer
641 // should be lined to which input of the concat layer.
642 // We tested that is is possible to specify 0th output
643 // of the splitter to be the 1st input to the concat, and the 1st output of the splitter to be 0th input
647 NeonWorkloadFactory factory =
648 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
651 CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
652 DataType::Float32>(factory, graph);
654 auto wlSplitter = std::move(workloads.first);
655 auto wlConcat = std::move(workloads.second);
657 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
658 armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
659 armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
660 armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
661 armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
668 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
670 BOOST_TEST(validDataPointers);
673 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
675 // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
676 // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
679 NeonWorkloadFactory factory =
680 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
682 std::unique_ptr<NeonSplitterWorkload> wlSplitter;
683 std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
684 std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
685 std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
686 std::unique_ptr<NeonActivationWorkload> wlActiv1_1;
688 CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
689 NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
690 wlActiv1_0, wlActiv1_1);
692 armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
693 armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
694 armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
695 armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
696 armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
697 armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
702 BOOST_TEST(activ0_0Im);
703 BOOST_TEST(activ0_1Im);
704 BOOST_TEST(activ1_0Im);
705 BOOST_TEST(activ1_1Im);
707 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
708 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
710 BOOST_TEST(validDataPointers);
#if defined(ARMNNREF_ENABLED)

// This test unit needs the reference backend, it's not available if the reference backend is not built

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
    CreateMemCopyWorkloads<IAclTensorHandle>(factory);
}

#endif
726 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
727 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
730 NeonWorkloadFactory factory =
731 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
734 CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
736 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
737 L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
738 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
739 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
741 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ?
742 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
743 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
744 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
746 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
747 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif
762 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
764 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
767 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
769 NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
772 template <typename LstmWorkloadType>
773 static void NeonCreateLstmWorkloadTest()
776 NeonWorkloadFactory factory =
777 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
779 auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
781 LstmQueueDescriptor queueDescriptor = workload->GetData();
783 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
784 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
786 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
787 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
790 BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
792 NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
795 template <typename ConcatWorkloadType, armnn::DataType DataType>
796 static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
797 unsigned int concatAxis)
800 NeonWorkloadFactory factory =
801 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
803 auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
805 ConcatQueueDescriptor queueDescriptor = workload->GetData();
806 auto inputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
807 auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
808 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
810 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
811 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
812 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
815 BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
817 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
820 BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
822 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
825 BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
827 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
830 BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
832 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
835 BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
837 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
840 BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
842 NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
845 template <armnn::DataType DataType>
846 static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
847 const std::initializer_list<unsigned int>& outputShape,
849 unsigned int numInputs)
852 NeonWorkloadFactory factory =
853 NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
855 auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
857 TensorShape(inputShape),
858 TensorShape(outputShape),
862 // Check inputs and output are as expected
863 StackQueueDescriptor queueDescriptor = workload->GetData();
864 for (unsigned int i = 0; i < numInputs; ++i)
866 auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
867 BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
869 auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
870 BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
873 BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
875 NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
878 BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
880 NeonCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
883 template <typename QuantizedLstmWorkloadType>
884 static void NeonCreateQuantizedLstmWorkloadTest()
886 using boost::polymorphic_downcast;
889 NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
891 auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
893 QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
895 IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
896 BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
897 BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
899 IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
900 BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
901 BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
903 IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
904 BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
905 BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
907 IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
908 BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
909 BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
911 IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
912 BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
913 BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
916 BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
918 NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
921 BOOST_AUTO_TEST_SUITE_END()