2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
6 #include "ClContextControlFixture.hpp"
7 #include "ClWorkloadFactoryHelper.hpp"
9 #include <backendsCommon/MemCopyWorkload.hpp>
11 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
13 #include <cl/ClTensorHandle.hpp>
14 #include <cl/ClWorkloadFactory.hpp>
15 #include <cl/workloads/ClWorkloads.hpp>
16 #include <cl/workloads/ClWorkloadUtils.hpp>
// Compares the shape of an IClTensorHandle against a list of expected dimensions.
// Both arguments are forwarded unchanged to CompareTensorHandleShape<IClTensorHandle>; the returned
// predicate_result is what the BOOST_TEST checks in this file consume.
18 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
19 std::initializer_list<unsigned int> expectedDimensions)
21 return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
24 BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)
// Creates a ClActivationWorkload for the given DataType via CreateActivationWorkloadTest and checks
// that its first input handle and first output handle both have shape {1, 1}.
26 template <armnn::DataType DataType>
27 static void ClCreateActivationWorkloadTest()
30 ClWorkloadFactory factory =
31 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
33 auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
35 // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
36 ActivationQueueDescriptor queueDescriptor = workload->GetData();
37 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
38 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
40 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
41 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
// Activation workload creation check for Float32.
44 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
46 ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
// Activation workload creation check for Float16.
49 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
51 ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
// Creates an elementwise workload of type WorkloadType via CreateElementwiseWorkloadTest and checks
// that both input handles and the output handle have shape {2, 3}.
// DescriptorType is the queue descriptor type returned by the workload's GetData().
54 template <typename WorkloadType,
55 typename DescriptorType,
57 armnn::DataType DataType>
58 static void ClCreateElementwiseWorkloadTest()
61 ClWorkloadFactory factory =
62 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
64 auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
66 // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
67 DescriptorType queueDescriptor = workload->GetData();
68 auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
69 auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
70 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
71 BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
72 BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
73 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
// Elementwise creation check for ClAdditionWorkload / AdditionQueueDescriptor with Float32.
76 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
78 ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
79 AdditionQueueDescriptor,
81 armnn::DataType::Float32>();
// Elementwise creation check for ClAdditionWorkload / AdditionQueueDescriptor with Float16.
84 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
86 ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
87 AdditionQueueDescriptor,
89 armnn::DataType::Float16>();
// Elementwise creation check for ClSubtractionWorkload / SubtractionQueueDescriptor with Float32.
92 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
94 ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
95 SubtractionQueueDescriptor,
97 armnn::DataType::Float32>();
// Elementwise creation check for ClSubtractionWorkload / SubtractionQueueDescriptor with Float16.
100 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
102 ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
103 SubtractionQueueDescriptor,
105 armnn::DataType::Float16>();
// Elementwise creation check for ClMultiplicationWorkload / MultiplicationQueueDescriptor with Float32.
108 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
110 ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
111 MultiplicationQueueDescriptor,
113 armnn::DataType::Float32>();
// Elementwise creation check for ClMultiplicationWorkload / MultiplicationQueueDescriptor with Float16.
116 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
118 ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
119 MultiplicationQueueDescriptor,
121 armnn::DataType::Float16>();
// Elementwise creation check for ClMultiplicationWorkload / MultiplicationQueueDescriptor with QuantisedAsymm8.
124 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
126 ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
127 MultiplicationQueueDescriptor,
129 armnn::DataType::QuantisedAsymm8>();
// Elementwise creation check for ClDivisionFloatWorkload / DivisionQueueDescriptor with Float32.
132 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
134 ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
135 DivisionQueueDescriptor,
137 armnn::DataType::Float32>();
// Elementwise creation check for ClDivisionFloatWorkload / DivisionQueueDescriptor with Float16.
140 BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
142 ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
143 DivisionQueueDescriptor,
145 armnn::DataType::Float16>();
// Creates a batch normalization workload for the given data layout via
// CreateBatchNormalizationWorkloadTest and checks the shapes of its first input and output handles.
// Expected shape is { 2, 4, 4, 3 } under the DataLayout::NHWC label and { 2, 3, 4, 4 } for the
// second pair of checks (presumably the NCHW layout — TODO confirm).
148 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
149 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
152 ClWorkloadFactory factory =
153 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
155 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
156 (factory, graph, dataLayout);
158 // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
159 BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
160 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
161 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
165 case DataLayout::NHWC:
166 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
167 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
170 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
171 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
// Batch normalization creation check: ClBatchNormalizationFloatWorkload, Float32, NCHW layout.
175 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
177 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
178 armnn::DataType::Float32>(DataLayout::NCHW);
// Batch normalization creation check: ClBatchNormalizationFloatWorkload, Float16, NCHW layout.
181 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
183 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
184 armnn::DataType::Float16>(DataLayout::NCHW);
// Batch normalization creation check: ClBatchNormalizationFloatWorkload, Float32, NHWC layout.
187 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
189 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
190 armnn::DataType::Float32>(DataLayout::NHWC);
// Batch normalization creation check: ClBatchNormalizationFloatWorkload, Float16, NHWC layout.
193 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
195 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
196 armnn::DataType::Float16>(DataLayout::NHWC);
// Creates a ClConvertFp16ToFp32Workload via CreateConvertFp16ToFp32WorkloadTest and checks that the
// first input and output handles both have shape {1, 3, 2, 3}, that the input tensor's data type is
// F16 and that the output tensor's data type is F32.
199 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
202 ClWorkloadFactory factory =
203 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
205 auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
207 ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
208 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
209 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
211 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
212 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
213 BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
214 BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
// Creates a ClConvertFp32ToFp16Workload via CreateConvertFp32ToFp16WorkloadTest and checks that the
// first input and output handles both have shape {1, 3, 2, 3}, that the input tensor's data type is
// F32 and that the output tensor's data type is F16.
217 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
220 ClWorkloadFactory factory =
221 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
223 auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
225 ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
226 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
227 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
229 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
230 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
231 BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
232 BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
// Creates a convolution workload via CreateConvolution2dWorkloadTest and checks the shapes reported
// by its first input and output handles against layout-dependent expectations:
//   NCHW: input {2, 3, 8, 16}, output {2, 2, 2, 10}
//   otherwise: input {2, 8, 16, 3}, output {2, 2, 10, 2}
// NOTE(review): in the lines shown here the Convolution2dWorkloadType template parameter is not
// used; ClConvolution2dWorkload is passed to the helper directly — confirm whether that is intended.
235 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
236 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
239 ClWorkloadFactory factory =
240 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
242 auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
246 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
247 : std::initializer_list<unsigned int>({2, 8, 16, 3});
248 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
249 : std::initializer_list<unsigned int>({2, 2, 10, 2});
251 // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
252 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
253 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
254 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
255 BOOST_TEST((inputHandle->GetShape() == inputShape));
256 BOOST_TEST((outputHandle->GetShape() == outputShape));
// Convolution2d creation check: Float32, NCHW layout.
259 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
261 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
// Convolution2d creation check: Float32, NHWC layout.
264 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
266 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
// Convolution2d creation check: Float16, NCHW layout.
269 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
271 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
// Convolution2d creation check: Float16, NHWC layout.
274 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
276 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
// Creates a depthwise convolution workload via CreateDepthwiseConvolution2dWorkloadTest and checks
// the shapes reported by its first input and output handles. Input and output expectations are
// identical: { 2, 2, 5, 5 } for NCHW and { 2, 5, 5, 2 } otherwise.
279 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
280 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
283 ClWorkloadFactory factory =
284 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
286 auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
287 (factory, graph, dataLayout);
289 // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
290 DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
291 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
292 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
294 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
295 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
296 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
297 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
299 BOOST_TEST((inputHandle->GetShape() == inputShape));
300 BOOST_TEST((outputHandle->GetShape() == outputShape));
// Depthwise convolution creation check: ClDepthwiseConvolutionWorkload, Float32, NHWC layout.
303 BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
305 ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
// Creates a convolution workload via CreateDirectConvolution2dWorkloadTest and checks that the first
// input handle has shape {2, 3, 6, 6} and the first output handle has shape {2, 2, 6, 6}.
// NOTE(review): the Convolution2dWorkloadType template parameter is not used in this body;
// ClConvolution2dWorkload is passed to the helper directly — confirm whether that is intended.
308 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
309 static void ClDirectConvolution2dWorkloadTest()
312 ClWorkloadFactory factory =
313 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
315 auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
317 // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
318 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
319 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
320 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
321 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
322 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
// Direct convolution creation check for Float32.
325 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
327 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
// Direct convolution creation check for Float16.
330 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
332 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
// Direct convolution creation check for QuantisedAsymm8.
335 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
337 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QuantisedAsymm8>();
// Creates a fully connected workload via CreateFullyConnectedWorkloadTest and checks that the first
// input handle has shape {3, 1, 4, 5} and the first output handle has shape {3, 7}.
340 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
341 static void ClCreateFullyConnectedWorkloadTest()
344 ClWorkloadFactory factory =
345 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
348 CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
350 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
351 FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
352 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
353 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
354 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
355 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
// Fully connected creation check: ClFullyConnectedWorkload, Float32.
359 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
361 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
// Fully connected creation check: ClFullyConnectedWorkload, Float16.
364 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
366 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
// Creates a normalization workload via CreateNormalizationWorkloadTest and checks the shapes
// reported by its first input and output handles. Input and output expectations are identical:
// {3, 5, 5, 1} for NCHW and {3, 1, 5, 5} otherwise.
// NOTE(review): these expectations look swapped relative to the other layout-aware tests in this
// file (e.g. pooling expects NCHW {3, 2, 5, 5} vs NHWC {3, 5, 5, 2}) — confirm they match
// what CreateNormalizationWorkloadTest actually builds.
369 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
370 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
373 ClWorkloadFactory factory =
374 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
376 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
378 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
379 NormalizationQueueDescriptor queueDescriptor = workload->GetData();
380 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
381 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
383 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
384 : std::initializer_list<unsigned int>({3, 1, 5, 5});
385 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
386 : std::initializer_list<unsigned int>({3, 1, 5, 5});
388 BOOST_TEST((inputHandle->GetShape() == inputShape));
389 BOOST_TEST((outputHandle->GetShape() == outputShape));
// Normalization creation check: ClNormalizationFloatWorkload, Float32, NCHW layout.
392 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
394 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
// Normalization creation check: ClNormalizationFloatWorkload, Float16, NCHW layout.
397 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
399 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
// Normalization creation check: ClNormalizationFloatWorkload, Float32, NHWC layout.
402 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
404 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
// Normalization creation check: ClNormalizationFloatWorkload, Float16, NHWC layout.
407 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
409 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
// Creates a ClPooling2dWorkload via CreatePooling2dWorkloadTest and checks the shapes reported by
// its first input and output handles against layout-dependent expectations:
//   NCHW: input {3, 2, 5, 5}, output {3, 2, 2, 4}
//   otherwise: input {3, 5, 5, 2}, output {3, 2, 4, 2}
412 template <typename armnn::DataType DataType>
413 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
416 ClWorkloadFactory factory =
417 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
419 auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
421 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
422 : std::initializer_list<unsigned int>({3, 5, 5, 2});
423 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
424 : std::initializer_list<unsigned int>({3, 2, 4, 2});
426 // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
427 Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
428 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
429 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
431 BOOST_TEST((inputHandle->GetShape() == inputShape));
432 BOOST_TEST((outputHandle->GetShape() == outputShape));
// Pooling2d creation check: Float32, NCHW layout.
435 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
437 ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
// Pooling2d creation check: Float32, NHWC layout.
440 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
442 ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
// Pooling2d creation check: Float16, NCHW layout.
445 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
447 ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
// Pooling2d creation check: Float16, NHWC layout.
450 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
452 ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
// Creates a ClPreluWorkload via CreatePreluWorkloadTest and checks that the workload's two input
// handles and its output handle report the shapes supplied by the caller.
//   inputShape  - expected shape of m_Inputs[0]
//   alphaShape  - expected shape of m_Inputs[1]
//   outputShape - expected shape of m_Outputs[0]
//   dataType    - data type for the workload under test (presumably forwarded to
//                 CreatePreluWorkloadTest — TODO confirm)
455 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
456 const armnn::TensorShape& alphaShape,
457 const armnn::TensorShape& outputShape,
458 armnn::DataType dataType)
461 ClWorkloadFactory factory =
462 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
464 auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
471 // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
472 PreluQueueDescriptor queueDescriptor = workload->GetData();
473 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
474 auto alphaHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
475 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
477 BOOST_TEST((inputHandle->GetShape() == inputShape));
478 BOOST_TEST((alphaHandle->GetShape() == alphaShape));
479 BOOST_TEST((outputHandle->GetShape() == outputShape));
// PReLU creation check for Float16: input { 1, 4, 1, 2 }, alpha { 5, 4, 3, 1 }, output { 5, 4, 3, 2 }.
482 BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
484 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
// PReLU creation check for Float32: input { 1, 4, 1, 2 }, alpha { 5, 4, 3, 1 }, output { 5, 4, 3, 2 }.
487 BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
489 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
// PReLU creation check for QuantisedAsymm8: input { 1, 4, 1, 2 }, alpha { 5, 4, 3, 1 }, output { 5, 4, 3, 2 }.
492 BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
494 ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
// Creates a ClReshapeWorkload via CreateReshapeWorkloadTest and checks that the first input handle
// has shape {4, 1} and the first output handle has shape {1, 4}.
497 template <typename armnn::DataType DataType>
498 static void ClCreateReshapeWorkloadTest()
501 ClWorkloadFactory factory =
502 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
504 auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
506 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
507 ReshapeQueueDescriptor queueDescriptor = workload->GetData();
508 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
509 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
511 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
512 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
// Reshape creation check for Float32.
515 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
517 ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
// Reshape creation check for Float16.
520 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
522 ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
// Reshape creation check for QuantisedAsymm8.
525 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
527 ClCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
// Creates a softmax workload via CreateSoftmaxWorkloadTest and checks that its first input handle
// and first output handle both have shape {4, 1}.
530 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
531 static void ClSoftmaxWorkloadTest()
534 ClWorkloadFactory factory =
535 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
537 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
539 // Checks that inputs/outputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
540 SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
541 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
542 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
544 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
545 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
// Softmax creation check: ClSoftmaxFloatWorkload, Float32.
549 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest)
551 ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>();
// Softmax creation check: ClSoftmaxFloatWorkload, Float16.
554 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
556 ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>();
// Creates a ClSplitterWorkload via CreateSplitterWorkloadTest and checks the shapes of its input and
// its three outputs: input {5, 7, 7}; output 0 {1, 7, 7}; outputs 1 and 2 {2, 7, 7}.
// The outputs are checked in the order 1, 2, 0.
559 template <typename armnn::DataType DataType>
560 static void ClSplitterWorkloadTest()
563 ClWorkloadFactory factory =
564 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
566 auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
568 // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
569 SplitterQueueDescriptor queueDescriptor = workload->GetData();
570 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
571 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
573 auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
574 BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
576 auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
577 BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
579 auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
580 BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
// Splitter creation check for Float32.
583 BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
585 ClSplitterWorkloadTest<armnn::DataType::Float32>();
// Splitter creation check for Float16.
588 BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
590 ClSplitterWorkloadTest<armnn::DataType::Float16>();
// Builds a splitter workload and a concat workload via CreateSplitterConcatWorkloadTest and checks
// how the splitter's outputs are wired to the concat's inputs: splitter output 0 must be the same
// handle as concat input 1, and splitter output 1 the same handle as concat input 0. It also checks
// that the two concat inputs share one parent tensor and the two splitter outputs share one parent.
593 template <typename armnn::DataType DataType>
594 static void ClSplitterConcatTest()
596 // Tests that it is possible to decide which output of the splitter layer
597 // should be linked to which input of the concat layer.
598 // We test that it is possible to specify 0th output
599 // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be 0th input
603 ClWorkloadFactory factory =
604 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
607 CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
610 auto wlSplitter = std::move(workloads.first);
611 auto wlConcat = std::move(workloads.second);
613 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
614 armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
615 armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
616 armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
617 armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
624 //Flipped order of inputs/outputs.
625 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
626 BOOST_TEST(validDataPointers);
629 //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
// NOTE(review): dynamic_cast yields nullptr on a type mismatch and these four pointers are
// dereferenced here — confirm they are verified non-null before this point.
630 bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
631 && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
633 BOOST_TEST(validSubTensorParents);
// Splitter/concat wiring check for Float32.
636 BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload)
638 ClSplitterConcatTest<armnn::DataType::Float32>();
// Splitter/concat wiring check for Float16.
641 BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload)
643 ClSplitterConcatTest<armnn::DataType::Float16>();
// Builds one splitter workload and four activation workloads (Float32) via
// CreateSplitterMultipleInputsOneOutputWorkloadTest, then checks that each activation's first input
// is a non-null ClSubTensorHandle and that activations 0_0 and 0_1 read splitter output 0 while
// activations 1_0 and 1_1 read splitter output 1 (compared by handle pointer).
647 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
649 // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
650 // We create a splitter with two outputs. Each of those outputs is used by two different activation layers.
653 ClWorkloadFactory factory =
654 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
656 std::unique_ptr<ClSplitterWorkload> wlSplitter;
657 std::unique_ptr<ClActivationWorkload> wlActiv0_0;
658 std::unique_ptr<ClActivationWorkload> wlActiv0_1;
659 std::unique_ptr<ClActivationWorkload> wlActiv1_0;
660 std::unique_ptr<ClActivationWorkload> wlActiv1_1;
// The helper receives the five empty unique_ptrs declared above; they are dereferenced right after
// this call, so it is presumably what populates them — TODO confirm.
662 CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
663 ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
664 wlActiv1_0, wlActiv1_1);
666 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
667 armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
668 armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
669 armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
670 armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
671 armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
672 armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
677 BOOST_TEST(activ0_0Im);
678 BOOST_TEST(activ0_1Im);
679 BOOST_TEST(activ1_0Im);
680 BOOST_TEST(activ1_1Im);
682 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
683 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
685 BOOST_TEST(validDataPointers);
// Runs the CreateMemCopyWorkloads helper, instantiated for IClTensorHandle, against a ClWorkloadFactory.
688 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
690 ClWorkloadFactory factory =
691 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
693 CreateMemCopyWorkloads<IClTensorHandle>(factory);
// Creates an L2 normalization workload via CreateL2NormalizationWorkloadTest and checks the shapes
// reported by its first input and output handles. Input and output expectations are identical:
// { 5, 20, 50, 67 } for NCHW and { 5, 50, 67, 20 } otherwise.
696 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
697 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
700 ClWorkloadFactory factory =
701 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
704 CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
706 // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
707 L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
708 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
709 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
711 TensorShape inputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
712 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
713 TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
714 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
716 BOOST_TEST((inputHandle->GetShape() == inputShape));
717 BOOST_TEST((outputHandle->GetShape() == outputShape));
// L2 normalization creation check: ClL2NormalizationFloatWorkload, Float32, NCHW layout.
720 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
722 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
// L2 normalization creation check: ClL2NormalizationFloatWorkload, Float32, NHWC layout.
725 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
727 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
// L2 normalization creation check: ClL2NormalizationFloatWorkload, Float16, NCHW layout.
730 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
732 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
// L2 normalization creation check: ClL2NormalizationFloatWorkload, Float16, NHWC layout.
735 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
737 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
// Creates an LSTM workload via CreateLstmWorkloadTest and checks that the first input handle has
// shape { 2, 2 } and that the output handle at index 1 has shape { 2, 4 }.
740 template <typename LstmWorkloadType>
741 static void ClCreateLstmWorkloadTest()
744 ClWorkloadFactory factory =
745 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
747 auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
749 LstmQueueDescriptor queueDescriptor = workload->GetData();
750 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
// Note: the output inspected here is m_Outputs[1], not m_Outputs[0].
751 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
752 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
753 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
// LSTM creation check for ClLstmFloatWorkload.
756 BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
758 ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
// Creates a resize workload via CreateResizeBilinearWorkloadTest and checks the shapes of its first
// input and output handles per data layout:
//   NHWC: input { 2, 4, 4, 3 }, output { 2, 2, 2, 3 }
//   NCHW: input { 2, 3, 4, 4 }, output { 2, 3, 2, 2 }
761 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
762 static void ClResizeWorkloadTest(DataLayout dataLayout)
765 ClWorkloadFactory factory =
766 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
768 auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
770 auto queueDescriptor = workload->GetData();
772 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
773 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
777 case DataLayout::NHWC:
778 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
779 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
781 case DataLayout::NCHW:
783 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
784 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
// Resize creation check: ClResizeWorkload, Float32, NCHW layout.
788 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
790 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
// Resize creation check: ClResizeWorkload, Float16, NCHW layout.
793 BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload)
795 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
// Resize creation check: ClResizeWorkload, QuantisedAsymm8, NCHW layout.
798 BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
800 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
// Resize creation check: ClResizeWorkload, Float32, NHWC layout.
803 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
805 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
// Resize creation check: ClResizeWorkload, Float16, NHWC layout.
808 BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload)
810 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
// Resize creation check: ClResizeWorkload, QuantisedAsymm8, NHWC layout.
813 BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
815 ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
// Creates a mean workload via CreateMeanWorkloadTest and checks that the first input handle has
// shape { 1, 3, 7, 4 } and the first output handle has shape { 1, 4 }.
818 template <typename MeanWorkloadType, typename armnn::DataType DataType>
819 static void ClMeanWorkloadTest()
822 ClWorkloadFactory factory =
823 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
825 auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
827 // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
828 MeanQueueDescriptor queueDescriptor = workload->GetData();
829 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
830 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
832 // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
833 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 3, 7, 4 }));
834 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 }));
// Test case: runs ClMeanWorkloadTest with ClMeanWorkload and Float32 data.
837 BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
839 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
// Test case: runs ClMeanWorkloadTest with ClMeanWorkload and Float16 data.
842 BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
844 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
// Test case: runs ClMeanWorkloadTest with ClMeanWorkload and QuantisedAsymm8 data.
847 BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
849 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
// Shared body for the Concat workload creation tests.
//
// Builds a workload of type ConcatWorkloadType for the given DataType through
// CreateConcatWorkloadTest, then verifies the shapes of both input handles and the
// output handle recorded in the workload's ConcatQueueDescriptor.
//
// Template parameters:
//   ConcatWorkloadType - workload class handed to CreateConcatWorkloadTest.
//   DataType           - armnn::DataType handed to CreateConcatWorkloadTest.
//
// Parameters:
//   outputShape - forwarded to CreateConcatWorkloadTest and also used as the
//                 expected shape of the output tensor handle.
//   concatAxis  - forwarded to CreateConcatWorkloadTest.
852 template <typename ConcatWorkloadType, armnn::DataType DataType>
853 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
854 unsigned int concatAxis)
// Obtain a CL workload factory backed by a memory manager from the test helper.
857 ClWorkloadFactory factory =
858 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
860 auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
862 ConcatQueueDescriptor queueDescriptor = workload->GetData();
// The descriptor's tensor handles are downcast to IClTensorHandle so their shapes can be compared.
863 auto inputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
864 auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
865 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
// Both inputs are expected to be { 2, 3, 2, 5 } regardless of the axis; only the output shape varies per caller.
867 BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
868 BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
869 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
// Test case: Float32 concat along axis 0; expected output shape { 4, 3, 2, 5 }.
872 BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
874 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
// Test case: Float32 concat along axis 1; expected output shape { 2, 6, 2, 5 }.
877 BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
879 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
// Test case: Float32 concat along axis 3; expected output shape { 2, 3, 2, 10 }.
882 BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
884 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
// Test case: QuantisedAsymm8 concat along axis 0; expected output shape { 4, 3, 2, 5 }.
887 BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
889 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
// Test case: QuantisedAsymm8 concat along axis 1; expected output shape { 2, 6, 2, 5 }.
892 BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
894 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
// Test case: QuantisedAsymm8 concat along axis 3; expected output shape { 2, 3, 2, 10 }.
897 BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
899 ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
// Shared body for the SpaceToDepth workload creation tests.
//
// Builds a workload of type SpaceToDepthWorkloadType for the given DataType through
// CreateSpaceToDepthWorkloadTest, then verifies the shapes of the first input and
// first output CL tensor handles recorded in the workload's SpaceToDepthQueueDescriptor.
//
// Template parameters:
//   SpaceToDepthWorkloadType - workload class handed to CreateSpaceToDepthWorkloadTest.
//   DataType                 - armnn::DataType handed to CreateSpaceToDepthWorkloadTest.
//
// NOTE(review): the `typename` in front of `armnn::DataType` looks redundant for a
// non-type template parameter (ClCreateConcatWorkloadTest omits it) - confirm and align.
902 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
903 static void ClSpaceToDepthWorkloadTest()
// Obtain a CL workload factory backed by a memory manager from the test helper.
906 ClWorkloadFactory factory =
907 ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
909 auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
911 SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
// The descriptor's tensor handles are downcast to IClTensorHandle so their shapes can be compared.
912 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
913 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
// Expected shapes: input { 1, 2, 2, 1 }, output { 1, 1, 1, 4 }.
915 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
916 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
// Test case: runs ClSpaceToDepthWorkloadTest with ClSpaceToDepthWorkload and Float32 data.
919 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
921 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
// Test case: runs ClSpaceToDepthWorkloadTest with ClSpaceToDepthWorkload and Float16 data.
924 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
926 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
// Test case: runs ClSpaceToDepthWorkloadTest with ClSpaceToDepthWorkload and QuantisedAsymm8 data.
929 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
931 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QuantisedAsymm8>();
// Test case: runs ClSpaceToDepthWorkloadTest with ClSpaceToDepthWorkload and QuantisedSymm16 data.
934 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
936 ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
// Closes the CreateWorkloadCl test suite opened with BOOST_FIXTURE_TEST_SUITE near the top of the file.
939 BOOST_AUTO_TEST_SUITE_END()