// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
6 #include "ClContextControlFixture.hpp"
8 #include <backendsCommon/MemCopyWorkload.hpp>
10 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
12 #include <cl/ClTensorHandle.hpp>
13 #include <cl/ClWorkloadFactory.hpp>
14 #include <cl/workloads/ClWorkloads.hpp>
15 #include <cl/workloads/ClWorkloadUtils.hpp>
17 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
18 std::initializer_list<unsigned int> expectedDimensions)
20 return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
23 BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)
25 template <armnn::DataType DataType>
26 static void ClCreateActivationWorkloadTest()
29 ClWorkloadFactory factory;
31 auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
33 // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
34 ActivationQueueDescriptor queueDescriptor = workload->GetData();
35 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
36 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
38 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1}));
39 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1}));
42 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
44 ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
47 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
49 ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
52 template <typename WorkloadType,
53 typename DescriptorType,
55 armnn::DataType DataType>
56 static void ClCreateArithmethicWorkloadTest()
59 ClWorkloadFactory factory;
60 auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
62 // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest).
63 DescriptorType queueDescriptor = workload->GetData();
64 auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
65 auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
66 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
67 BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
68 BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
69 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
72 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
74 ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
75 AdditionQueueDescriptor,
77 armnn::DataType::Float32>();
80 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
82 ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
83 AdditionQueueDescriptor,
85 armnn::DataType::Float16>();
88 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
90 ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
91 SubtractionQueueDescriptor,
93 armnn::DataType::Float32>();
96 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
98 ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
99 SubtractionQueueDescriptor,
101 armnn::DataType::Float16>();
104 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
106 ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
107 MultiplicationQueueDescriptor,
109 armnn::DataType::Float32>();
112 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
114 ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
115 MultiplicationQueueDescriptor,
117 armnn::DataType::Float16>();
120 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
122 ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
123 MultiplicationQueueDescriptor,
125 armnn::DataType::QuantisedAsymm8>();
128 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
130 ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
131 DivisionQueueDescriptor,
133 armnn::DataType::Float32>();
136 BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
138 ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
139 DivisionQueueDescriptor,
141 armnn::DataType::Float16>();
144 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
145 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
148 ClWorkloadFactory factory;
150 auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
151 (factory, graph, dataLayout);
153 // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
154 BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
155 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
156 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
160 case DataLayout::NHWC:
161 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
162 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
165 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
166 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
170 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
172 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
173 armnn::DataType::Float32>(DataLayout::NCHW);
176 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
178 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
179 armnn::DataType::Float16>(DataLayout::NCHW);
182 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
184 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
185 armnn::DataType::Float32>(DataLayout::NHWC);
188 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
190 ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
191 armnn::DataType::Float16>(DataLayout::NHWC);
194 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
197 ClWorkloadFactory factory;
198 auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
200 ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
201 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
202 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
204 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3}));
205 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3}));
206 BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
207 BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
210 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
213 ClWorkloadFactory factory;
214 auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
216 ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
217 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
218 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
220 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3}));
221 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3}));
222 BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
223 BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
226 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
227 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
230 ClWorkloadFactory factory;
231 auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
235 std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW) ?
236 std::initializer_list<unsigned int>({2, 3, 8, 16}) : std::initializer_list<unsigned int>({2, 8, 16, 3});
237 std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
238 std::initializer_list<unsigned int>({2, 2, 2, 10}) : std::initializer_list<unsigned int>({2, 2, 10, 2});
240 // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
241 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
242 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
243 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
244 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
245 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
248 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
250 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
253 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
255 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
258 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
260 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
263 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
265 ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
268 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
269 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
272 ClWorkloadFactory factory;
274 auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
275 (factory, graph, dataLayout);
277 // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
278 DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
279 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
280 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
282 std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW)
283 ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
284 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
285 std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
286 ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
287 : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
289 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
290 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
293 BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
295 ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
298 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
299 static void ClDirectConvolution2dWorkloadTest()
302 ClWorkloadFactory factory;
303 auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
305 // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
306 Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
307 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
308 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
309 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
310 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
313 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
315 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
318 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
320 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
323 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
325 ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QuantisedAsymm8>();
328 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
329 static void ClCreateFullyConnectedWorkloadTest()
332 ClWorkloadFactory factory;
334 CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
336 // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
337 FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
338 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
339 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
340 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
341 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
345 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
347 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
350 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
352 ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
355 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
356 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
359 ClWorkloadFactory factory;
360 auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
362 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
363 NormalizationQueueDescriptor queueDescriptor = workload->GetData();
364 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
365 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
367 std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW) ?
368 std::initializer_list<unsigned int>({3, 5, 5, 1}) : std::initializer_list<unsigned int>({3, 1, 5, 5});
369 std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
370 std::initializer_list<unsigned int>({3, 5, 5, 1}) : std::initializer_list<unsigned int>({3, 1, 5, 5});
372 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
373 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
376 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
378 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
381 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
383 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
386 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
388 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
391 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
393 ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
396 template <typename armnn::DataType DataType>
397 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
400 ClWorkloadFactory factory;
402 auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
404 std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW) ?
405 std::initializer_list<unsigned int>({3, 2, 5, 5}) : std::initializer_list<unsigned int>({3, 5, 5, 2});
406 std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
407 std::initializer_list<unsigned int>({3, 2, 2, 4}) : std::initializer_list<unsigned int>({3, 2, 4, 2});
409 // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
410 Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
411 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
412 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
414 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
415 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
418 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
420 ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
423 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
425 ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
428 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
430 ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
433 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
435 ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
438 template <typename armnn::DataType DataType>
439 static void ClCreateReshapeWorkloadTest()
442 ClWorkloadFactory factory;
444 auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
446 // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
447 ReshapeQueueDescriptor queueDescriptor = workload->GetData();
448 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
449 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
451 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
452 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL.
455 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
457 ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
460 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
462 ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
465 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
467 ClCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
470 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
471 static void ClSoftmaxWorkloadTest()
474 ClWorkloadFactory factory;
476 auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
478 // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
479 SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
480 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
481 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
483 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
484 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
488 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest)
490 ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>();
493 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
495 ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>();
498 template <typename armnn::DataType DataType>
499 static void ClSplitterWorkloadTest()
502 ClWorkloadFactory factory;
504 auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
506 // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
507 SplitterQueueDescriptor queueDescriptor = workload->GetData();
508 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
509 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
511 auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
512 BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
514 auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
515 BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
517 auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
518 // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1
519 // we are raising this difference between the NEON and CL libs as an issue with the compute library team.
520 BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7}));
523 BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
525 ClSplitterWorkloadTest<armnn::DataType::Float32>();
528 BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
530 ClSplitterWorkloadTest<armnn::DataType::Float16>();
533 template <typename armnn::DataType DataType>
534 static void ClSplitterMergerTest()
536 // Tests that it is possible to decide which output of the splitter layer
537 // should be lined to which input of the merger layer.
538 // We test that is is possible to specify 0th output
539 // of the splitter to be the 1st input to the merger and the 1st output of the splitter to be 0th input
543 ClWorkloadFactory factory;
546 CreateSplitterMergerWorkloadTest<ClSplitterWorkload, ClMergerWorkload, DataType>
549 auto wlSplitter = std::move(workloads.first);
550 auto wlMerger = std::move(workloads.second);
552 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
553 armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
554 armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
555 armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
556 armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
563 //Fliped order of inputs/outputs.
564 bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
565 BOOST_TEST(validDataPointers);
568 //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
569 bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
570 && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
572 BOOST_TEST(validSubTensorParents);
575 BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloatWorkload)
577 ClSplitterMergerTest<armnn::DataType::Float32>();
580 BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat16Workload)
582 ClSplitterMergerTest<armnn::DataType::Float16>();
586 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
588 // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
589 // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.
592 ClWorkloadFactory factory;
593 std::unique_ptr<ClSplitterWorkload> wlSplitter;
594 std::unique_ptr<ClActivationWorkload> wlActiv0_0;
595 std::unique_ptr<ClActivationWorkload> wlActiv0_1;
596 std::unique_ptr<ClActivationWorkload> wlActiv1_0;
597 std::unique_ptr<ClActivationWorkload> wlActiv1_1;
599 CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
600 ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
601 wlActiv1_0, wlActiv1_1);
603 //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
604 armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
605 armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
606 armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
607 armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
608 armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
609 armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
614 BOOST_TEST(activ0_0Im);
615 BOOST_TEST(activ0_1Im);
616 BOOST_TEST(activ1_0Im);
617 BOOST_TEST(activ1_1Im);
619 bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
620 (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
622 BOOST_TEST(validDataPointers);
625 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
627 ClWorkloadFactory factory;
628 CreateMemCopyWorkloads<IClTensorHandle>(factory);
631 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
632 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
635 ClWorkloadFactory factory;
637 CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
639 // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
640 L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
641 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
642 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
644 std::initializer_list<unsigned int> inputShape = (dataLayout == DataLayout::NCHW)
645 ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
646 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
647 std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
648 ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
649 : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
651 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
652 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
655 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
657 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
660 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
662 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
665 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
667 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
670 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
672 ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
675 template <typename LstmWorkloadType>
676 static void ClCreateLstmWorkloadTest()
679 ClWorkloadFactory factory;
680 auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
682 LstmQueueDescriptor queueDescriptor = workload->GetData();
683 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
684 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
685 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
686 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
689 BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
691 ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
694 template <typename ResizeBilinearWorkloadType, typename armnn::DataType DataType>
695 static void ClResizeBilinearWorkloadTest(DataLayout dataLayout)
698 ClWorkloadFactory factory;
700 auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType, DataType>(factory, graph, dataLayout);
702 // Checks that inputs/outputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest).
703 ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData();
704 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
705 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
709 case DataLayout::NHWC:
710 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
711 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
713 case DataLayout::NCHW:
715 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
716 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
720 BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32NchwWorkload)
722 ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
725 BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat16NchwWorkload)
727 ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
730 BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32NhwcWorkload)
732 ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
735 BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat16NhwcWorkload)
737 ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
740 template <typename MeanWorkloadType, typename armnn::DataType DataType>
741 static void ClMeanWorkloadTest()
744 ClWorkloadFactory factory;
745 auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
747 // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
748 MeanQueueDescriptor queueDescriptor = workload->GetData();
749 auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
750 auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
752 // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
753 BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 3, 7, 4 }));
754 BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 4 }));
757 BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
759 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
762 BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
764 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
767 BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
769 ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
772 BOOST_AUTO_TEST_SUITE_END()