IVGCVSW-3656 Rename ARMCOMPUTEREF to ARMNNREF
[platform/upstream/armnn.git] / src/backends/cl/test/ClCreateWorkloadTests.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "ClContextControlFixture.hpp"
7 #include "ClWorkloadFactoryHelper.hpp"
8
9 #include <backendsCommon/MemCopyWorkload.hpp>
10
11 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
12 #include <aclCommon/ArmComputeTensorUtils.hpp>
13
14 #include <cl/ClTensorHandle.hpp>
15 #include <cl/ClWorkloadFactory.hpp>
16 #include <cl/workloads/ClWorkloads.hpp>
17 #include <cl/workloads/ClWorkloadUtils.hpp>
18
19 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle*                    tensorHandle,
20                                                                 std::initializer_list<unsigned int> expectedDimensions)
21 {
22     return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
23 }
24
25 BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)
26
27 template <armnn::DataType DataType>
28 static void ClCreateActivationWorkloadTest()
29 {
30     Graph graph;
31     ClWorkloadFactory factory =
32         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
33
34     auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
35
36     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
37     ActivationQueueDescriptor queueDescriptor = workload->GetData();
38     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
39     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
40
41     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 1}));
42     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 1}));
43 }
44
45 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
46 {
47     ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
48 }
49
50 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
51 {
52     ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
53 }
54
55 template <typename WorkloadType,
56           typename DescriptorType,
57           typename LayerType,
58           armnn::DataType DataType>
59 static void ClCreateElementwiseWorkloadTest()
60 {
61     Graph graph;
62     ClWorkloadFactory factory =
63         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
64
65     auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
66
67     // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
68     DescriptorType queueDescriptor = workload->GetData();
69     auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
70     auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
71     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
72     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
73     BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
74     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
75 }
76
77 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
78 {
79     ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
80                                     AdditionQueueDescriptor,
81                                     AdditionLayer,
82                                     armnn::DataType::Float32>();
83 }
84
85 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
86 {
87     ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
88                                     AdditionQueueDescriptor,
89                                     AdditionLayer,
90                                     armnn::DataType::Float16>();
91 }
92
93 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
94 {
95     ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
96                                     SubtractionQueueDescriptor,
97                                     SubtractionLayer,
98                                     armnn::DataType::Float32>();
99 }
100
101 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
102 {
103     ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
104                                     SubtractionQueueDescriptor,
105                                     SubtractionLayer,
106                                     armnn::DataType::Float16>();
107 }
108
109 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
110 {
111     ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
112                                     MultiplicationQueueDescriptor,
113                                     MultiplicationLayer,
114                                     armnn::DataType::Float32>();
115 }
116
117 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
118 {
119     ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
120                                     MultiplicationQueueDescriptor,
121                                     MultiplicationLayer,
122                                     armnn::DataType::Float16>();
123 }
124
125 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
126 {
127     ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
128                                     MultiplicationQueueDescriptor,
129                                     MultiplicationLayer,
130                                     armnn::DataType::QuantisedAsymm8>();
131 }
132
133 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
134 {
135     ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
136                                     DivisionQueueDescriptor,
137                                     DivisionLayer,
138                                     armnn::DataType::Float32>();
139 }
140
141 BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
142 {
143     ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
144                                     DivisionQueueDescriptor,
145                                     DivisionLayer,
146                                     armnn::DataType::Float16>();
147 }
148
149 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
150 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
151 {
152     Graph graph;
153     ClWorkloadFactory factory =
154         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
155
156     auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
157                     (factory, graph, dataLayout);
158
159     // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
160     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
161     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
162     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
163
164     switch (dataLayout)
165     {
166         case DataLayout::NHWC:
167             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
168             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
169             break;
170         default: // NCHW
171             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
172             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
173     }
174 }
175
176 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
177 {
178     ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
179                                            armnn::DataType::Float32>(DataLayout::NCHW);
180 }
181
182 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
183 {
184     ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
185                                            armnn::DataType::Float16>(DataLayout::NCHW);
186 }
187
188 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
189 {
190     ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
191                                            armnn::DataType::Float32>(DataLayout::NHWC);
192 }
193
194 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
195 {
196     ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
197                                            armnn::DataType::Float16>(DataLayout::NHWC);
198 }
199
200 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
201 {
202     Graph graph;
203     ClWorkloadFactory factory =
204         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
205
206     auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
207
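    // Checks that inputs/outputs are as we expect them (see definition of CreateConvertFp16ToFp32WorkloadTest).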
208     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
209     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
210     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
211
212     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
213     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
214     BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
215     BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
216 }
217
218 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
219 {
220     Graph graph;
221     ClWorkloadFactory factory =
222         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
223
224     auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
225
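    // Checks that inputs/outputs are as we expect them (see definition of CreateConvertFp32ToFp16WorkloadTest).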
226     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
227     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
228     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
229
230     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1, 3, 2, 3}));
231     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 3, 2, 3}));
232     BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
233     BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
234 }
235
236 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
237 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
238 {
239     Graph graph;
240     ClWorkloadFactory factory =
241         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
242
243     auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
244                                                                                        graph,
245                                                                                        dataLayout);
246
247     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 3, 8, 16})
248                                                                : std::initializer_list<unsigned int>({2, 8, 16, 3});
249     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({2, 2, 2, 10})
250                                                                : std::initializer_list<unsigned int>({2, 2, 10, 2});
251
252     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
253     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
254     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
255     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
256     BOOST_TEST((inputHandle->GetShape() == inputShape));
257     BOOST_TEST((outputHandle->GetShape() == outputShape));
258 }
259
260 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
261 {
262     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
263 }
264
265 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
266 {
267     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
268 }
269
270 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
271 {
272     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
273 }
274
275 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
276 {
277     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
278 }
279
280 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
281 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
282 {
283     Graph graph;
284     ClWorkloadFactory factory =
285         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
286
287     auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
288                     (factory, graph, dataLayout);
289
290     // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
291     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
292     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
293     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
294
295     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
296                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
297     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
298                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
299
300     BOOST_TEST((inputHandle->GetShape() == inputShape));
301     BOOST_TEST((outputHandle->GetShape() == outputShape));
302 }
303
304 BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
305 {
306     ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
307 }
308
309 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
310 static void ClDirectConvolution2dWorkloadTest()
311 {
312     Graph graph;
313     ClWorkloadFactory factory =
314         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
315
316     auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
317
318     // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
319     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
320     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
321     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
322     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
323     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
324 }
325
326 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
327 {
328     ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
329 }
330
331 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
332 {
333     ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
334 }
335
336 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
337 {
338     ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QuantisedAsymm8>();
339 }
340
341 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
342 static void ClCreateFullyConnectedWorkloadTest()
343 {
344     Graph graph;
345     ClWorkloadFactory factory =
346         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
347
348     auto workload =
349         CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
350
351     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
352     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
353     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
354     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
355     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
356     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
357 }
358
359
360 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
361 {
362     ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
363 }
364
365 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
366 {
367     ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
368 }
369
370 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
371 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
372 {
373     Graph graph;
374     ClWorkloadFactory factory =
375         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
376
377     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
378
379     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
380     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
381     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
382     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
383
384     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
385                                                                : std::initializer_list<unsigned int>({3, 1, 5, 5});
386     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 5, 5, 1})
387                                                                : std::initializer_list<unsigned int>({3, 1, 5, 5});
388
389     BOOST_TEST((inputHandle->GetShape() == inputShape));
390     BOOST_TEST((outputHandle->GetShape() == outputShape));
391 }
392
393 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
394 {
395     ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
396 }
397
398 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
399 {
400     ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
401 }
402
403 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
404 {
405     ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
406 }
407
408 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
409 {
410     ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
411 }
412
413 template <typename armnn::DataType DataType>
414 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
415 {
416     Graph graph;
417     ClWorkloadFactory factory =
418         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
419
420     auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
421
422     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 5, 5})
423                                                                : std::initializer_list<unsigned int>({3, 5, 5, 2});
424     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({3, 2, 2, 4})
425                                                                : std::initializer_list<unsigned int>({3, 2, 4, 2});
426
427     // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
428     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
429     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
430     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
431
432     BOOST_TEST((inputHandle->GetShape() == inputShape));
433     BOOST_TEST((outputHandle->GetShape() == outputShape));
434 }
435
436 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
437 {
438     ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
439 }
440
441 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
442 {
443     ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
444 }
445
446 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
447 {
448     ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
449 }
450
451 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
452 {
453     ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
454 }
455
456 static void ClCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
457                                       const armnn::TensorShape& alphaShape,
458                                       const armnn::TensorShape& outputShape,
459                                       armnn::DataType dataType)
460 {
461     Graph graph;
462     ClWorkloadFactory factory =
463             ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
464
465     auto workload = CreatePreluWorkloadTest<ClPreluWorkload>(factory,
466                                                              graph,
467                                                              inputShape,
468                                                              alphaShape,
469                                                              outputShape,
470                                                              dataType);
471
472     // Checks that outputs and inputs are as we expect them (see definition of CreatePreluWorkloadTest).
473     PreluQueueDescriptor queueDescriptor = workload->GetData();
474     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
475     auto alphaHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
476     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
477
478     BOOST_TEST((inputHandle->GetShape() == inputShape));
479     BOOST_TEST((alphaHandle->GetShape() == alphaShape));
480     BOOST_TEST((outputHandle->GetShape() == outputShape));
481 }
482
483 BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
484 {
485     ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
486 }
487
488 BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
489 {
490     ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
491 }
492
493 BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
494 {
495     ClCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
496 }
497
498 template <typename armnn::DataType DataType>
499 static void ClCreateReshapeWorkloadTest()
500 {
501     Graph graph;
502     ClWorkloadFactory factory =
503         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
504
505     auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
506
507     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
508     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
509     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
510     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
511
512     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
513     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1, 4}));
514 }
515
516 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
517 {
518     ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
519 }
520
521 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
522 {
523     ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
524 }
525
526 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
527 {
528     ClCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
529 }
530
531 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
532 static void ClSoftmaxWorkloadTest()
533 {
534     Graph graph;
535     ClWorkloadFactory factory =
536         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
537
538     auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
539
540     // Checks that inputs/outputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
541     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
542     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
543     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
544
545     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
546     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
547 }
548
549
550 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest)
551 {
552     ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>();
553 }
554
555 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
556 {
557     ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>();
558 }
559
560 template <typename armnn::DataType DataType>
561 static void ClSplitterWorkloadTest()
562 {
563     Graph graph;
564     ClWorkloadFactory factory =
565         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
566
567     auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
568
569     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
570     SplitterQueueDescriptor queueDescriptor = workload->GetData();
571     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
572     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
573
574     auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
575     BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
576
577     auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
578     BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
579
580     auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
581     BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {1, 7, 7}));
582 }
583
584 BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
585 {
586     ClSplitterWorkloadTest<armnn::DataType::Float32>();
587 }
588
589 BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
590 {
591     ClSplitterWorkloadTest<armnn::DataType::Float16>();
592 }
593
594 template <typename armnn::DataType DataType>
595 static void ClSplitterConcatTest()
596 {
597     // Tests that it is possible to decide which output of the splitter layer
598     // should be linked to which input of the concat layer.
599     // We test that it is possible to specify the 0th output
600     // of the splitter to be the 1st input to the concat and the 1st output of the splitter to be the 0th input
601     // of the concat.
602
603     Graph graph;
604     ClWorkloadFactory factory =
605         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
606
607     auto workloads =
608         CreateSplitterConcatWorkloadTest<ClSplitterWorkload, ClConcatWorkload, DataType>
609             (factory, graph);
610
611     auto wlSplitter = std::move(workloads.first);
612     auto wlConcat = std::move(workloads.second);
613
614     // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
615     armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
616     armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
617     armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
618     armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
619
620     BOOST_TEST(sOut0);
621     BOOST_TEST(sOut1);
622     BOOST_TEST(mIn0);
623     BOOST_TEST(mIn1);
624
625     // Flipped order of inputs/outputs.
626     bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
627     BOOST_TEST(validDataPointers);
628
629
630     // Also make sure that the inputs are subtensors of one tensor and the outputs are subtensors of another tensor.
631     bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
632                                     && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
633
634     BOOST_TEST(validSubTensorParents);
635 }
636
637 BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloatWorkload)
638 {
639     ClSplitterConcatTest<armnn::DataType::Float32>();
640 }
641
642 BOOST_AUTO_TEST_CASE(CreateSplitterConcatFloat16Workload)
643 {
644     ClSplitterConcatTest<armnn::DataType::Float16>();
645 }
646
647
648 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
649 {
650     // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
651     // We create a splitter with two outputs, and check that each of those outputs is used by two different activation layers.
652
653     Graph graph;
654     ClWorkloadFactory factory =
655         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
656
657     std::unique_ptr<ClSplitterWorkload> wlSplitter;
658     std::unique_ptr<ClActivationWorkload> wlActiv0_0;
659     std::unique_ptr<ClActivationWorkload> wlActiv0_1;
660     std::unique_ptr<ClActivationWorkload> wlActiv1_0;
661     std::unique_ptr<ClActivationWorkload> wlActiv1_1;
662
663     CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
664         ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
665                                                                wlActiv1_0, wlActiv1_1);
666
667     // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
668     armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
669     armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
670     armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
671     armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
672     armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
673     armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
674
675
676     BOOST_TEST(sOut0);
677     BOOST_TEST(sOut1);
678     BOOST_TEST(activ0_0Im);
679     BOOST_TEST(activ0_1Im);
680     BOOST_TEST(activ1_0Im);
681     BOOST_TEST(activ1_1Im);
682
683     bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
684                              (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
685
686     BOOST_TEST(validDataPointers);
687 }
688
689 #if defined(ARMNNREF_ENABLED)
690
691 // This test unit needs the reference backend; it is not available if the reference backend is not built.
692
693 BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
694 {
695     ClWorkloadFactory factory =
696         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
697
698     CreateMemCopyWorkloads<IClTensorHandle>(factory);
699 }
700
701 #endif
702
703 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
704 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
705 {
706     Graph graph;
707     ClWorkloadFactory factory =
708         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
709
710     auto workload =
711             CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
712
713     // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
714     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
715     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
716     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
717
718     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
719                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
720     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
721                                                                : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
722
723     BOOST_TEST((inputHandle->GetShape() == inputShape));
724     BOOST_TEST((outputHandle->GetShape() == outputShape));
725 }
726
727 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
728 {
729     ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
730 }
731
732 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
733 {
734     ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
735 }
736
737 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
738 {
739     ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
740 }
741
742 BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
743 {
744     ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
745 }
746
747 template <typename LstmWorkloadType>
748 static void ClCreateLstmWorkloadTest()
749 {
750     Graph graph;
751     ClWorkloadFactory factory =
752         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
753
754     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
755
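    // Checks that inputs/outputs are as we expect them (see definition of CreateLstmWorkloadTest).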
756     LstmQueueDescriptor queueDescriptor = workload->GetData();
757     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
758     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
759     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
760     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
761 }
762
763 BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
764 {
765     ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
766 }
767
768 template <typename ResizeWorkloadType, typename armnn::DataType DataType>
769 static void ClResizeWorkloadTest(DataLayout dataLayout)
770 {
771     Graph graph;
772     ClWorkloadFactory factory =
773         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
774
775     auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
776
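    // Checks that inputs/outputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest).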
777     auto queueDescriptor = workload->GetData();
778
779     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
780     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
781
782     switch (dataLayout)
783     {
784         case DataLayout::NHWC:
785             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
786             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
787             break;
788         case DataLayout::NCHW:
789         default:
790             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
791             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
792     }
793 }
794
795 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
796 {
797     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
798 }
799
800 BOOST_AUTO_TEST_CASE(CreateResizeFloat16NchwWorkload)
801 {
802     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
803 }
804
805 BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
806 {
807     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
808 }
809
810 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
811 {
812     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
813 }
814
815 BOOST_AUTO_TEST_CASE(CreateResizeFloat16NhwcWorkload)
816 {
817     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
818 }
819
820 BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
821 {
822     ClResizeWorkloadTest<ClResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
823 }
824
825 template <typename MeanWorkloadType, typename armnn::DataType DataType>
826 static void ClMeanWorkloadTest()
827 {
828     Graph graph;
829     ClWorkloadFactory factory =
830         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
831
832     auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
833
834     // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
835     MeanQueueDescriptor queueDescriptor = workload->GetData();
836     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
837     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
838
839     // The first dimension (batch size) in both input and output is singular, and has thus been reduced by ACL.
840     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {  1, 3, 7, 4 }));
841     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 4 }));
842 }
843
844 BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
845 {
846     ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
847 }
848
849 BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
850 {
851     ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
852 }
853
854 BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
855 {
856     ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
857 }
858
859 template <typename ConcatWorkloadType, armnn::DataType DataType>
860 static void ClCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
861                                        unsigned int concatAxis)
862 {
863     Graph graph;
864     ClWorkloadFactory factory =
865         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
866
867     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
868
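    // Checks that inputs/outputs are as we expect them (see definition of CreateConcatWorkloadTest).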
869     ConcatQueueDescriptor queueDescriptor = workload->GetData();
870     auto inputHandle0  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
871     auto inputHandle1  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
872     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
873
874     BOOST_TEST(CompareIClTensorHandleShape(inputHandle0, { 2, 3, 2, 5 }));
875     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, { 2, 3, 2, 5 }));
876     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
877 }
878
879 BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
880 {
881     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
882 }
883
884 BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
885 {
886     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
887 }
888
889 BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
890 {
891     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
892 }
893
894 BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
895 {
896     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
897 }
898
899 BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
900 {
901     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
902 }
903
904 BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
905 {
906     ClCreateConcatWorkloadTest<ClConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
907 }
908
909 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
910 static void ClSpaceToDepthWorkloadTest()
911 {
912     Graph graph;
913     ClWorkloadFactory factory =
914             ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
915
916     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
917
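    // Checks that inputs/outputs are as we expect them (see definition of CreateSpaceToDepthWorkloadTest).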
918     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
919     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
920     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
921
922     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 1, 2, 2, 1 }));
923     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 1, 1, 1, 4 }));
924 }
925
926 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
927 {
928     ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float32>();
929 }
930
931 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
932 {
933     ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::Float16>();
934 }
935
936 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
937 {
938     ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QuantisedAsymm8>();
939 }
940
941 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
942 {
943     ClSpaceToDepthWorkloadTest<ClSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
944 }
945
946 template <armnn::DataType DataType>
947 static void ClCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
948                                       const std::initializer_list<unsigned int>& outputShape,
949                                       unsigned int axis,
950                                       unsigned int numInputs)
951 {
952     armnn::Graph graph;
953     ClWorkloadFactory factory =
954             ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
955
956     auto workload = CreateStackWorkloadTest<ClStackWorkload, DataType>(factory,
957                                                                        graph,
958                                                                        TensorShape(inputShape),
959                                                                        TensorShape(outputShape),
960                                                                        axis,
961                                                                        numInputs);
962
963     // Check inputs and output are as expected
964     StackQueueDescriptor queueDescriptor = workload->GetData();
965     for (unsigned int i = 0; i < numInputs; ++i)
966     {
967         auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[i]);
968         BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
969     }
970     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
971     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
972 }
973
974 BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
975 {
976     ClCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
977 }
978
979 BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
980 {
981     ClCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
982 }
983
984 template <typename QuantizedLstmWorkloadType>
985 static void ClCreateQuantizedLstmWorkloadTest()
986 {
987     using namespace armnn::armcomputetensorutils;
988     using boost::polymorphic_downcast;
989
990     Graph graph;
991     ClWorkloadFactory factory =
992             ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
993
994     auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
995
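    // Checks that inputs/outputs are as we expect them (see definition of CreateQuantizedLstmWorkloadTest).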
996     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
997
998     IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
999     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
1000     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1001
1002     IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
1003     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
1004     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1005
1006     IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
1007     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
1008     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1009
1010     IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
1011     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
1012     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
1013
1014     IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
1015     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
1016     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
1017 }
1018
1019 BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
1020 {
1021     ClCreateQuantizedLstmWorkloadTest<ClQuantizedLstmWorkload>();
1022 }
1023
1024 BOOST_AUTO_TEST_SUITE_END()