IVGCVSW-1946: Remove armnn/src from the include paths
[platform/upstream/armnn.git] / src / backends / cl / test / ClCreateWorkloadTests.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "ClContextControlFixture.hpp"
7
8 #include <backendsCommon/MemCopyWorkload.hpp>
9
10 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
11
12 #include <cl/ClTensorHandle.hpp>
13 #include <cl/ClWorkloadFactory.hpp>
14 #include <cl/workloads/ClWorkloads.hpp>
15 #include <cl/workloads/ClWorkloadUtils.hpp>
16
17 boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle*                    tensorHandle,
18                                                                 std::initializer_list<unsigned int> expectedDimensions)
19 {
20     return CompareTensorHandleShape<IClTensorHandle>(tensorHandle, expectedDimensions);
21 }
22
23 BOOST_FIXTURE_TEST_SUITE(CreateWorkloadCl, ClContextControlFixture)
24
25 template <armnn::DataType DataType>
26 static void ClCreateActivationWorkloadTest()
27 {
28     Graph graph;
29     ClWorkloadFactory factory;
30
31     auto workload = CreateActivationWorkloadTest<ClActivationWorkload, DataType>(factory, graph);
32
33     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
34     ActivationQueueDescriptor queueDescriptor = workload->GetData();
35     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
36     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
37
38     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {1}));
39     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {1}));
40 }
41
// Activation workload creation for the two CL float data types.
BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    ClCreateActivationWorkloadTest<armnn::DataType::Float16>();
}
51
52 template <typename WorkloadType,
53           typename DescriptorType,
54           typename LayerType,
55           armnn::DataType DataType>
56 static void ClCreateArithmethicWorkloadTest()
57 {
58     Graph graph;
59     ClWorkloadFactory factory;
60     auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
61
62     // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest).
63     DescriptorType queueDescriptor = workload->GetData();
64     auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
65     auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
66     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
67     BOOST_TEST(CompareIClTensorHandleShape(inputHandle1, {2, 3}));
68     BOOST_TEST(CompareIClTensorHandleShape(inputHandle2, {2, 3}));
69     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 3}));
70 }
71
// Element-wise arithmetic workload creation (addition, subtraction,
// multiplication, division) instantiated per operation and data type via
// ClCreateArithmethicWorkloadTest.
BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
                                    AdditionQueueDescriptor,
                                    AdditionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
                                    AdditionQueueDescriptor,
                                    AdditionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
                                    SubtractionQueueDescriptor,
                                    SubtractionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
                                    SubtractionQueueDescriptor,
                                    SubtractionLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
{
    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
{
    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
{
    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
                                    MultiplicationQueueDescriptor,
                                    MultiplicationLayer,
                                    armnn::DataType::QuantisedAsymm8>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
{
    ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
                                    DivisionQueueDescriptor,
                                    DivisionLayer,
                                    armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
{
    ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
                                    DivisionQueueDescriptor,
                                    DivisionLayer,
                                    armnn::DataType::Float16>();
}
143
144 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
145 static void ClCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
146 {
147     Graph graph;
148     ClWorkloadFactory factory;
149
150     auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
151                     (factory, graph, dataLayout);
152
153     // Checks that inputs/outputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
154     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
155     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
156     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
157
158      switch (dataLayout)
159     {
160         case DataLayout::NHWC:
161             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
162             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4, 4, 3 }));
163             break;
164         default: // NCHW
165             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
166             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 4, 4 }));
167     }
168 }
169
// Batch normalization workload creation for float/half in both data layouts.
// NOTE(review): the last test name repeats "Nhwc"
// (CreateBatchNormalizationNhwcFloat16NhwcWorkload); left unchanged because
// test names may be referenced by external run filters.
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationNhwcFloat16NhwcWorkload)
{
    ClCreateBatchNormalizationWorkloadTest<ClBatchNormalizationFloatWorkload,
                                           armnn::DataType::Float16>(DataLayout::NHWC);
}
193
194 BOOST_AUTO_TEST_CASE(CreateConvertFp16ToFp32Workload)
195 {
196     Graph graph;
197     ClWorkloadFactory factory;
198     auto workload = CreateConvertFp16ToFp32WorkloadTest<ClConvertFp16ToFp32Workload>(factory, graph);
199
200     ConvertFp16ToFp32QueueDescriptor queueDescriptor = workload->GetData();
201     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
202     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
203
204     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3}));
205     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3}));
206     BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
207     BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
208 }
209
210 BOOST_AUTO_TEST_CASE(CreateConvertFp32ToFp16Workload)
211 {
212     Graph graph;
213     ClWorkloadFactory factory;
214     auto workload = CreateConvertFp32ToFp16WorkloadTest<ClConvertFp32ToFp16Workload>(factory, graph);
215
216     ConvertFp32ToFp16QueueDescriptor queueDescriptor = workload->GetData();
217     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
218     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
219
220     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 2, 3}));
221     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 2, 3}));
222     BOOST_TEST((inputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F32));
223     BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
224 }
225
226 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
227 static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
228 {
229     Graph graph;
230     ClWorkloadFactory factory;
231     auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
232                                                                                        graph,
233                                                                                        dataLayout);
234
235     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW) ?
236             std::initializer_list<unsigned int>({2, 3, 8, 16}) : std::initializer_list<unsigned int>({2, 8, 16, 3});
237     std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
238             std::initializer_list<unsigned int>({2, 2, 2, 10}) : std::initializer_list<unsigned int>({2, 2, 10, 2});
239
240     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
241     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
242     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
243     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
244     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
245     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
246 }
247
// Convolution2d workload creation for float/half in both data layouts.
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
267
268 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
269 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
270 {
271     Graph graph;
272     ClWorkloadFactory factory;
273
274     auto workload = CreateDepthwiseConvolution2dWorkloadTest<DepthwiseConvolutionWorkloadType, DataType>
275                     (factory, graph, dataLayout);
276
277     // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
278     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
279     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
280     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
281
282     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW)
283             ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
284             : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
285     std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
286             ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
287             : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
288
289     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
290     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
291 }
292
// Depthwise convolution workload creation (NHWC, Float32 only).
BOOST_AUTO_TEST_CASE(CreateDepthwiseConvolutionFloat32NhwcWorkload)
{
    ClDepthwiseConvolutionWorkloadTest<ClDepthwiseConvolutionWorkload, DataType::Float32>(DataLayout::NHWC);
}
297
298 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
299 static void ClDirectConvolution2dWorkloadTest()
300 {
301     Graph graph;
302     ClWorkloadFactory factory;
303     auto workload = CreateDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
304
305     // Checks that outputs and inputs are as we expect them (see definition of CreateDirectConvolution2dWorkloadTest).
306     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
307     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
308     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
309     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 6, 6}));
310     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 6, 6}));
311 }
312
// Direct convolution workload creation for float, half and quantised uint8.
BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
{
    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QuantisedAsymm8>();
}
327
328 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
329 static void ClCreateFullyConnectedWorkloadTest()
330 {
331     Graph graph;
332     ClWorkloadFactory factory;
333     auto workload =
334         CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
335
336     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
337     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
338     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
339     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
340     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {3, 1, 4, 5}));
341     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 7}));
342 }
343
344
// Fully connected workload creation for the two CL float data types.
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16WorkloadTest)
{
    ClCreateFullyConnectedWorkloadTest<ClFullyConnectedWorkload, armnn::DataType::Float16>();
}
354
355 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
356 static void ClNormalizationWorkloadTest(DataLayout dataLayout)
357 {
358     Graph graph;
359     ClWorkloadFactory factory;
360     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
361
362     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
363     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
364     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
365     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
366
367     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW) ?
368             std::initializer_list<unsigned int>({3, 5, 5, 1}) : std::initializer_list<unsigned int>({3, 1, 5, 5});
369     std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
370             std::initializer_list<unsigned int>({3, 5, 5, 1}) : std::initializer_list<unsigned int>({3, 1, 5, 5});
371
372     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
373     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
374 }
375
// Normalization workload creation for float/half in both data layouts.
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
395
396 template <typename armnn::DataType DataType>
397 static void ClPooling2dWorkloadTest(DataLayout dataLayout)
398 {
399     Graph graph;
400     ClWorkloadFactory factory;
401
402     auto workload = CreatePooling2dWorkloadTest<ClPooling2dWorkload, DataType>(factory, graph, dataLayout);
403
404     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW) ?
405             std::initializer_list<unsigned int>({3, 2, 5, 5}) : std::initializer_list<unsigned int>({3, 5, 5, 2});
406     std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
407             std::initializer_list<unsigned int>({3, 2, 2, 4}) : std::initializer_list<unsigned int>({3, 2, 4, 2});
408
409     // Check that inputs/outputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
410     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
411     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
412     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
413
414     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
415     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
416 }
417
// Pooling2d workload creation for float/half in both data layouts.
BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NchwWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16NhwcWorkload)
{
    ClPooling2dWorkloadTest<armnn::DataType::Float16>(DataLayout::NHWC);
}
437
438 template <typename armnn::DataType DataType>
439 static void ClCreateReshapeWorkloadTest()
440 {
441     Graph graph;
442     ClWorkloadFactory factory;
443
444     auto workload = CreateReshapeWorkloadTest<ClReshapeWorkload, DataType>(factory, graph);
445
446     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
447     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
448     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
449     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
450
451     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
452     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4})); // Leading size 1 dimensions are collapsed by ACL.
453 }
454
// Reshape workload creation for float, half and quantised uint8.
BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    ClCreateReshapeWorkloadTest<armnn::DataType::QuantisedAsymm8>();
}
469
470 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
471 static void ClSoftmaxWorkloadTest()
472 {
473     Graph graph;
474     ClWorkloadFactory factory;
475
476     auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
477
478     // Checks that inputs/outputs are as we expect them (see definition of ClSoftmaxFloatWorkload).
479     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
480     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
481     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
482
483     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {4, 1}));
484     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {4, 1}));
485 }
486
487
// Softmax workload creation for the two CL float data types.
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16WorkloadTest)
{
    ClSoftmaxWorkloadTest<ClSoftmaxFloatWorkload, armnn::DataType::Float16>();
}
497
498 template <typename armnn::DataType DataType>
499 static void ClSplitterWorkloadTest()
500 {
501     Graph graph;
502     ClWorkloadFactory factory;
503
504     auto workload = CreateSplitterWorkloadTest<ClSplitterWorkload, DataType>(factory, graph);
505
506     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
507     SplitterQueueDescriptor queueDescriptor = workload->GetData();
508     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
509     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {5, 7, 7}));
510
511     auto outputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
512     BOOST_TEST(CompareIClTensorHandleShape(outputHandle1, {2, 7, 7}));
513
514     auto outputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[2]);
515     BOOST_TEST(CompareIClTensorHandleShape(outputHandle2, {2, 7, 7}));
516
517     auto outputHandle0 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
518     // NOTE: At the moment the CL collapses the tensor to a 2 dim when dimension zero = 1
519     //       we are raising this difference between the NEON and CL libs as an issue with the compute library team.
520     BOOST_TEST(CompareIClTensorHandleShape(outputHandle0, {7, 7}));
521 }
522
// Splitter workload creation for the two CL float data types.
BOOST_AUTO_TEST_CASE(CreateSplitterFloatWorkload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterFloat16Workload)
{
    ClSplitterWorkloadTest<armnn::DataType::Float16>();
}
532
533 template <typename armnn::DataType DataType>
534 static void ClSplitterMergerTest()
535 {
536     // Tests that it is possible to decide which output of the splitter layer
537     // should be lined to which input of the merger layer.
538     // We test that is is possible to specify 0th output
539     // of the splitter to be the 1st input to the merger and the 1st output of the splitter  to be 0th input
540     // of the merger.
541
542     Graph graph;
543     ClWorkloadFactory factory;
544
545     auto workloads =
546         CreateSplitterMergerWorkloadTest<ClSplitterWorkload, ClMergerWorkload, DataType>
547             (factory, graph);
548
549     auto wlSplitter = std::move(workloads.first);
550     auto wlMerger = std::move(workloads.second);
551
552     //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
553     armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
554     armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
555     armnn::ClSubTensorHandle* mIn0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
556     armnn::ClSubTensorHandle* mIn1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlMerger->GetData().m_Inputs[1]);
557
558     BOOST_TEST(sOut0);
559     BOOST_TEST(sOut1);
560     BOOST_TEST(mIn0);
561     BOOST_TEST(mIn1);
562
563     //Fliped order of inputs/outputs.
564     bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
565     BOOST_TEST(validDataPointers);
566
567
568     //Also make sure that the inputs are subtensors of one tensor and outputs are sub tensors of another tensor.
569     bool validSubTensorParents = (mIn0->GetTensor().parent() == mIn1->GetTensor().parent())
570                                     && (sOut0->GetTensor().parent() == sOut1->GetTensor().parent());
571
572     BOOST_TEST(validSubTensorParents);
573 }
574
// Splitter->merger cross-wiring for the two CL float data types.
BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloatWorkload)
{
    ClSplitterMergerTest<armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterMergerFloat16Workload)
{
    ClSplitterMergerTest<armnn::DataType::Float16>();
}
584
585
586 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
587 {
588     // Test that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
589     // We create a splitter with two outputs. That each of those outputs is used by two different activation layers.
590
591     Graph graph;
592     ClWorkloadFactory factory;
593     std::unique_ptr<ClSplitterWorkload> wlSplitter;
594     std::unique_ptr<ClActivationWorkload> wlActiv0_0;
595     std::unique_ptr<ClActivationWorkload> wlActiv0_1;
596     std::unique_ptr<ClActivationWorkload> wlActiv1_0;
597     std::unique_ptr<ClActivationWorkload> wlActiv1_1;
598
599     CreateSplitterMultipleInputsOneOutputWorkloadTest<ClSplitterWorkload,
600         ClActivationWorkload, armnn::DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
601                                                                wlActiv1_0, wlActiv1_1);
602
603     //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
604     armnn::ClSubTensorHandle* sOut0 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
605     armnn::ClSubTensorHandle* sOut1 = dynamic_cast<armnn::ClSubTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
606     armnn::ClSubTensorHandle* activ0_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
607     armnn::ClSubTensorHandle* activ0_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
608     armnn::ClSubTensorHandle* activ1_0Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
609     armnn::ClSubTensorHandle* activ1_1Im = dynamic_cast<armnn::ClSubTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
610
611
612     BOOST_TEST(sOut0);
613     BOOST_TEST(sOut1);
614     BOOST_TEST(activ0_0Im);
615     BOOST_TEST(activ0_1Im);
616     BOOST_TEST(activ1_0Im);
617     BOOST_TEST(activ1_1Im);
618
619     bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
620                              (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
621
622     BOOST_TEST(validDataPointers);
623 }
624
// Exercises MemCopy workload creation against CL tensor handles; all checks
// live inside the shared CreateMemCopyWorkloads helper.
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsCl)
{
    ClWorkloadFactory    factory;
    CreateMemCopyWorkloads<IClTensorHandle>(factory);
}
630
631 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
632 static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
633 {
634     Graph graph;
635     ClWorkloadFactory factory;
636     auto workload =
637             CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
638
639     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
640     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
641     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
642     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
643
644     std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW)
645             ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
646             : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
647     std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
648             ? std::initializer_list<unsigned int>({ 5, 20, 50, 67 })
649             : std::initializer_list<unsigned int>({ 5, 50, 67, 20 });
650
651     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
652     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
653 }
654
// L2 normalization workload creation for float/half in both data layouts.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
674
675 template <typename LstmWorkloadType>
676 static void ClCreateLstmWorkloadTest()
677 {
678     Graph graph;
679     ClWorkloadFactory factory;
680     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
681
682     LstmQueueDescriptor queueDescriptor = workload->GetData();
683     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
684     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[1]);
685     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 2 }));
686     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 4 }));
687 }
688
// LSTM workload creation (float only).
BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
{
    ClCreateLstmWorkloadTest<ClLstmFloatWorkload>();
}
693
694 template <typename ResizeBilinearWorkloadType, typename armnn::DataType DataType>
695 static void ClResizeBilinearWorkloadTest(DataLayout dataLayout)
696 {
697     Graph graph;
698     ClWorkloadFactory factory;
699
700     auto workload = CreateResizeBilinearWorkloadTest<ResizeBilinearWorkloadType, DataType>(factory, graph, dataLayout);
701
702     // Checks that inputs/outputs are as we expect them (see definition of CreateResizeBilinearWorkloadTest).
703     ResizeBilinearQueueDescriptor queueDescriptor = workload->GetData();
704     auto inputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
705     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
706
707     switch (dataLayout)
708     {
709         case DataLayout::NHWC:
710             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
711             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
712             break;
713         case DataLayout::NCHW:
714         default:
715             BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
716             BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
717     }
718 }
719
// Resize bilinear workload creation for float/half in both data layouts.
BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32NchwWorkload)
{
    ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat16NchwWorkload)
{
    ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat32NhwcWorkload)
{
    ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat16NhwcWorkload)
{
    ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}
739
740 template <typename MeanWorkloadType, typename armnn::DataType DataType>
741 static void ClMeanWorkloadTest()
742 {
743     Graph graph;
744     ClWorkloadFactory factory;
745     auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
746
747     // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
748     MeanQueueDescriptor queueDescriptor = workload->GetData();
749     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
750     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
751
752     // The first dimension (batch size) in both input and output is singular thus it has been reduced by ACL.
753     BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 3, 7, 4 }));
754     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 4 }));
755 }
756
// Mean workload creation for float, half and quantised uint8.
BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
{
    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
}
771
772 BOOST_AUTO_TEST_SUITE_END()