IVGCVSW-1946: Remove armnn/src from the include paths
src/backends/neon/test/NeonCreateWorkloadTests.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/MemCopyWorkload.hpp>

#include <aclCommon/test/CreateWorkloadClNeon.hpp>

#include <neon/NeonWorkloadFactory.hpp>
#include <neon/NeonTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
#include <neon/workloads/NeonWorkloads.hpp>

BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)

namespace
{

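// Compares the Arm Compute tensor info backing a Neon tensor handle against the expected armnn::TensorInfo:
// data type, rank, quantization info and every dimension must all match.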
bool TestNeonTensorHandleInfo(armnn::INeonTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
{
    using namespace armnn::armcomputetensorutils;

    const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
    const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);

    if (handleInfo->data_type() != expectedAclInfo.data_type())
    {
        return false;
    }

    if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
    {
        return false;
    }

    if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
    {
        return false;
    }

    for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
    {
        if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
        {
            return false;
        }
    }

    return true;
}

} // namespace

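// The tests below share a common pattern: a Create*WorkloadTest helper (pulled in via the
// CreateWorkloadClNeon.hpp include) builds a minimal graph around the layer under test, asks the
// NeonWorkloadFactory for the corresponding workload, and the test then checks that the workload's
// input/output tensor handles carry the tensor info declared by the helper.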
template <typename armnn::DataType DataType>
static void NeonCreateActivationWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
    ActivationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
}

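// Float16 test cases are guarded by __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, which the compiler
// defines only when the target supports FP16 vector arithmetic.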
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
{
    NeonCreateActivationWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
{
    NeonCreateActivationWorkloadTest<DataType::Float32>();
}

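// One helper, parameterized over the workload, descriptor and layer types, covers all of the
// binary arithmetic workloads (addition, subtraction, multiplication) below.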
template <typename WorkloadType,
          typename DescriptorType,
          typename LayerType,
          armnn::DataType DataType>
static void NeonCreateArithmeticWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);

    // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest).
    DescriptorType queueDescriptor = workload->GetData();
    auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto inputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[1]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
{
    NeonCreateArithmeticWorkloadTest<NeonAdditionFloatWorkload,
                                     AdditionQueueDescriptor,
                                     AdditionLayer,
                                     DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
{
    NeonCreateArithmeticWorkloadTest<NeonAdditionFloatWorkload,
                                     AdditionQueueDescriptor,
                                     AdditionLayer,
                                     DataType::Float32>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
{
    NeonCreateArithmeticWorkloadTest<NeonSubtractionFloatWorkload,
                                     SubtractionQueueDescriptor,
                                     SubtractionLayer,
                                     DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
{
    NeonCreateArithmeticWorkloadTest<NeonSubtractionFloatWorkload,
                                     SubtractionQueueDescriptor,
                                     SubtractionLayer,
                                     DataType::Float32>();
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
{
    NeonCreateArithmeticWorkloadTest<NeonMultiplicationFloatWorkload,
                                     MultiplicationQueueDescriptor,
                                     MultiplicationLayer,
                                     DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
{
    NeonCreateArithmeticWorkloadTest<NeonMultiplicationFloatWorkload,
                                     MultiplicationQueueDescriptor,
                                     MultiplicationLayer,
                                     DataType::Float32>();
}

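// Layout-aware tests take a DataLayout argument and verify the NCHW and NHWC tensor shapes separately.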
template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
                    (factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
    BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
{
    NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload,
                                                    DataType>(factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
    Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
{
    NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;

    auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
                                                             DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateDepthwiseConvolution2dWorkloadTest).
    DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);

    std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW)
            ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
            : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
    std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW)
            ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
            : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
{
    NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
}
#endif

template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
static void NeonCreateFullyConnectedWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType,
                                                     DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
    FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
{
    NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
}

template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
    NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
{
    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>
                    (factory, graph, dataLayout);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};

    // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
    Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
{
    NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NHWC);
}

template <typename armnn::DataType DataType>
static void NeonCreateReshapeWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
    ReshapeQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
{
    NeonCreateReshapeWorkloadTest<DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
{
    NeonCreateReshapeWorkloadTest<DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
{
    NeonCreateReshapeWorkloadTest<DataType::QuantisedAsymm8>();
}

template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
static void NeonCreateSoftmaxWorkloadTest()
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
    SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float16>();
}
#endif

BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
{
    NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float32>();
}

BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);

    // Checks that outputs and inputs are as we expect them (see definition of CreateSplitterWorkloadTest).
    SplitterQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));

    auto outputHandle0 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));

    auto outputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[1]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));

    auto outputHandle2 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[2]);
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
}

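// The next two tests verify wiring rather than tensor info: they compare the tensor handle pointers
// on the workloads' queue descriptors to check which outputs feed which inputs.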
BOOST_AUTO_TEST_CASE(CreateSplitterMerger)
{
    // Tests that it is possible to decide which output of the splitter layer
    // should be linked to which input of the merger layer.
    // We test that it is possible to specify the 0th output of the splitter to be the 1st input to the
    // merger, and the 1st output of the splitter to be the 0th input of the merger.

    Graph graph;
    NeonWorkloadFactory factory;

    auto workloads =
        CreateSplitterMergerWorkloadTest<NeonSplitterWorkload, NeonMergerWorkload,
            DataType::Float32>(factory, graph);

    auto wlSplitter = std::move(workloads.first);
    auto wlMerger = std::move(workloads.second);

    // Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
    armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::INeonTensorHandle* mIn0 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* mIn1 = dynamic_cast<armnn::INeonTensorHandle*>(wlMerger->GetData().m_Inputs[1]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(mIn0);
    BOOST_TEST(mIn1);

    bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);

    BOOST_TEST(validDataPointers);
}

BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
{
    // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
    // We create a splitter with two outputs and check that each of those outputs is used by two different
    // activation layers.

    Graph graph;
    NeonWorkloadFactory factory;
    std::unique_ptr<NeonSplitterWorkload> wlSplitter;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
    std::unique_ptr<NeonActivationWorkload> wlActiv1_1;

    CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
        NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
                                                   wlActiv1_0, wlActiv1_1);

    armnn::INeonTensorHandle* sOut0 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
    armnn::INeonTensorHandle* sOut1 = dynamic_cast<armnn::INeonTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
    armnn::INeonTensorHandle* activ0_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ0_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ1_0Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
    armnn::INeonTensorHandle* activ1_1Im = dynamic_cast<armnn::INeonTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);

    BOOST_TEST(sOut0);
    BOOST_TEST(sOut1);
    BOOST_TEST(activ0_0Im);
    BOOST_TEST(activ0_1Im);
    BOOST_TEST(activ1_0Im);
    BOOST_TEST(activ1_1Im);

    bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
                             (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);

    BOOST_TEST(validDataPointers);
}

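// CreateMemCopyWorkloads is a helper shared with the CL backend (from CreateWorkloadClNeon.hpp);
// it creates and checks MemCopy workloads between CPU and Neon tensor handles.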
BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    NeonWorkloadFactory factory;
    CreateMemCopyWorkloads<INeonTensorHandle>(factory);
}

template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
{
    Graph graph;
    NeonWorkloadFactory factory;
    auto workload =
            CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);

    // Checks that inputs/outputs are as we expect them (see definition of CreateL2NormalizationWorkloadTest).
    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
    auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);

    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
                TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };

    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
}

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
}
#endif

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
{
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
}

BOOST_AUTO_TEST_SUITE_END()