IVGCVSW-3656 Make the reference backend optional
[platform/upstream/armnn.git] / src / backends / neon / test / NeonCreateWorkloadTests.cpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonWorkloadFactoryHelper.hpp"
7
8 #include <aclCommon/ArmComputeTensorUtils.hpp>
9 #include <backendsCommon/MemCopyWorkload.hpp>
10
11 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
12
13 #include <neon/NeonWorkloadFactory.hpp>
14 #include <neon/NeonTensorHandle.hpp>
15 #include <neon/workloads/NeonWorkloadUtils.hpp>
16 #include <neon/workloads/NeonWorkloads.hpp>
17
18 BOOST_AUTO_TEST_SUITE(CreateWorkloadNeon)
19
20 namespace
21 {
22
23 boost::test_tools::predicate_result CompareIAclTensorHandleShape(IAclTensorHandle*                    tensorHandle,
24                                                                 std::initializer_list<unsigned int> expectedDimensions)
25 {
26     return CompareTensorHandleShape<IAclTensorHandle>(tensorHandle, expectedDimensions);
27 }
28
29 bool TestNeonTensorHandleInfo(armnn::IAclTensorHandle* handle, const armnn::TensorInfo& expectedInfo)
30 {
31     using namespace armnn::armcomputetensorutils;
32
33     const arm_compute::ITensorInfo* handleInfo = handle->GetTensor().info();
34     const arm_compute::TensorInfo expectedAclInfo = BuildArmComputeTensorInfo(expectedInfo);
35
36     if (handleInfo->data_type() != expectedAclInfo.data_type())
37     {
38         return false;
39     }
40
41     if (handleInfo->num_dimensions() != expectedAclInfo.num_dimensions())
42     {
43         return false;
44     }
45
46     if (handleInfo->quantization_info() != expectedAclInfo.quantization_info())
47     {
48         return false;
49     }
50
51     for (std::size_t d = 0; d < expectedAclInfo.num_dimensions(); ++d)
52     {
53         if (handleInfo->dimension(d) != expectedAclInfo.dimension(d))
54         {
55             return false;
56         }
57     }
58
59     return true;
60 }
61
62 } // namespace
63
64 template <typename armnn::DataType DataType>
65 static void NeonCreateActivationWorkloadTest()
66 {
67     Graph graph;
68     NeonWorkloadFactory factory =
69         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
70
71     auto workload = CreateActivationWorkloadTest<NeonActivationWorkload, DataType>(factory, graph);
72
73     // Checks that inputs/outputs are as we expect them (see definition of CreateActivationWorkloadTest).
74     ActivationQueueDescriptor queueDescriptor = workload->GetData();
75     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
76     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
77     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({1, 1}, DataType)));
78     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 1}, DataType)));
79 }
80
81 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
82 BOOST_AUTO_TEST_CASE(CreateActivationFloat16Workload)
83 {
84     NeonCreateActivationWorkloadTest<DataType::Float16>();
85 }
86 #endif
87
88 BOOST_AUTO_TEST_CASE(CreateActivationFloatWorkload)
89 {
90     NeonCreateActivationWorkloadTest<DataType::Float32>();
91 }
92
93 template <typename WorkloadType,
94           typename DescriptorType,
95           typename LayerType,
96           armnn::DataType DataType>
97 static void NeonCreateElementwiseWorkloadTest()
98 {
99     Graph graph;
100     NeonWorkloadFactory factory =
101         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
102
103     auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
104
105     DescriptorType queueDescriptor = workload->GetData();
106     auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
107     auto inputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
108     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
109     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({2, 3}, DataType)));
110     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle2, TensorInfo({2, 3}, DataType)));
111     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({2, 3}, DataType)));
112 }
113
114 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
115 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
116 {
117     NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
118                                       AdditionQueueDescriptor,
119                                       AdditionLayer,
120                                       DataType::Float16>();
121 }
122 #endif
123
124 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
125 {
126     NeonCreateElementwiseWorkloadTest<NeonAdditionWorkload,
127                                       AdditionQueueDescriptor,
128                                       AdditionLayer,
129                                       DataType::Float32>();
130 }
131
132 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
133 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
134 {
135     NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
136                                       SubtractionQueueDescriptor,
137                                       SubtractionLayer,
138                                       DataType::Float16>();
139 }
140 #endif
141
142 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
143 {
144     NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
145                                       SubtractionQueueDescriptor,
146                                       SubtractionLayer,
147                                       DataType::Float32>();
148 }
149
150 BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
151 {
152     NeonCreateElementwiseWorkloadTest<NeonSubtractionWorkload,
153                                       SubtractionQueueDescriptor,
154                                       SubtractionLayer,
155                                       DataType::QuantisedAsymm8>();
156 }
157
158 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
159 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
160 {
161     NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
162                                       MultiplicationQueueDescriptor,
163                                       MultiplicationLayer,
164                                       DataType::Float16>();
165 }
166 #endif
167
168 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
169 {
170     NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
171                                       MultiplicationQueueDescriptor,
172                                       MultiplicationLayer,
173                                       DataType::Float32>();
174 }
175
176 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
177 {
178     NeonCreateElementwiseWorkloadTest<NeonMultiplicationWorkload,
179                                       MultiplicationQueueDescriptor,
180                                       MultiplicationLayer,
181                                       DataType::QuantisedAsymm8>();
182 }
183
184 template <typename BatchNormalizationWorkloadType, typename armnn::DataType DataType>
185 static void NeonCreateBatchNormalizationWorkloadTest(DataLayout dataLayout)
186 {
187     Graph graph;
188     NeonWorkloadFactory factory =
189         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
190
191     auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>
192                     (factory, graph, dataLayout);
193
194     // Checks that outputs and inputs are as we expect them (see definition of CreateBatchNormalizationWorkloadTest).
195     BatchNormalizationQueueDescriptor queueDescriptor = workload->GetData();
196     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
197     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
198
199     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
200     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 4, 4} : TensorShape{2, 4, 4, 3};
201
202     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
203     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
204 }
205
206 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
207 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NchwWorkload)
208 {
209     NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NCHW);
210 }
211
212 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat16NhwcWorkload)
213 {
214     NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float16>(DataLayout::NHWC);
215 }
216 #endif
217
218 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNchwWorkload)
219 {
220     NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NCHW);
221 }
222
223 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloatNhwcWorkload)
224 {
225     NeonCreateBatchNormalizationWorkloadTest<NeonBatchNormalizationWorkload, DataType::Float32>(DataLayout::NHWC);
226 }
227
228 template <typename armnn::DataType DataType>
229 static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
230 {
231     Graph graph;
232     NeonWorkloadFactory factory =
233         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
234
235     auto workload = CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, DataType>(factory, graph, dataLayout);
236
237     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
238     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
239
240     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
241     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
242     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
243     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
244     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
245     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle,  TensorInfo(outputShape, DataType)));
246 }
247
248 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
249 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
250 {
251     NeonCreateConvolution2dWorkloadTest<DataType::Float16>();
252 }
253
254 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
255 {
256     NeonCreateConvolution2dWorkloadTest<DataType::Float16>(DataLayout::NHWC);
257 }
258
259 #endif
260 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
261 {
262     NeonCreateConvolution2dWorkloadTest<DataType::Float32>();
263 }
264
265 BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
266 {
267     NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
268 }
269
270 template <typename armnn::DataType DataType>
271 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
272 {
273     Graph graph;
274     NeonWorkloadFactory factory =
275         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
276
277     auto workload = CreateDepthwiseConvolution2dWorkloadTest<NeonDepthwiseConvolutionWorkload,
278                                                              DataType>(factory, graph, dataLayout);
279
280     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
281     DepthwiseConvolution2dQueueDescriptor queueDescriptor = workload->GetData();
282     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
283     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
284
285     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
286                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
287     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? std::initializer_list<unsigned int>({ 2, 2, 5, 5 })
288                                                                : std::initializer_list<unsigned int>({ 2, 5, 5, 2 });
289
290     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
291     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
292 }
293
294 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat32NhwcWorkload)
295 {
296     NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float32>(DataLayout::NHWC);
297 }
298
299 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
300 BOOST_AUTO_TEST_CASE(CreateDepthWiseConvolution2dFloat16NhwcWorkload)
301 {
302     NeonCreateDepthWiseConvolutionWorkloadTest<DataType::Float16>(DataLayout::NHWC);
303 }
304 #endif
305
306 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
307 static void NeonCreateFullyConnectedWorkloadTest()
308 {
309     Graph graph;
310     NeonWorkloadFactory factory =
311         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
312
313     auto workload = CreateFullyConnectedWorkloadTest<FullyConnectedWorkloadType, DataType>(factory, graph);
314
315     // Checks that outputs and inputs are as we expect them (see definition of CreateFullyConnectedWorkloadTest).
316     FullyConnectedQueueDescriptor queueDescriptor = workload->GetData();
317     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
318     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
319     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({3, 1, 4, 5}, DataType)));
320     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({3, 7}, DataType)));
321 }
322
323 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
324 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloat16Workload)
325 {
326     NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float16>();
327 }
328 #endif
329
330 BOOST_AUTO_TEST_CASE(CreateFullyConnectedFloatWorkload)
331 {
332     NeonCreateFullyConnectedWorkloadTest<NeonFullyConnectedWorkload, DataType::Float32>();
333 }
334
335 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
336 static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
337 {
338     Graph graph;
339     NeonWorkloadFactory factory =
340         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
341
342     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
343
344     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
345     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
346     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
347     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
348
349     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
350     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 5, 5, 1} : TensorShape{3, 1, 5, 5};
351
352     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
353     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
354 }
355
356 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
357 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
358 {
359     NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
360 }
361
362 BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
363 {
364     NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
365 }
366 #endif
367
368 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
369 {
370     NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
371 }
372
373 BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
374 {
375     NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
376 }
377
378
379 template <typename armnn::DataType DataType>
380 static void NeonCreatePooling2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
381 {
382     Graph graph;
383     NeonWorkloadFactory factory =
384         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
385
386     auto workload = CreatePooling2dWorkloadTest<NeonPooling2dWorkload, DataType>(factory, graph, dataLayout);
387
388     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 5, 5} : TensorShape{3, 5, 5, 2};
389     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{3, 2, 2, 4} : TensorShape{3, 2, 4, 2};
390
391     // Checks that outputs and inputs are as we expect them (see definition of CreatePooling2dWorkloadTest).
392     Pooling2dQueueDescriptor queueDescriptor = workload->GetData();
393     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
394     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
395     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
396     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
397 }
398
399 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
400 BOOST_AUTO_TEST_CASE(CreatePooling2dFloat16Workload)
401 {
402     NeonCreatePooling2dWorkloadTest<DataType::Float16>();
403 }
404 #endif
405
406 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNchwWorkload)
407 {
408     NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NCHW);
409 }
410
411 BOOST_AUTO_TEST_CASE(CreatePooling2dFloatNhwcWorkload)
412 {
413     NeonCreatePooling2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
414 }
415
416 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NchwWorkload)
417 {
418     NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NCHW);
419 }
420
421 BOOST_AUTO_TEST_CASE(CreatePooling2dUint8NhwcWorkload)
422 {
423     NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NHWC);
424 }
425
426 static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
427                                         const armnn::TensorShape& alphaShape,
428                                         const armnn::TensorShape& outputShape,
429                                         armnn::DataType dataType)
430 {
431     Graph graph;
432     NeonWorkloadFactory factory =
433             NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
434
435     auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
436                                                                graph,
437                                                                inputShape,
438                                                                alphaShape,
439                                                                outputShape,
440                                                                dataType);
441
442     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
443     PreluQueueDescriptor queueDescriptor = workload->GetData();
444     auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
445     auto alphaHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
446     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
447     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
448     BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
449     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
450 }
451
452 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
453     BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
454 {
455     NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
456 }
457 #endif
458
459 BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
460 {
461     NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
462 }
463
464 BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
465 {
466     NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
467 }
468
469 template <typename armnn::DataType DataType>
470 static void NeonCreateReshapeWorkloadTest()
471 {
472     Graph graph;
473     NeonWorkloadFactory factory =
474         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
475
476     auto workload = CreateReshapeWorkloadTest<NeonReshapeWorkload, DataType>(factory, graph);
477
478     // Checks that outputs and inputs are as we expect them (see definition of CreateReshapeWorkloadTest).
479     ReshapeQueueDescriptor queueDescriptor = workload->GetData();
480     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
481     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
482     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
483     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({1, 4}, DataType)));
484 }
485
486 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
487 BOOST_AUTO_TEST_CASE(CreateReshapeFloat16Workload)
488 {
489     NeonCreateReshapeWorkloadTest<DataType::Float16>();
490 }
491 #endif
492
493 BOOST_AUTO_TEST_CASE(CreateReshapeFloatWorkload)
494 {
495     NeonCreateReshapeWorkloadTest<DataType::Float32>();
496 }
497
498 BOOST_AUTO_TEST_CASE(CreateReshapeUint8Workload)
499 {
500     NeonCreateReshapeWorkloadTest<DataType::QuantisedAsymm8>();
501 }
502
503 template <typename ResizeWorkloadType, armnn::DataType DataType>
504 static void NeonCreateResizeWorkloadTest(DataLayout dataLayout)
505 {
506     Graph graph;
507     NeonWorkloadFactory factory =
508             NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
509     auto workload = CreateResizeBilinearWorkloadTest<ResizeWorkloadType, DataType>(factory, graph, dataLayout);
510
511     auto queueDescriptor = workload->GetData();
512
513     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
514     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
515
516     switch (dataLayout)
517     {
518         case DataLayout::NHWC:
519             BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 4, 4, 3 }));
520             BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 2, 2, 3 }));
521             break;
522         case DataLayout::NCHW:
523         default:
524             BOOST_TEST(CompareIAclTensorHandleShape(inputHandle, { 2, 3, 4, 4 }));
525             BOOST_TEST(CompareIAclTensorHandleShape(outputHandle, { 2, 3, 2, 2 }));
526     }
527 }
528
529 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NchwWorkload)
530 {
531     NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
532 }
533
534 BOOST_AUTO_TEST_CASE(CreateResizeUint8NchwWorkload)
535 {
536     NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
537 }
538
539 BOOST_AUTO_TEST_CASE(CreateResizeFloat32NhwcWorkload)
540 {
541     NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
542 }
543
544 BOOST_AUTO_TEST_CASE(CreateResizeUint8NhwcWorkload)
545 {
546     NeonCreateResizeWorkloadTest<NeonResizeWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
547 }
548
549 template <typename SoftmaxWorkloadType, typename armnn::DataType DataType>
550 static void NeonCreateSoftmaxWorkloadTest()
551 {
552     Graph graph;
553     NeonWorkloadFactory factory =
554         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
555
556     auto workload = CreateSoftmaxWorkloadTest<SoftmaxWorkloadType, DataType>(factory, graph);
557
558     // Checks that outputs and inputs are as we expect them (see definition of CreateSoftmaxWorkloadTest).
559     SoftmaxQueueDescriptor queueDescriptor = workload->GetData();
560     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
561     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
562     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({4, 1}, DataType)));
563     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({4, 1}, DataType)));
564 }
565
566 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
567 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloat16Workload)
568 {
569     NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float16>();
570 }
571 #endif
572
573 BOOST_AUTO_TEST_CASE(CreateSoftmaxFloatWorkload)
574 {
575     NeonCreateSoftmaxWorkloadTest<NeonSoftmaxFloatWorkload, DataType::Float32>();
576 }
577
578 template <typename SpaceToDepthWorkloadType, typename armnn::DataType DataType>
579 static void NeonSpaceToDepthWorkloadTest()
580 {
581     Graph graph;
582     NeonWorkloadFactory factory =
583             NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
584
585     auto workload = CreateSpaceToDepthWorkloadTest<SpaceToDepthWorkloadType, DataType>(factory, graph);
586
587     SpaceToDepthQueueDescriptor queueDescriptor = workload->GetData();
588     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
589     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
590
591     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 1, 2, 2, 1 }, DataType)));
592     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 1, 1, 1, 4 }, DataType)));
593 }
594
595 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat32Workload)
596 {
597     NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float32>();
598 }
599
600 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthFloat16Workload)
601 {
602     NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::Float16>();
603 }
604
605 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQAsymm8Workload)
606 {
607     NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedAsymm8>();
608 }
609
610 BOOST_AUTO_TEST_CASE(CreateSpaceToDepthQSymm16Workload)
611 {
612     NeonSpaceToDepthWorkloadTest<NeonSpaceToDepthWorkload, armnn::DataType::QuantisedSymm16>();
613 }
614
615 BOOST_AUTO_TEST_CASE(CreateSplitterWorkload)
616 {
617     Graph graph;
618     NeonWorkloadFactory factory =
619         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
620
621     auto workload = CreateSplitterWorkloadTest<NeonSplitterWorkload, DataType::Float32>(factory, graph);
622
623     // Checks that outputs are as we expect them (see definition of CreateSplitterWorkloadTest).
624     SplitterQueueDescriptor queueDescriptor = workload->GetData();
625     auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
626     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({5, 7, 7}, DataType::Float32)));
627
628     auto outputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
629     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle0, TensorInfo({1, 7, 7}, DataType::Float32)));
630
631     auto outputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
632     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle1, TensorInfo({2, 7, 7}, DataType::Float32)));
633
634     auto outputHandle2 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[2]);
635     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle2, TensorInfo({2, 7, 7}, DataType::Float32)));
636 }
637
638 BOOST_AUTO_TEST_CASE(CreateSplitterConcat)
639 {
640     // Tests that it is possible to decide which output of the splitter layer
641     // should be lined to which input of the concat layer.
642     // We tested that is is possible to specify 0th output
643     // of the splitter to be the 1st input to the concat, and the 1st output of the splitter to be 0th input
644     // of the concat.
645
646     Graph graph;
647     NeonWorkloadFactory factory =
648         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
649
650     auto workloads =
651         CreateSplitterConcatWorkloadTest<NeonSplitterWorkload, NeonConcatWorkload,
652             DataType::Float32>(factory, graph);
653
654     auto wlSplitter = std::move(workloads.first);
655     auto wlConcat = std::move(workloads.second);
656
657     //Checks that the index of inputs/outputs matches what we declared on InputDescriptor construction.
658     armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
659     armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
660     armnn::IAclTensorHandle* mIn0 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[0]);
661     armnn::IAclTensorHandle* mIn1 = dynamic_cast<armnn::IAclTensorHandle*>(wlConcat->GetData().m_Inputs[1]);
662
663     BOOST_TEST(sOut0);
664     BOOST_TEST(sOut1);
665     BOOST_TEST(mIn0);
666     BOOST_TEST(mIn1);
667
668     bool validDataPointers = (sOut0 == mIn1) && (sOut1 == mIn0);
669
670     BOOST_TEST(validDataPointers);
671 }
672
673 BOOST_AUTO_TEST_CASE(CreateSingleOutputMultipleInputs)
674 {
675     // Tests that it is possible to assign multiple (two) different layers to each of the outputs of a splitter layer.
676     // We created a splitter with two outputs. That each of those outputs is used by two different activation layers
677
678     Graph graph;
679     NeonWorkloadFactory factory =
680         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
681
682     std::unique_ptr<NeonSplitterWorkload> wlSplitter;
683     std::unique_ptr<NeonActivationWorkload> wlActiv0_0;
684     std::unique_ptr<NeonActivationWorkload> wlActiv0_1;
685     std::unique_ptr<NeonActivationWorkload> wlActiv1_0;
686     std::unique_ptr<NeonActivationWorkload> wlActiv1_1;
687
688     CreateSplitterMultipleInputsOneOutputWorkloadTest<NeonSplitterWorkload,
689         NeonActivationWorkload, DataType::Float32>(factory, graph, wlSplitter, wlActiv0_0, wlActiv0_1,
690                                                    wlActiv1_0, wlActiv1_1);
691
692     armnn::IAclTensorHandle* sOut0 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[0]);
693     armnn::IAclTensorHandle* sOut1 = dynamic_cast<armnn::IAclTensorHandle*>(wlSplitter->GetData().m_Outputs[1]);
694     armnn::IAclTensorHandle* activ0_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_0->GetData().m_Inputs[0]);
695     armnn::IAclTensorHandle* activ0_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv0_1->GetData().m_Inputs[0]);
696     armnn::IAclTensorHandle* activ1_0Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_0->GetData().m_Inputs[0]);
697     armnn::IAclTensorHandle* activ1_1Im = dynamic_cast<armnn::IAclTensorHandle*>(wlActiv1_1->GetData().m_Inputs[0]);
698
699
700     BOOST_TEST(sOut0);
701     BOOST_TEST(sOut1);
702     BOOST_TEST(activ0_0Im);
703     BOOST_TEST(activ0_1Im);
704     BOOST_TEST(activ1_0Im);
705     BOOST_TEST(activ1_1Im);
706
707     bool validDataPointers = (sOut0 == activ0_0Im) && (sOut0 == activ0_1Im) &&
708                              (sOut1 == activ1_0Im) && (sOut1 == activ1_1Im);
709
710     BOOST_TEST(validDataPointers);
711 }
712
#ifdef ARMCOMPUTEREF_ENABLED

// The test case below depends on the reference backend, so it is only compiled
// when the reference backend is part of the build.

BOOST_AUTO_TEST_CASE(CreateMemCopyWorkloadsNeon)
{
    // Build a Neon factory and hand it to the shared mem-copy workload test.
    NeonWorkloadFactory factory =
        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());

    CreateMemCopyWorkloads<IAclTensorHandle>(factory);
}

#endif // ARMCOMPUTEREF_ENABLED
725
726 template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
727 static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
728 {
729     Graph graph;
730     NeonWorkloadFactory factory =
731         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
732
733     auto workload =
734             CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
735
736     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
737     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
738     auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
739     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
740
741     TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ?
742                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
743     TensorShape outputShape = (dataLayout == DataLayout::NCHW) ?
744                 TensorShape{ 5, 20, 50, 67 } : TensorShape{ 5, 50, 67, 20 };
745
746     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
747     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
748 }
749
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// Float16 variants are only compiled when the FP16 vector arithmetic feature macro is defined.
BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
{
    const DataLayout layout = DataLayout::NCHW;
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(layout);
}

BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
{
    const DataLayout layout = DataLayout::NHWC;
    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(layout);
}
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
761
762 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
763 {
764     NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
765 }
766
767 BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
768 {
769     NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
770 }
771
772 template <typename LstmWorkloadType>
773 static void NeonCreateLstmWorkloadTest()
774 {
775     Graph graph;
776     NeonWorkloadFactory factory =
777             NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
778
779     auto workload = CreateLstmWorkloadTest<LstmWorkloadType>(factory, graph);
780
781     LstmQueueDescriptor queueDescriptor = workload->GetData();
782
783     auto inputHandle  = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
784     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
785
786     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 2, 2 }, DataType::Float32)));
787     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 2, 4 }, DataType::Float32)));
788 }
789
790 BOOST_AUTO_TEST_CASE(CreateLSTMWorkloadFloatWorkload)
791 {
792     NeonCreateLstmWorkloadTest<NeonLstmFloatWorkload>();
793 }
794
795 template <typename ConcatWorkloadType, armnn::DataType DataType>
796 static void NeonCreateConcatWorkloadTest(std::initializer_list<unsigned int> outputShape,
797                                          unsigned int concatAxis)
798 {
799     Graph graph;
800     NeonWorkloadFactory factory =
801         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
802
803     auto workload = CreateConcatWorkloadTest<ConcatWorkloadType, DataType>(factory, graph, outputShape, concatAxis);
804
805     ConcatQueueDescriptor queueDescriptor = workload->GetData();
806     auto inputHandle0 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
807     auto inputHandle1 = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
808     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
809
810     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle0, TensorInfo({ 2, 3, 2, 5 }, DataType)));
811     BOOST_TEST(TestNeonTensorHandleInfo(inputHandle1, TensorInfo({ 2, 3, 2, 5 }, DataType)));
812     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
813 }
814
815 BOOST_AUTO_TEST_CASE(CreateConcatDim0Float32Workload)
816 {
817     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 4, 3, 2, 5 }, 0);
818 }
819
820 BOOST_AUTO_TEST_CASE(CreateConcatDim1Float32Workload)
821 {
822     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 6, 2, 5 }, 1);
823 }
824
825 BOOST_AUTO_TEST_CASE(CreateConcatDim3Float32Workload)
826 {
827     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::Float32>({ 2, 3, 2, 10 }, 3);
828 }
829
830 BOOST_AUTO_TEST_CASE(CreateConcatDim0Uint8Workload)
831 {
832     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 4, 3, 2, 5 }, 0);
833 }
834
835 BOOST_AUTO_TEST_CASE(CreateConcatDim1Uint8Workload)
836 {
837     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 6, 2, 5 }, 1);
838 }
839
840 BOOST_AUTO_TEST_CASE(CreateConcatDim3Uint8Workload)
841 {
842     NeonCreateConcatWorkloadTest<NeonConcatWorkload, armnn::DataType::QuantisedAsymm8>({ 2, 3, 2, 10 }, 3);
843 }
844
845 template <armnn::DataType DataType>
846 static void NeonCreateStackWorkloadTest(const std::initializer_list<unsigned int>& inputShape,
847                                         const std::initializer_list<unsigned int>& outputShape,
848                                         unsigned int axis,
849                                         unsigned int numInputs)
850 {
851     armnn::Graph graph;
852     NeonWorkloadFactory factory =
853             NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
854
855     auto workload = CreateStackWorkloadTest<NeonStackWorkload, DataType>(factory,
856                                                                          graph,
857                                                                          TensorShape(inputShape),
858                                                                          TensorShape(outputShape),
859                                                                          axis,
860                                                                          numInputs);
861
862     // Check inputs and output are as expected
863     StackQueueDescriptor queueDescriptor = workload->GetData();
864     for (unsigned int i = 0; i < numInputs; ++i)
865     {
866         auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[i]);
867         BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
868     }
869     auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
870     BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, DataType)));
871 }
872
873 BOOST_AUTO_TEST_CASE(CreateStackFloat32Workload)
874 {
875     NeonCreateStackWorkloadTest<armnn::DataType::Float32>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
876 }
877
878 BOOST_AUTO_TEST_CASE(CreateStackUint8Workload)
879 {
880     NeonCreateStackWorkloadTest<armnn::DataType::QuantisedAsymm8>({ 3, 4, 5 }, { 3, 4, 2, 5 }, 2, 2);
881 }
882
883 template <typename QuantizedLstmWorkloadType>
884 static void NeonCreateQuantizedLstmWorkloadTest()
885 {
886     using boost::polymorphic_downcast;
887
888     Graph graph;
889     NeonWorkloadFactory factory = NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
890
891     auto workload = CreateQuantizedLstmWorkloadTest<QuantizedLstmWorkloadType>(factory, graph);
892
893     QuantizedLstmQueueDescriptor queueDescriptor = workload->GetData();
894
895     IAclTensorHandle* inputHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
896     BOOST_TEST((inputHandle->GetShape() == TensorShape({2, 2})));
897     BOOST_TEST((inputHandle->GetDataType() == arm_compute::DataType::QASYMM8));
898
899     IAclTensorHandle* cellStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
900     BOOST_TEST((cellStateInHandle->GetShape() == TensorShape({2, 4})));
901     BOOST_TEST((cellStateInHandle->GetDataType() == arm_compute::DataType::QSYMM16));
902
903     IAclTensorHandle* outputStateInHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[2]);
904     BOOST_TEST((outputStateInHandle->GetShape() == TensorShape({2, 4})));
905     BOOST_TEST((outputStateInHandle->GetDataType() == arm_compute::DataType::QASYMM8));
906
907     IAclTensorHandle* cellStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
908     BOOST_TEST((cellStateOutHandle->GetShape() == TensorShape({2, 4})));
909     BOOST_TEST((cellStateOutHandle->GetDataType() == arm_compute::DataType::QSYMM16));
910
911     IAclTensorHandle* outputStateOutHandle = polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[1]);
912     BOOST_TEST((outputStateOutHandle->GetShape() == TensorShape({2, 4})));
913     BOOST_TEST((outputStateOutHandle->GetDataType() == arm_compute::DataType::QASYMM8));
914 }
915
916 BOOST_AUTO_TEST_CASE(CreateQuantizedLstmWorkload)
917 {
918     NeonCreateQuantizedLstmWorkloadTest<NeonQuantizedLstmWorkload>();
919 }
920
921 BOOST_AUTO_TEST_SUITE_END()