//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "LayerTests.hpp"
#include "WorkloadTestUtils.hpp"
#include "TensorUtils.hpp"
#include <ResolveType.hpp>

#include "test/TensorHelpers.hpp"
#include "TensorCopyUtils.hpp"
#include "Permute.hpp"

#include <boost/test/unit_test.hpp>
#include <boost/assert.hpp>

#include <armnn/LayerSupport.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/IBackendInternal.hpp>
#include <backendsCommon/WorkloadFactory.hpp>

#include <algorithm>
#include <boost/cast.hpp>

#include "Conv2dTestImpl.hpp"
#include "BatchNormTestImpl.hpp"
#include "ActivationTestImpl.hpp"
#include "Pooling2dTestImpl.hpp"
#include "FullyConnectedTestImpl.hpp"
#include "GatherTestImpl.hpp"
#include "SpaceToBatchNdTestImpl.hpp"
#include "SpaceToDepthTestImpl.hpp"
#include "SplitterTestImpl.hpp"
#include "SoftmaxTestImpl.hpp"
#include "StridedSliceTestImpl.hpp"
#include "NormTestImpl.hpp"
#include "LstmTestImpl.hpp"
#include "ConvertFp16ToFp32TestImpl.hpp"
#include "ConvertFp32ToFp16TestImpl.hpp"
#include "DebugTestImpl.hpp"
#include "DequantizeTestImpl.hpp"
#include "QuantizeTestImpl.hpp"
#include "TransposeConvolution2dTestImpl.hpp"

// 3-channel 16x8 image used as common input data for a number of Conv2d tests.
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

// 2-channel bias used by a number of Conv2d tests.
static std::vector<float> Bias2({0, 2});

static std::vector<float> Bias4({1, 2, 3, 4});

static std::vector<float> Bias8({1, 2, 3, 4, 1, 2, 3, 4});

struct Simple3dSoftmaxOutputData
{
    const std::vector<float> outputData =
            {
                0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
                0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f
            };

    const armnn::TensorShape inputShape{ 1, 8, 1 };

    const std::vector<float> inputData =
            {
                    0.f, 1.f, 0.f, 0.f,
                    .5f, 0.f, 0.f, 0.f,
            };
};

struct Simple4dSoftmaxData
{
    const armnn::TensorShape inputShape{ 1, 8, 1, 1 };

    const std::vector<float> outputData = { 0.0964599f, 0.26220518f, 0.0964599f, 0.0964599f,
                                            0.15903549f, 0.0964599f, 0.0964599f, 0.0964599f };
    const std::vector<float> inputData =
            {
                    0.f, 1.f, 0.f, 0.f,
                    .5f, 0.f, 0.f, 0.f
            };
};
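
// The expected outputs above follow directly from softmax(x_i) = exp(x_i) / sum_j exp(x_j)
// taken over all 8 input elements; a worked check:
//   sum = 6*exp(0) + exp(1) + exp(0.5) = 6 + 2.71828 + 1.64872 = 10.36700
//   exp(0)   / sum = 0.0964599
//   exp(1)   / sum = 0.26220518
//   exp(0.5) / sum = 0.15903549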

// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias2));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper function that returns either Bias4 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias4(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias4.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias4));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper function that returns either Bias8 or an empty vector depending on whether bias is enabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias8(bool biasEnabled, float qScale)
{
    if(biasEnabled)
    {
        // Note: the tensor size must come from Bias8, not Bias4, so the descriptor matches the data.
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias8.size())}, ArmnnType);
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, 0.0f, Bias8));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}

// Helper function that returns Bias2, Bias4 or Bias8, selected by the number of output channels,
// or an empty vector if bias is disabled.
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
boost::multi_array<T, 1> GetBias(bool biasEnabled, float qScale, armnn::TensorInfo outputInfo, armnn::DataLayout layout)
{
    const armnnUtils::DataLayoutIndexed dataLayoutIndexed(layout);
    const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
    const unsigned int outputChannels = outputInfo.GetShape()[channelsIndex];

    switch (outputChannels)
    {
        case 2:
        default:
        {
            return GetBias2<ArmnnType>(biasEnabled, qScale);
        }
        case 4:
        {
            return GetBias4<ArmnnType>(biasEnabled, qScale);
        }
        case 8:
        {
            return GetBias8<ArmnnType>(biasEnabled, qScale);
        }
    }
}
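
// For illustration (hypothetical values, not used by any test): given an output tensor of
// shape {1, 8, 4, 4} in NCHW, the channels index is 1, so
// GetBias<armnn::DataType::Float32>(true, 1.0f, outputInfo, armnn::DataLayout::NCHW)
// reads 8 output channels and dispatches to GetBias8. Any channel count other than
// 4 or 8 falls through to GetBias2.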


template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch with 3-channel 3x5 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x4 image.
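    // With a 5x3 (height x width) kernel, no padding and stride 1, the output height is
    // (8 - 5)/1 + 1 = 4 and the output width is (16 - 3)/1 + 1 = 14, matching {1, 2, 4, 14} below.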
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path.

    // Use common single-batch 3-channel 16x8 image.
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch of 3-channel 3x3 kernels.
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x6 image.
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    armnn::DataLayout dataLayout)
{
    // Use a single-batch 1-channel 4x3 (width x height) image in NHWC layout.
    armnn::TensorInfo inputDesc({1, 3, 4, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
                                                      {
                                                       1, 5, 2, 3,
                                                       8, 7, 3, 6,
                                                       3, 3, 9, 1
                                                       });

    // Use a single 1-channel 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, {
                                                                    4, 5, 6,
                                                                    0, 0, 0,
                                                                    3, 2, 1
                                                                    });

    // Expected output is 1 batch of a 1-channel 4x3 image, the same size as the input.
    armnn::TensorInfo outputDesc({1, 3, 4, 1}, ArmnnType);

    const std::vector<float> outputData =
            {
                    23, 41, 33, 21,
                    44, 65, 76, 52,
                    82, 85, 79, 42
            };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset);
}

template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2d3x3Stride2x2TestCommon(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        float qScale,
        int32_t qOffset,
        bool biasEnabled,
        const armnn::DataLayout& dataLayout)
{
    // Input is a single-batch, 1 channel, 5x5 image.
    armnn::TensorInfo inputDesc({1, 5, 5, 1}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc,
            {
                1, 5, 2, 3, 5,
                8, 7, 3, 6, 3,
                3, 3, 9, 1, 9,
                4, 1, 8, 1, 3,
                6, 8, 1, 9, 2
            });

    // Use a 3x3 kernel.
    armnn::TensorInfo kernelDesc({1, 3, 3, 1}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc,
            {
                4, 5, 6,
                0, 0, 0,
                3, 2, 1
            });

    // Expected output is a single-batch, 1 channel, 3x3 image.
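    // With padding 1 on each side, a 3x3 kernel and stride 2, each output dimension is
    // (5 - 3 + 1 + 1)/2 + 1 = 3, giving the {1, 3, 3, 1} shape below.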
    armnn::TensorInfo outputDesc({1, 3, 3, 1}, ArmnnType);

    const std::vector<T> outputData =
            {
                23, 33, 24,
                91, 99, 48,
                26, 50, 19
            };

    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, outputData);

    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;
    uint32_t strideX  = 2;
    uint32_t strideY  = 2;

    return SimpleConvolution2dNhwcTestImpl<ArmnnType, ArmnnType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        boost::multi_array<T, 1>(),
        expectedOutput,
        dataLayout,
        qScale,
        qOffset,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY);
}

LayerTestResult<float, 4> SimpleConvolution2d3x5Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
        workloadFactory, memoryManager, 0.f, 0, biasEnabled, layout);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3NhwcTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return SimpleConvolution2d3x3NhwcTestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        armnn::DataLayout::NHWC);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Stride2x2Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        bool biasEnabled,
        const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3Stride2x2TestCommon<armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        0.f,
        0,
        biasEnabled,
        layout);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x5QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x5TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
        workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

LayerTestResult<int16_t, 4> SimpleConvolution2d3x3QSymm16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    return SimpleConvolution2d3x3TestCommon<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
            workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input.
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,
            12,22,32,
            13,23,33
        })));

    // Use 1 batch of a 1-channel 2x2 kernel.
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,
            -12,-22,
        })));

// Expected output is 1 batch of a 1-channel 6x8 image.
// Manually calculated like this:
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
//[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
               0,    0,      0,    0,    0,    0,
            -242,  -594,  -934, -372,    0,    0,
            -495, -1190, -1850, -725,    0,    0,
            -538, -1256, -1916, -748,    0,    0,
            -273, -626,  -946,  -363,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0
        })));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        2,  // Padding top.
        3,  // Padding right.
        4); // Padding bottom.
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const armnn::DataLayout layout,
    float qScale,
    int32_t qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input.
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        })));

    // Use 1 batch of a 1-channel 4x4 kernel.
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, ArmnnType);
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        })));

    // Expected output is 1 batch of a 1-channel 5x5 image.
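    // With padding (left 1, top 1, right 2, bottom 2), a 4x4 kernel and stride 1, each output
    // dimension is (5 - 4 + 1 + 2)/1 + 1 = 5, so the output matches the 5x5 input size.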
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -7140, -10580, -13940,  -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144,  -9318, -5152,
            -5032,  -7256,  -9376,  -6142, -3368,
        })));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(false, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2); // Padding bottom.
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::DataLayout layout)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon
            <armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, layout, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution1dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
            workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled)
{
    return Convolution1dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
            workloadFactory, memoryManager, 0.1f, 128, biasEnabled);
}

LayerTestResult<float,4> CompareConvolution2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<armnn::DataType::Float32>(
            workloadFactory, memoryManager, refWorkloadFactory);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> Convolution2d3x3DilationTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const std::vector<float>& inputNoQuantizedValues,
    armnn::TensorInfo& inputTensorInfo,
    const std::vector<float>& kernelNoQuantizedValues,
    armnn::TensorInfo& kernelTensorInfo,
    const std::vector<float>& outputExpectedNoQuantizedValues,
    armnn::TensorInfo& outputTensorInfo,
    uint32_t dilationX,
    uint32_t dilationY,
    armnn::DataLayout layout = armnn::DataLayout::NCHW,
    uint32_t padLeft = 0,
    uint32_t padTop = 0,
    uint32_t padRight = 0,
    uint32_t padBottom = 0,
    uint32_t strideX  = 1,
    uint32_t strideY  = 1,
    bool biasEnabled = false
)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QuantisedAsymm8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QuantisedSymm16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset(),
                                                                    inputNoQuantizedValues)));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset(),
                                                                     kernelNoQuantizedValues)));
    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
                                                                             outputTensorInfo.GetQuantizationOffset(),
                                                                             outputExpectedNoQuantizedValues)));

    return SimpleConvolution2dTestImpl<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            input,
            kernel,
            GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
            expectedOutput,
            qScale,
            qOffset,
            layout,
            padLeft,
            padTop,
            padRight,
            padBottom,
            strideX,
            strideY,
            dilationX,
            dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, the effective kernel size grows to d*(K-1)+1 = 3*(3-1)+1 = 7,
    // so the output is 4x4: (I - K_eff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 = 4.
    // (See the ConvOutputSizeSketch helper after this function.)
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        6., 5., 5., 5.,
        3., 2., 2., 2.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            inputNoQuantizedValues,
            inputTensorInfo,
            kernelNoQuantizedValues,
            kernelTensorInfo,
            outputExpectedNoQuantizedValues,
            outputTensorInfo,
            3,
            3,
            layout,
            0,  // Padding left.
            0,  // Padding top.
            0,  // Padding right.
            0,  // Padding bottom.
            1,  // strideX
            1,  // strideY
            biasEnabled);
}
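
// A small sketch of the output-size arithmetic used in the dilation comments in these tests
// (an illustrative helper added for clarity; the tests themselves do not call it): the
// effective kernel size is d*(K-1)+1, and the output size is
// (I - K_eff + padLow + padHigh)/S + 1 with integer (truncating) division.
constexpr uint32_t ConvOutputSizeSketch(uint32_t inputSize,
                                        uint32_t kernelSize,
                                        uint32_t dilation,
                                        uint32_t padLow,
                                        uint32_t padHigh,
                                        uint32_t stride)
{
    // (I - (d*(K-1)+1) + padLow + padHigh) / S + 1
    return (inputSize - (dilation * (kernelSize - 1) + 1) + padLow + padHigh) / stride + 1;
}
// e.g. ConvOutputSizeSketch(10, 3, 3, 0, 0, 1) == 4, matching the 4x4 outputs above, and
// ConvOutputSizeSketch(10, 2, 2, 1, 1, 3) == 4 for the padded, strided test further below.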

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x3x3Dilation3x3Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2, 3,
        4, 5, 6,
        7, 8, 9,

        1, 2, 3,
        4, 5, 6,
        7, 8, 9
    };

    // Since the dilation rate is 3, the effective kernel size grows to 3*(3-1)+1 = 7,
    // so the output is 4x4: (10 - 7 + 0)/1 + 1 = 4 (see ConvOutputSizeSketch above).
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        12., 10., 10., 10.,
        12., 10., 10., 10.,
        12., 10., 10., 10.,
         6.,  4.,  4.,  4.
    };

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            inputNoQuantizedValues,
            inputTensorInfo,
            kernelNoQuantizedValues,
            kernelTensorInfo,
            outputExpectedNoQuantizedValues,
            outputTensorInfo,
            3,
            3,
            layout,
            0,  // Padding left.
            0,  // Padding top.
            0,  // Padding right.
            0,  // Padding bottom.
            1,  // strideX
            1,  // strideY
            biasEnabled);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test(
        armnn::IWorkloadFactory &workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
        bool biasEnabled,
        const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
    {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1
    };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 2, 2}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
    {
        1, 2,
        3, 4
    };

    // Since the dilation rate is 2, the kernel dilates to an effective 3x3: d*(K-1)+1 --> 2*(2-1)+1 = 3,
    // so the output is 4x4: (I - K_eff + padLow + padHigh)/S + 1 => trunc((10 - 3 + 1 + 1)/3) + 1 = 4,
    // where dilation d = 2, kernel size K = 2, input size I = 10, padding = 1 per side, stride S = 3.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
    std::vector<float> outputExpectedNoQuantizedValues =
    {
        4,  7,  7, 3,
        6, 10, 10, 4,
        6, 10, 10, 4,
        2,  3,  3, 1
    };
    uint32_t padLeft = 1;
    uint32_t padTop = 1;
    uint32_t padRight = 1;
    uint32_t padBottom = 1;

    return Convolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            inputNoQuantizedValues,
            inputTensorInfo,
            kernelNoQuantizedValues,
            kernelTensorInfo,
            outputExpectedNoQuantizedValues,
            outputTensorInfo,
            2,
            2,
            layout,
            padLeft,
            padTop,
            padRight,
            padBottom,
            3,
            3,
            biasEnabled);
}

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory&,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
    bool,
    armnn::DataLayout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
Convolution2d2x2Dilation2x2Padding2x2Stride3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
    armnn::IWorkloadFactory &workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
    bool biasEnabled,
    const armnn::DataLayout layout);

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> DepthwiseConvolution2dAsymmetricTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled,
    const armnn::DataLayout layout)
{
    // Use a single-batch 2-channel 5x5 image as input.
    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
        {
             0,  1,  2,  3,  4,
             5,  6,  7,  8,  9,
            10, 11, 12, 13, 14,
            15, 16, 17, 18, 19,
            20, 21, 22, 23, 24,

            25, 26, 27, 28, 29,
            30, 31, 32, 33, 34,
            35, 36, 37, 38, 39,
            40, 41, 42, 43, 44,
            45, 46, 47, 48, 49
        })));

    // Use a depth multiplier of 1 on a 2-channel 4x4 kernel.
    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
        {
            32, 31, 30, 29,
            28, 27, 26, 25,
            24, 23, 22, 21,
            20, 19, 18, 17,

            16, 15, 14, 13,
            12, 11, 10,  9,
             8,  7,  6,  5,
             4,  3,  2,  1
        })));

    // Expected output is 1 batch of a 2-channel 5x5 image.
    // Calculated using the Python TensorFlow library with strideX=1, strideY=1.
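    // With 2 input channels and a depth multiplier of 1, the output has 2 channels; with padding
    // (left 1, top 1, right 2, bottom 2) and stride 1, each spatial dimension is
    // (5 - 4 + 1 + 2)/1 + 1 = 5, giving the {1, 2, 5, 5} shape below.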
    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5 }, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
        {
            1062, 1580, 1850, 1530, 1117,
            2140, 3108, 3500, 2842, 2042,
            3580, 5068, 5460, 4342, 3062,
            3618, 5072, 5390, 4248, 2971,
            3074, 4282, 4510, 3533, 2457,

            1550, 2284, 2362, 1955, 1428,
            2910, 4206, 4342, 3528, 2536,
            3390, 4886, 5022, 4068, 2916,
            3566, 5056, 5182, 4133, 2922,
            3100, 4352, 4452, 3517, 2465
        })));

    return DepthwiseConvolution2dAsymmetricTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2,  // Padding bottom.
        1,  // strideX
        1); // strideY
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> DepthwiseConvolution2dNhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    auto layout = armnn::DataLayout::NHWC;

    armnn::TensorInfo inputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
        {
             0,  1,  2,  3,  4,
             5,  6,  7,  8,  9,
            10, 11, 12, 13, 14,
            15, 16, 17, 18, 19,
            20, 21, 22, 23, 24,

            25, 26, 27, 28, 29,
            30, 31, 32, 33, 34,
            35, 36, 37, 38, 39,
            40, 41, 42, 43, 44,
            45, 46, 47, 48, 49
        })));

    armnn::TensorInfo kernelTensorInfo({ 1, 2, 4, 4 }, ArmnnType);
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
        {
             32, 31, 30, 29,
             28, 27, 26, 25,
             24, 23, 22, 21,
             20, 19, 18, 17,

             16, 15, 14, 13,
             12, 11, 10,  9,
              8,  7,  6,  5,
              4,  3,  2,  1
        })));

    armnn::TensorInfo outputTensorInfo({ 1, 2, 5, 5}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
        {
            1062, 1580, 1850, 1530, 1117,
            2140, 3108, 3500, 2842, 2042,
            3580, 5068, 5460, 4342, 3062,
            3618, 5072, 5390, 4248, 2971,
            3074, 4282, 4510, 3533, 2457,

            1550, 2284, 2362, 1955, 1428,
            2910, 4206, 4342, 3528, 2536,
            3390, 4886, 5022, 4068, 2916,
            3566, 5056, 5182, 4133, 2922,
            3100, 4352, 4452, 3517, 2465
        })));

    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        1,  // Padding left.
        1,  // Padding top.
        2,  // Padding right.
        2,  // Padding bottom.
        1,  // strideX
        1); // strideY
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType,
         typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    float qScale,
    int32_t qOffset,
    bool biasEnabled)
{
    auto layout = armnn::DataLayout::NHWC;

    armnn::TensorInfo inputTensorInfo({ 1, 1, 9, 9}, ArmnnType);
    auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
        QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(), inputTensorInfo.GetQuantizationOffset(),
        {
             0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 1, 1, 1, 0, 0, 0,
             0, 0, 0, 1, 1, 1, 0, 0, 0,
             0, 0, 0, 1, 1, 1, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo, std::vector<T>(
        QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(), kernelTensorInfo.GetQuantizationOffset(),
        {
             1, 2, 3,
             4, 5, 6,
             7, 8, 9
        })));

    uint32_t padLeft = 0;
    uint32_t padTop = 0;
    uint32_t padRight = 0;
    uint32_t padBottom = 0;
    uint32_t strideX  = 1;
    uint32_t strideY  = 1;
    uint32_t dilationX  = 3;
    uint32_t dilationY  = 3;

    // Since the dilation rate is 3, the effective kernel size is 3*(3-1)+1 = 7, which shrinks
    // the 9x9 input to a 3x3 output ((9 - 7)/1 + 1 = 3) of all 5s.
    armnn::TensorInfo outputTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(
        QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(), outputTensorInfo.GetQuantizationOffset(),
        {
             5, 5, 5,
             5, 5, 5,
             5, 5, 5
        })));

    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
        workloadFactory,
        memoryManager,
        input,
        kernel,
        GetBias2<ArmnnBType>(biasEnabled, qScale * qScale),
        expectedOutput,
        qScale,
        qOffset,
        layout,
        padLeft,
        padTop,
        padRight,
        padBottom,
        strideX,
        strideY,
        dilationX,
        dilationY);
}


template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 4> DepthwiseConvolution2d3x3DilationTestCommon(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const std::vector<float>& inputNoQuantizedValues,
        armnn::TensorInfo& inputTensorInfo,
        const std::vector<float>& kernelNoQuantizedValues,
        armnn::TensorInfo& kernelTensorInfo,
        const std::vector<float>& outputExpectedNoQuantizedValues,
        armnn::TensorInfo& outputTensorInfo,
        uint32_t dilationX,
        uint32_t dilationY,
        armnn::DataLayout layout = armnn::DataLayout::NCHW,
        bool biasEnabled = false)
{
    float qScale;
    int32_t qOffset;
    switch (ArmnnType)
    {
        case armnn::DataType::QuantisedAsymm8:
        {
            qScale = 0.1f;
            qOffset = 128;
            break;
        }
        case armnn::DataType::QuantisedSymm16:
        {
            qScale = 0.1f;
            qOffset = 0;
            break;
        }
        case armnn::DataType::Float32:
        default:
        {
            qScale = 0.f;
            qOffset = 0;
            break;
        }
    }

    inputTensorInfo.SetQuantizationScale(qScale);
    inputTensorInfo.SetQuantizationOffset(qOffset);
    kernelTensorInfo.SetQuantizationScale(qScale);
    kernelTensorInfo.SetQuantizationOffset(qOffset);
    outputTensorInfo.SetQuantizationScale(qScale);
    outputTensorInfo.SetQuantizationOffset(qOffset);

    auto input = MakeTensor<T, 4>(inputTensorInfo,
                                  std::vector<T>(QuantizedVector<T>(inputTensorInfo.GetQuantizationScale(),
                                                                    inputTensorInfo.GetQuantizationOffset(),
                                                                    inputNoQuantizedValues)));
    auto kernel = MakeTensor<T, 4>(kernelTensorInfo,
                                   std::vector<T>(QuantizedVector<T>(kernelTensorInfo.GetQuantizationScale(),
                                                                     kernelTensorInfo.GetQuantizationOffset(),
                                                                     kernelNoQuantizedValues)));
    auto expectedOutput = MakeTensor<T, 4>(outputTensorInfo,
                                           std::vector<T>(QuantizedVector<T>(outputTensorInfo.GetQuantizationScale(),
                                                                             outputTensorInfo.GetQuantizationOffset(),
                                                                             outputExpectedNoQuantizedValues)));

    uint32_t padLeft = 0;
    uint32_t padTop = 0;
    uint32_t padRight = 0;
    uint32_t padBottom = 0;
    uint32_t strideX  = 1;
    uint32_t strideY  = 1;

    return DepthwiseConvolution2dTestImpl<ArmnnType, ArmnnBType>(
            workloadFactory,
            memoryManager,
            input,
            kernel,
            GetBias<ArmnnBType>(biasEnabled, qScale * qScale, outputTensorInfo, layout),
            expectedOutput,
            qScale,
            qOffset,
            layout,
            padLeft,
            padTop,
            padRight,
            padBottom,
            strideX,
            strideY,
            dilationX,
            dilationY);
}

template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
LayerTestResult<T, 4> DepthwiseConvolution2d3x3Dilation3x3Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        bool biasEnabled,
        const armnn::DataLayout layout)
{
    armnn::TensorInfo inputTensorInfo({1, 1, 10, 10}, ArmnnType);
    std::vector<float> inputNoQuantizedValues =
            {
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
                    0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            };

    armnn::TensorInfo kernelTensorInfo({ 1, 1, 3, 3}, ArmnnType);
    std::vector<float> kernelNoQuantizedValues =
            {
                    1, 2, 3,
                    4, 5, 6,
                    7, 8, 9
            };

1418     // With a dilation rate of 3 the effective kernel extent grows to 7x7 (Keff = (K - 1) * dilation + 1),
1419     // so the output is 4x4: (I - Keff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1 (checked below the expected values).
1420     armnn::TensorInfo outputTensorInfo({ 1, 1, 4, 4}, ArmnnType);
1421     std::vector<float> outputExpectedNoQuantizedValues =
1422             {
1423                     6., 5., 5., 5.,
1424                     6., 5., 5., 5.,
1425                     6., 5., 5., 5.,
1426                     3., 2., 2., 2.
1427             };
1428
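    // Compile-time check of the shape arithmetic above (illustrative only, not
    // required by the test itself).
    constexpr unsigned int kEff = (3u - 1u) * 3u + 1u; // dilated kernel extent: 7
    static_assert((10u - kEff + 0u) / 1u + 1u == 4u, "a dilated 3x3 kernel over a 10x10 input should yield 4x4");
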
1429     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1430             workloadFactory,
1431             memoryManager,
1432             inputNoQuantizedValues,
1433             inputTensorInfo,
1434             kernelNoQuantizedValues,
1435             kernelTensorInfo,
1436             outputExpectedNoQuantizedValues,
1437             outputTensorInfo,
1438             3,
1439             3,
1440             layout,
1441             biasEnabled);
1442 }
1443
1444 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1445 LayerTestResult<T, 4> DepthwiseConvolution2d2x3x3Dilation3x3Test(
1446         armnn::IWorkloadFactory& workloadFactory,
1447         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1448         bool biasEnabled,
1449         const armnn::DataLayout layout)
1450 {
1451     armnn::TensorInfo inputTensorInfo({1, 2, 10, 10}, ArmnnType);
1452     std::vector<float> inputNoQuantizedValues =
1453             {
1454                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1455                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1456                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1457                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1458                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1459                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1460                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1461                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1462                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1463                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1464
1465                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1466                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1467                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1468                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1469                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1470                     0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
1471                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1472                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1473                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1474                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1475             };
1476
1477     armnn::TensorInfo kernelTensorInfo({ 1, 2, 3, 3}, ArmnnType);
1478     std::vector<float> kernelNoQuantizedValues =
1479             {
1480                     1, 2, 3,
1481                     4, 5, 6,
1482                     7, 8, 9,
1483
1484                     1, 2, 3,
1485                     4, 5, 6,
1486                     7, 8, 9
1487             };
1488
1489     // With a dilation rate of 3 the effective kernel extent grows to 7x7 (Keff = (K - 1) * dilation + 1),
1490     // so the output is 2x4x4, one 4x4 map per channel: (I - Keff + 2P)/S + 1 => (10 - 7 + 0)/1 + 1.
1491     armnn::TensorInfo outputTensorInfo({ 1, 2, 4, 4}, ArmnnType);
1492     std::vector<float> outputExpectedNoQuantizedValues =
1493             {
1494                     6., 5., 5., 5.,
1495                     6., 5., 5., 5.,
1496                     6., 5., 5., 5.,
1497                     3., 2., 2., 2.,
1498
1499                     6., 5., 5., 5.,
1500                     6., 5., 5., 5.,
1501                     6., 5., 5., 5.,
1502                     3., 2., 2., 2.
1503             };
1504
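    // Both input channels hold identical data and identical kernels, so the
    // depthwise (per-channel) convolution produces the same 4x4 block twice.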
1505     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1506             workloadFactory,
1507             memoryManager,
1508             inputNoQuantizedValues,
1509             inputTensorInfo,
1510             kernelNoQuantizedValues,
1511             kernelTensorInfo,
1512             outputExpectedNoQuantizedValues,
1513             outputTensorInfo,
1514             3,
1515             3,
1516             layout,
1517             biasEnabled);
1518 }
1519
1520 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1521 LayerTestResult<T, 4> DepthwiseConvolution2dMult4Test(
1522             armnn::IWorkloadFactory& workloadFactory,
1523             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1524             bool biasEnabled,
1525             const armnn::DataLayout layout)
1526 {
1527     armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
1528     std::vector<float> inputNoQuantizedValues =
1529             {
1530                     10.0, 10.0, 10.0,
1531                     10.0, 10.0, 10.0,
1532                     10.0, 10.0, 10.0,
1533
1534                     21.0, 22.0, 23.0,
1535                     24.0, 25.0, 26.0,
1536                     27.0, 28.0, 29.0
1537             };
1538
1539     armnn::TensorInfo kernelTensorInfo({ 4, 2, 2, 2}, ArmnnType);
1540
1541     std::vector<float> kernelNoQuantizedValues =
1542             {
1543                     0.25f, 0.25f,
1544                     0.25f, 0.25f,
1545
1546                     0.25f, 0.25f,
1547                     0.25f, 0.25f,
1548
1549                     0.0f , 0.0f,
1550                     0.0f , 0.1f,
1551
1552                     0.0f , 0.0f,
1553                     0.0f , 0.1f,
1554
1555                     0.2f , 0.0f,
1556                     0.0f , 0.0f,
1557
1558                     0.2f , 0.0f,
1559                     0.0f , 0.0f,
1560
1561                     0.0f , 0.3f,
1562                     0.0f , 0.0f,
1563
1564                     0.0f , 0.3f,
1565                     0.0f , 0.0f
1566             };
1567
1568     armnn::TensorInfo outputTensorInfo({ 1, 8, 2, 2}, ArmnnType);
1569     std::vector<float> outputExpectedNoQuantizedValues =
1570             {
1571                     10.f, 10.f,
1572                     10.f, 10.f,
1573
1574                     1.f, 1.f,
1575                     1.f, 1.f,
1576
1577                     2.f, 2.f,
1578                     2.f, 2.f,
1579
1580                     3.f, 3.f,
1581                     3.f, 3.f,
1582
1583                     23.f, 24.f,
1584                     26.f, 27.f,
1585
1586                     2.5f, 2.6000001f,
1587                     2.8f, 2.9f,
1588
1589                     4.2000003f, 4.4f,
1590                     4.8f, 5.f,
1591
1592                     6.6000004f, 6.9f,
1593                     7.5000005f, 7.8f
1594             };
1595
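    // How the expected values fall out (illustrative): the kernel shape { 4, 2, 2, 2 }
    // is laid out as [ depthMultiplier, inputChannels, H, W ], giving 4 * 2 = 8 output
    // channels grouped per input channel. For input channel 1 under the 0.25 kernel,
    // the top-left output is (21 + 22 + 24 + 25) * 0.25 = 23.
    constexpr float topLeftC1M0 = (21.0f + 22.0f + 24.0f + 25.0f) * 0.25f;
    static_assert(topLeftC1M0 == 23.0f, "expected-output derivation check");
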
1597     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1598             workloadFactory,
1599             memoryManager,
1600             inputNoQuantizedValues,
1601             inputTensorInfo,
1602             kernelNoQuantizedValues,
1603             kernelTensorInfo,
1604             outputExpectedNoQuantizedValues,
1605             outputTensorInfo,
1606             1,
1607             1,
1608             layout,
1609             biasEnabled);
1610 }
1611
1612 template<armnn::DataType ArmnnType, armnn::DataType ArmnnBType, typename T>
1613 LayerTestResult<T, 4> DepthwiseConvolution2dMult2Test(
1614             armnn::IWorkloadFactory& workloadFactory,
1615             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1616             bool biasEnabled,
1617             const armnn::DataLayout layout)
1618 {
1619     armnn::TensorInfo inputTensorInfo({1, 2, 3, 3}, ArmnnType);
1620     std::vector<float> inputNoQuantizedValues =
1621             {
1622                     10.0, 10.0, 10.0,
1623                     10.0, 10.0, 10.0,
1624                     10.0, 10.0, 10.0,
1625
1626                     21.0, 22.0, 23.0,
1627                     24.0, 25.0, 26.0,
1628                     27.0, 28.0, 29.0
1629             };
1630
1631     armnn::TensorInfo kernelTensorInfo({ 2, 2, 2, 2}, ArmnnType);
1632
1633     std::vector<float> kernelNoQuantizedValues =
1634             {
1635                     0.25f, 0.25f,
1636                     0.25f, 0.25f,
1637
1638                     0.2f , 0.0f,
1639                     0.0f , 0.0f,
1640
1641                     0.0f , 0.0f,
1642                     0.0f , 0.1f,
1643
1644                     0.0f , 0.3f,
1645                     0.0f , 0.0f
1647             };
1648
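    // Depth multiplier 2 applied to 2 input channels yields 2 * 2 = 4 output channels.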
1649     armnn::TensorInfo outputTensorInfo({ 1, 4, 2, 2}, ArmnnType);
1650     std::vector<float> outputExpectedNoQuantizedValues =
1651             {
1652                     10.f, 10.f,
1653                     10.f, 10.f,
1654
1655                     1.f, 1.f,
1656                     1.f, 1.f,
1657
1658                     4.2000003f, 4.4f,
1659                     4.8f, 5.f,
1660
1661                     6.6000004f, 6.9f,
1662                     7.5000005f, 7.8f
1663             };
1664
1666     return DepthwiseConvolution2d3x3DilationTestCommon<ArmnnType, ArmnnBType>(
1667             workloadFactory,
1668             memoryManager,
1669             inputNoQuantizedValues,
1670             inputTensorInfo,
1671             kernelNoQuantizedValues,
1672             kernelTensorInfo,
1673             outputExpectedNoQuantizedValues,
1674             outputTensorInfo,
1675             1,
1676             1,
1677             layout,
1678             biasEnabled);
1679 }
1680
1681 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
1682 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
1683         armnn::IWorkloadFactory&,
1684         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1685         bool,
1686         armnn::DataLayout);
1687
1688 template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
1689 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
1690         armnn::IWorkloadFactory&,
1691         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1692         bool,
1693         armnn::DataLayout);
1694
1695 template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
1696 DepthwiseConvolution2d3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
1697         armnn::IWorkloadFactory&,
1698         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1699         bool,
1700         armnn::DataLayout);
1701
1702 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
1703 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::Float32, armnn::DataType::Float32>(
1704         armnn::IWorkloadFactory&,
1705         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1706         bool,
1707         armnn::DataLayout);
1708
1709 template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
1710 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
1711         armnn::IWorkloadFactory&,
1712         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1713         bool,
1714         armnn::DataLayout);
1715
1716 template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
1717 DepthwiseConvolution2d2x3x3Dilation3x3Test<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
1718         armnn::IWorkloadFactory&,
1719         const armnn::IBackendInternal::IMemoryManagerSharedPtr&,
1720         bool,
1721         armnn::DataLayout);
1722
1723 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
1724 DepthwiseConvolution2dMult4Test<armnn::DataType::Float32, armnn::DataType::Float32>(
1725         armnn::IWorkloadFactory &workloadFactory,
1726         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
1727         bool biasEnabled,
1728         const armnn::DataLayout layout);
1729
1730 template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
1731 DepthwiseConvolution2dMult2Test<armnn::DataType::Float32, armnn::DataType::Float32>(
1732         armnn::IWorkloadFactory &workloadFactory,
1733         const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager,
1734         bool biasEnabled,
1735         const armnn::DataLayout layout);
1736
1737 LayerTestResult<float, 4> DepthwiseConvolution2dTest(
1738     armnn::IWorkloadFactory& workloadFactory,
1739     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1740     bool biasEnabled,
1741     const armnn::DataLayout layout)
1742 {
1743     return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
1744         workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
1745 }
1746
1747 LayerTestResult<float, 4> DepthwiseConvolution2dDepthNhwcTest(
1748     armnn::IWorkloadFactory& workloadFactory,
1749     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1750     bool biasEnabled)
1751 {
1752     return DepthwiseConvolution2dNhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
1753         workloadFactory, memoryManager, 0.0f, 0, biasEnabled);
1754 }
1755
1756 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(
1757     armnn::IWorkloadFactory& workloadFactory,
1758     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1759     bool biasEnabled,
1760     const armnn::DataLayout layout)
1761 {
1762     return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
1763         workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
1764 }
1765
1766 LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul64Test(
1767     armnn::IWorkloadFactory& workloadFactory,
1768     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
1769 {
1770     armnn::TensorInfo inputTensorInfo({ 1, 1, 2, 2 }, armnn::DataType::Float32);
1771     auto input = MakeTensor<float, 4>(inputTensorInfo, { 1.f, 2.f, 3.f, 4.f });
1772
1773     std::vector<float> kernelData;
1774     std::vector<float> singleDepthKernel{ 1.f, -1.f, -1.f, 1.f };
1775     for (unsigned int i = 0; i < 64; ++i)
1776     {
1777         kernelData.insert(kernelData.end(), singleDepthKernel.begin(), singleDepthKernel.end());
1778     }
1779     armnn::TensorInfo kernelTensorInfo({ 64, 1, 2, 2 }, armnn::DataType::Float32);
1780     auto kernel = MakeTensor<float, 4>(kernelTensorInfo, kernelData);
1781
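    // Every one of the 64 depthwise channels applies { 1, -1, -1, 1 } to the single
    // input { 1, 2, 3, 4 }: 1 - 2 - 3 + 4 = 0, hence the all-zero expected output.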
1782     std::vector<float> expectedOutputData(64, 0.f);
1783     armnn::TensorInfo outputTensorInfo({ 1, 64, 1, 1 }, armnn::DataType::Float32);
1784     auto expectedOutput = MakeTensor<float, 4>(outputTensorInfo, expectedOutputData);
1785
1786     return DepthwiseConvolution2dTestImpl<armnn::DataType::Float32, armnn::DataType::Float32>(
1787             workloadFactory,
1788             memoryManager,
1789             input,
1790             kernel,
1791             boost::multi_array<float, 1>(),
1792             expectedOutput,
1793             0.f,
1794             0,
1795             armnn::DataLayout::NCHW);
1796 }
1797
1798 LayerTestResult<float, 4> DepthwiseConvolution2dAsymmetricTest(
1799     armnn::IWorkloadFactory& workloadFactory,
1800     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1801     bool biasEnabled,
1802     const armnn::DataLayout layout)
1803 {
1804     return DepthwiseConvolution2dAsymmetricTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
1805         workloadFactory, memoryManager, 0.0f, 0, biasEnabled, layout);
1806 }
1807
1808 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(
1809     armnn::IWorkloadFactory& workloadFactory,
1810     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1811     bool biasEnabled,
1812     const armnn::DataLayout layout)
1813 {
1814     return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
1815         workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
1816 }
1817
1818 LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(
1819     armnn::IWorkloadFactory& workloadFactory,
1820     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1821     bool biasEnabled,
1822     const armnn::DataLayout layout)
1823 {
1824     return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedAsymm8, armnn::DataType::Signed32>(
1825         workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
1826 }
1827
1828 LayerTestResult<float, 4> SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTest(
1829         armnn::IWorkloadFactory& workloadFactory,
1830         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
1831 {
1832     return SimpleDepthwiseConvolution2d3x3Dilation3x3NhwcTestCommon<armnn::DataType::Float32, armnn::DataType::Float32>(
1833             workloadFactory,
1834             memoryManager,
1835             0.f,
1836             0,
1837             false);
1838 }
1839
1840 LayerTestResult<int16_t, 4> DepthwiseConvolution2dInt16Test(
1841         armnn::IWorkloadFactory& workloadFactory,
1842         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1843         bool biasEnabled,
1844         const armnn::DataLayout layout)
1845 {
1846     return DepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
1847         workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
1848 }
1849
1850 LayerTestResult<int16_t, 4> DepthwiseConvolution2dDepthMul1Int16Test(
1851                 armnn::IWorkloadFactory& workloadFactory,
1852                 const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1853                 bool biasEnabled,
1854                 const armnn::DataLayout layout)
1855 {
1856     return DepthwiseConvolution2dDepthMul1TestImpl<armnn::DataType::QuantisedSymm16, armnn::DataType::Signed32>(
1857         workloadFactory, memoryManager, 0.5f, 50, biasEnabled, layout);
1858 }
1859
1860 LayerTestResult<float, 4> CompareDepthwiseConvolution2dFloatTest(
1861     armnn::IWorkloadFactory& workloadFactory,
1862     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1863     armnn::IWorkloadFactory& refWorkloadFactory,
1864     const armnn::DataLayout layout)
1865 {
1866     return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::Float32>(
1867         workloadFactory, memoryManager, refWorkloadFactory, layout);
1868 }
1869
1870 LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dUint8Test(
1871     armnn::IWorkloadFactory& workloadFactory,
1872     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1873     armnn::IWorkloadFactory& refWorkloadFactory,
1874     const armnn::DataLayout layout)
1875 {
1876     return CompareDepthwiseConvolution2dTestImpl<armnn::DataType::QuantisedAsymm8>(
1877         workloadFactory, memoryManager, refWorkloadFactory, layout);
1878 }
1879
1880 LayerTestResult<float,4> SimpleNormalizationAcrossTest(
1881     armnn::IWorkloadFactory& workloadFactory,
1882     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
1883 {
1884     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
1885     auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
1886     return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
1887 }
1888
1889 LayerTestResult<float,4> SimpleNormalizationWithinTest(
1890     armnn::IWorkloadFactory& workloadFactory,
1891     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
1892 {
1893     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
1894     auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
1895     return SimpleNormalizationTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
1896 }
1897
1898 LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(
1899     armnn::IWorkloadFactory& workloadFactory,
1900     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
1901 {
1902     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
1903     auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
1904     return SimpleNormalizationNhwcTestImpl(workloadFactory, memoryManager, normChannel, normMethod);
1905 }
1906
1907 LayerTestResult<float,2> SimpleSoftmaxTest(
1908     armnn::IWorkloadFactory& workloadFactory,
1909     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1910     float beta)
1911 {
1912     return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta);
1913 }
1914
1915 LayerTestResult<float,2> SimpleAxisSoftmaxTest(
1916         armnn::IWorkloadFactory& workloadFactory,
1917         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1918         float beta,
1919         int axis)
1920 {
1921     return SimpleSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, axis);
1922 }
1923
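// Illustrative reference only, not part of the test suite (assumes <cmath>,
// <cstddef> and <vector> are reachable through the existing includes): the
// expected values in the axis-softmax tests below come from a plain softmax
// along a length-5 axis,
//     y_i = exp(beta * (x_i - max_j x_j)) / sum_k exp(beta * (x_k - max_j x_j)),
// e.g. for { 17, 16, 15, 14, 1 } with beta = 1 the first entry is
// 1 / (1 + e^-1 + e^-2 + e^-3 + e^-16) ~= 0.643914.
inline std::vector<float> NaiveSoftmaxSketch(const std::vector<float>& x, float beta)
{
    float maxVal = x[0];
    for (float v : x)
    {
        maxVal = std::max(maxVal, v);
    }

    std::vector<float> y(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i)
    {
        y[i] = std::exp(beta * (x[i] - maxVal));
        sum += y[i];
    }
    for (float& v : y)
    {
        v /= sum;
    }
    return y;
}
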
1924 LayerTestResult<float,3> Simple3dSoftmaxTest(
1925         armnn::IWorkloadFactory& workloadFactory,
1926         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1927         float beta)
1928 {
1929     Simple3dSoftmaxOutputData data;
1930     return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
1931                                                              data.inputShape, data.outputData, data.inputData);
1932 }
1933
1934 LayerTestResult<float,3> Simple3dAxisSoftmaxTest(
1935         armnn::IWorkloadFactory& workloadFactory,
1936         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1937         float beta,
1938         int axis)
1939 {
1940     armnn::TensorShape inputShape;
1941     std::vector<float> inputData;
1942     std::vector<float> outputData;
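    // All three axis cases use the same ten values; the shape is simply chosen so
    // that the softmax axis always has length 5, which is why the expected
    // probabilities repeat here and in the 4d variant below.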
1943     switch (axis)
1944     {
1945     case -3:
1946     case 0:
1947         {
1948             inputShape = {5, 2, 2};
1949
1950             inputData =
1951                     {
1952                             17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
1953
1954                             15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
1955                     };
1956
1957             outputData =
1958                     {
1959                             0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
1960                             0.236882800924671f,
1961                             0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
1962                             0.087144312427294f,
1963
1964                             0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
1965                             0.032058600957022f,
1966                             0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
1967                             7.246299848982885e-08f
1968                     };
1969             break;
1970         }
1971     case -2:
1972     case 1:
1973         {
1974             inputShape = {2, 5, 2};
1975
1976             inputData =
1977                     {
1978                             17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
1979
1980                             17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
1981                     };
1982
1983             outputData =
1984                     {
1985                             0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
1986                             0.087144312427294f,
1987                             0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
1988                             7.246299848982885e-08f,
1989
1990                             0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
1991                             0.087144312427294f,
1992                             0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
1993                             7.246299848982885e-08f
1994                     };
1995         break;
1996         }
1997     case -1:
1998     case 2:
1999         {
2000             inputShape = {2, 2, 5};
2001
2002             inputData =
2003                     {
2004                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
2005                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
2006                     };
2007
2008             outputData =
2009                     {
2010                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2011                             7.246299848982885e-08f,
2012                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2013                             7.246299848982885e-08f,
2014
2015                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2016                             7.246299848982885e-08f,
2017                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2018                             7.246299848982885e-08f
2019                     };
2020             break;
2021         }
2022     }
2023
2024     return Simple3dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta,
2025                                                              inputShape, outputData, inputData, axis);
2026 }
2027
2028 LayerTestResult<float,4> Simple4dSoftmaxTest(
2029         armnn::IWorkloadFactory& workloadFactory,
2030         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2031         float beta)
2032 {
2033     Simple4dSoftmaxData data;
2034     return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, data.inputShape,
2035                                                              data.outputData, data.inputData);
2036 }
2037
2038 LayerTestResult<float,4> Simple4dAxisSoftmaxTest(
2039         armnn::IWorkloadFactory& workloadFactory,
2040         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2041         float beta,
2042         int axis)
2043 {
2044     armnn::TensorShape inputShape;
2045     std::vector<float> inputData;
2046     std::vector<float> outputData;
2047     switch (axis)
2048     {
2049     case -4:
2050     case 0:
2051         {
2052             inputShape = {5, 2, 2, 2};
2053
2054             inputData =
2055                     {
2056                             17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f,
2057                             16.0f, -2.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f, 15.0f, -3.0f,
2058                             15.0f, -3.0f, 15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 14.0f, -4.0f,
2059                             14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f, 1.0f, -17.0f
2060                     };
2061
2062             outputData =
2063                     {
2064                             0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
2065                             0.643914213228014f,
2066                             0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.236882800924671f,
2067                             0.236882800924671f,
2068                             0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.236882800924671f,
2069                             0.236882800924671f,
2070                             0.236882800924671f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
2071                             0.087144312427294f,
2072
2073                             0.087144312427294f, 0.087144312427294f, 0.087144312427294f, 0.087144312427294f,
2074                             0.032058600957022f,
2075                             0.032058600957022f, 0.032058600957022f, 0.032058600957022f, 0.032058600957022f,
2076                             0.032058600957022f,
2077                             0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f,
2078                             7.246299848982885e-08f,
2079                             7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
2080                             7.246299848982885e-08f, 7.246299848982885e-08f
2081                     };
2082             break;
2083         }
2084     case -3:
2085     case 1:
2086         {
2087             inputShape = {2, 5, 2, 2};
2088
2089             inputData =
2090                     {
2091                             17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
2092                             15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f,
2093                             17.0f, -1.0f, 17.0f, -1.0f, 16.0f, -2.0f, 16.0f, -2.0f, 15.0f, -3.0f,
2094                             15.0f, -3.0f, 14.0f, -4.0f, 14.0f, -4.0f, 1.0f, -17.0f, 1.0f, -17.0f
2095                     };
2096
2097             outputData =
2098                     {
2099                             0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
2100                             0.236882800924671f,
2101                             0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
2102                             0.087144312427294f,
2103                             0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
2104                             0.032058600957022f,
2105                             0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
2106                             7.246299848982885e-08f,
2107
2108
2109                             0.643914213228014f, 0.643914213228014f, 0.643914213228014f, 0.643914213228014f,
2110                             0.236882800924671f,
2111                             0.236882800924671f, 0.236882800924671f, 0.236882800924671f, 0.087144312427294f,
2112                             0.087144312427294f,
2113                             0.087144312427294f, 0.087144312427294f, 0.032058600957022f, 0.032058600957022f,
2114                             0.032058600957022f,
2115                             0.032058600957022f, 7.246299848982885e-08f, 7.246299848982885e-08f, 7.246299848982885e-08f,
2116                             7.246299848982885e-08f
2117                     };
2118             break;
2119         }
2120     case -2:
2121     case 2:
2122         {
2123         inputShape = {2, 2, 5, 2};
2124
2125         inputData =
2126                 {
2127                         17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
2128                         17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
2129                         17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f,
2130                         17.0f, -1.0f, 16.0f, -2.0f, 15.0f, -3.0f, 14.0f, -4.0f, 1.0f, -17.0f
2131                 };
2132
2133         outputData =
2134                 {
2135                         0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
2136                         0.087144312427294f,
2137                         0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
2138                         7.246299848982885e-08f,
2139                         0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
2140                         0.087144312427294f,
2141                         0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
2142                         7.246299848982885e-08f,
2143
2144                         0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
2145                         0.087144312427294f,
2146                         0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
2147                         7.246299848982885e-08f,
2148                         0.643914213228014f, 0.643914213228014f, 0.236882800924671f, 0.236882800924671f,
2149                         0.087144312427294f,
2150                         0.087144312427294f, 0.032058600957022f, 0.032058600957022f, 7.246299848982885e-08f,
2151                         7.246299848982885e-08f
2152                 };
2153         break;
2154         }
2155     case -1:
2156     case 3:
2157         {
2158             inputShape = {2, 2, 2, 5};
2159
2160             inputData =
2161                     {
2162                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
2163                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
2164                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f,
2165                             17.0f, 16.0f, 15.0f, 14.0f, 1.0f, -1.0f, -2.0f, -3.0f, -4.0f, -17.0f
2166                     };
2167
2168             outputData =
2169                     {
2170                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2171                             7.246299848982885e-08f,
2172                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2173                             7.246299848982885e-08f,
2174                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2175                             7.246299848982885e-08f,
2176                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2177                             7.246299848982885e-08f,
2178
2179                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2180                             7.246299848982885e-08f,
2181                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2182                             7.246299848982885e-08f,
2183                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2184                             7.246299848982885e-08f,
2185                             0.643914213228014f, 0.236882800924671f, 0.087144312427294f, 0.032058600957022f,
2186                             7.246299848982885e-08f
2187                     };
2188             break;
2189         }
2190     }
2191
2192     return Simple4dSoftmaxTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, beta, inputShape,
2193                                                              outputData, inputData, axis);
2194 }
2195
2196 LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(
2197     armnn::IWorkloadFactory& workloadFactory,
2198     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2199     float beta)
2200 {
2201     return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta);
2202 }
2203
2204 LayerTestResult<uint8_t,3> Simple3dSoftmaxUint8Test(
2205         armnn::IWorkloadFactory& workloadFactory,
2206         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2207         float beta)
2208 {
2209     Simple3dSoftmaxOutputData data;
2210     return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta,
2211                                                                      data.inputShape, data.outputData, data.inputData);
2212 }
2213
2214 LayerTestResult<uint8_t,4> Simple4dSoftmaxUint8Test(
2215         armnn::IWorkloadFactory& workloadFactory,
2216         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2217         float beta)
2218 {
2219     Simple4dSoftmaxData data;
2220
2221     return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, beta,
2222                                                                      data.inputShape, data.outputData, data.inputData);
2223 }
2224
2225 LayerTestResult<int16_t,2> SimpleSoftmaxUint16Test(
2226         armnn::IWorkloadFactory& workloadFactory,
2227         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2228         float beta)
2229 {
2230     return SimpleSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta);
2231 }
2232
2233 LayerTestResult<int16_t,3> Simple3dSoftmaxUint16Test(
2234         armnn::IWorkloadFactory& workloadFactory,
2235         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2236         float beta)
2237 {
2238     Simple3dSoftmaxOutputData data;
2239     return Simple3dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
2240                                                                      data.inputShape, data.outputData, data.inputData);
2241 }
2242
2243 LayerTestResult<int16_t,4> Simple4dSoftmaxUint16Test(
2244         armnn::IWorkloadFactory& workloadFactory,
2245         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2246         float beta)
2247 {
2248     Simple4dSoftmaxData data;
2249
2250     return Simple4dSoftmaxTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, beta,
2251                                                                      data.inputShape, data.outputData, data.inputData);
2252 }
2253
2254 LayerTestResult<float,4> CompareNormalizationTest(
2255     armnn::IWorkloadFactory& workloadFactory,
2256     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2257     armnn::IWorkloadFactory& refWorkloadFactory,
2258     armnn::NormalizationAlgorithmChannel normChannel,
2259     armnn::NormalizationAlgorithmMethod normMethod)
2260 {
2261     return CompareNormalizationTestImpl(workloadFactory, memoryManager, refWorkloadFactory, normChannel, normMethod);
2262 }
2263
2264 LayerTestResult<float,2> CompareSoftmaxTest(
2265     armnn::IWorkloadFactory& workloadFactory,
2266     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2267     armnn::IWorkloadFactory& refWorkloadFactory,
2268     float beta)
2269 {
2270     return CompareSoftmaxTestImpl<armnn::DataType::Float32>(
2271         workloadFactory, memoryManager, refWorkloadFactory, beta);
2272 }
2273
2274 LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(
2275     armnn::IWorkloadFactory& workloadFactory,
2276     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2277     armnn::IWorkloadFactory& refWorkloadFactory,
2278     float beta)
2279 {
2280     return CompareSoftmaxTestImpl<armnn::DataType::QuantisedAsymm8>(
2281         workloadFactory, memoryManager, refWorkloadFactory, beta);
2282 }
2283
2284 std::vector<LayerTestResult<float,3>> SplitterTest(
2285     armnn::IWorkloadFactory& workloadFactory,
2286     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2287 {
2288     return SplitterTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
2289 }
2290
2291 std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(
2292     armnn::IWorkloadFactory& workloadFactory,
2293     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2294 {
2295     return SplitterTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
2296 }
2297
2298 std::vector<LayerTestResult<int16_t,3>> SplitterInt16Test(
2299     armnn::IWorkloadFactory& workloadFactory,
2300     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2301 {
2302     return SplitterTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
2303 }
2304
2305 LayerTestResult<float, 3> CopyViaSplitterTest(
2306     armnn::IWorkloadFactory& workloadFactory,
2307     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2308 {
2309     return CopyViaSplitterTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
2310 }
2311
2312 LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(
2313     armnn::IWorkloadFactory& workloadFactory,
2314     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2315 {
2316     return CopyViaSplitterTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
2317 }
2318
2319 LayerTestResult<int16_t, 3> CopyViaSplitterInt16Test(
2320         armnn::IWorkloadFactory& workloadFactory,
2321         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2322 {
2323     return CopyViaSplitterTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
2324 }
2325
2326 #if defined(ARMCOMPUTEREF_ENABLED)
2327
2328 // At the moment the LSTM unit tests are run only on the reference backend.
2329
2330 void LstmUtilsZeroVectorTest()
2331 {
2332     armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32);
2333     boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>(
2334             {2., 3., 3., 4.}));
2335
2336     boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>(
2337             {0., 0., 0., 0.}));
2338
2339     return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput);
2340 }
2341
2342 void LstmUtilsMeanStddevNormalizationNoneZeroInputTest()
2343 {
2344     uint32_t batchSize = 2;
2345     uint32_t vecSize = 4;
2346     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2347     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2348             { 0.1f, 0.2f, 0.3f, 0.4f,      //batch 0
2349               0.9f, 1.0f, 1.1f, 1.2f }));  //batch 1
2350
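    // Expected values follow y = (x - mean) / stddev per batch row: batch 0 has
    // mean 0.25 and population stddev sqrt(0.0125) ~= 0.111803, so
    // (0.1 - 0.25) / 0.111803 ~= -1.34164.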
2351     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2352             { -1.34164071f, -0.447213531f, 0.44721365f,  1.34164071f,      //batch 0
2353               -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f  }));  //batch 1
2354
2355     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2356             vecSize, batchSize, expectedOutput);
2357 }
2358
2359 void LstmUtilsMeanStddevNormalizationAllZeroInputTest()
2360 {
2361     uint32_t batchSize = 2;
2362     uint32_t vecSize = 4;
2363     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2364     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2365             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2366               0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
2367
2368     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2369             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2370               0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
2371
2372     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2373             vecSize, batchSize, expectedOutput);
2374 }
2375
2376 void LstmUtilsMeanStddevNormalizationMixedZeroInputTest()
2377 {
2378     uint32_t batchSize = 2;
2379     uint32_t vecSize = 4;
2380     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2381     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2382             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2383               0.1f, 0.2f, 0.3f, 0.4f }));  //batch 1
2384
2385     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2386             {         0.0f,          0.0f,        0.0f,        0.0f,      //batch 0
2387               -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f }));  //batch 1
2388
2389     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2390             vecSize, batchSize, expectedOutput);
2391 }
2392
2393
2394 void LstmUtilsVectorBatchVectorCwiseProductTest()
2395 {
2396     uint32_t batchSize = 4;
2397     uint32_t vecSize = 29;
2398     armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
2399     boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
2400             {   1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f, 10.1f,
2401               11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
2402               21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,     0.0f}));
2403
2404     armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
2405     boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2406             { /* batch 0 */
2407                 1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f,  10.1f,
2408               11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f,  20.2f,
2409               21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,   0.0f,
2410               /* batch 1 */
2411                 -1.1f,   -2.2f,   -3.3f,   -4.4f,   -5.5f,   -6.6f,   -7.7f,   -8.8f,   -9.9f, -10.1f,
2412               -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f,
2413               -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f,    0.0f,
2414               /* batch 2 */
2415                 1.1f,   -2.2f,   3.3f,   -4.4f,   5.5f,   -6.6f,   7.7f,   -8.8f,   9.9f, -10.1f,
2416               11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f,
2417               21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f,   0.0f,
2418               /* batch 3 */
2419                 -1.1f,   2.2f,   -3.3f,   4.4f,   -5.5f,   6.6f,   -7.7f,   8.8f,   -9.9f, 10.1f,
2420               -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f,
2421               -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f,    0.0f}));
2422
2423     // Expected output is the elementwise product: output[b][i] = vector[i] * batchVector[b][i], e.g. 1.1 * 1.1 = 1.21.
2424     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2425             { /* batch 0 */
2426                  1.210000f,    4.840000f,   10.889999f,   19.360001f,   30.250000f,   43.559998f,
2427                 59.289997f,   77.440002f,   98.009995f,  102.010010f,  123.432091f,  146.894394f,
2428                172.396896f,  199.939606f,  229.522491f,  261.145599f,  294.808899f,  330.512421f,
2429                368.256134f,  408.040039f,  449.864075f,  493.728363f,  539.632874f,  587.577576f,
2430                637.562500f,  689.587585f,  743.652954f,  799.758423f,    0.000000f,
2431               /* batch 1 */
2432                 -1.210000f,   -4.840000f,  -10.889999f,  -19.360001f,  -30.250000f,  -43.559998f,
2433                -59.289997f,  -77.440002f,  -98.009995f, -102.010010f, -123.432091f, -146.894394f,
2434               -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f,
2435               -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f,
2436               -637.562500f, -689.587585f, -743.652954f, -799.758423f,    0.000000f,
2437               /* batch 2 */
2438                  1.210000f,   -4.840000f,  10.889999f,   -19.360001f,   30.250000f,  -43.559998f,
2439                 59.289997f,  -77.440002f,  98.009995f,  -102.010010f,  123.432091f, -146.894394f,
2440                172.396896f, -199.939606f, 229.522491f,  -261.145599f,  294.808899f, -330.512421f,
2441                368.256134f, -408.040039f, 449.864075f,  -493.728363f,  539.632874f, -587.577576f,
2442                637.562500f, -689.587585f, 743.652954f,  -799.758423f,    0.000000f,
2443               /* batch 3 */
2444                 -1.210000f,    4.840000f,  -10.889999f,   19.360001f,  -30.250000f,   43.559998f,
2445                -59.289997f,   77.440002f,  -98.009995f,  102.010010f, -123.432091f,  146.894394f,
2446               -172.396896f,  199.939606f, -229.522491f,  261.145599f, -294.808899f,  330.512421f,
2447               -368.256134f,  408.040039f, -449.864075f,  493.728363f, -539.632874f,  587.577576f,
2448               -637.562500f,  689.587585f, -743.652954f,  799.758423f,    0.000000f}));
2449
2450     return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector,
2451             vecSize, batchSize, expectedOutput);
2452 }
2453
2454
2455 void LstmUtilsVectorBatchVectorAddTest()
2456 {
2457     uint32_t batchSize = 2;
2458     uint32_t vecSize = 3;
2459     armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
2460     boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
2461             { 0.0f, -0.5f, 1.0f}));
2462
2463     armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
2464     boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2465             { 1.0f, 2.0f, 3.0f,    //batch 0
2466               4.0f, 5.0f, 6.0f})); //batch 1
2467
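    // VectorBatchVectorAdd adds the same vector to every batch row:
    // { 1, 2, 3 } + { 0, -0.5, 1 } = { 1, 1.5, 4 } and { 4, 5, 6 } + { 0, -0.5, 1 } = { 4, 4.5, 7 }.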
2468     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2469             { 1.0f, 1.5f, 4.0f,
2470               4.0f, 4.5f, 7.0f}));
2471
2472     return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector,
2473             vecSize, batchSize, expectedOutput);
2474 }
2475
2476 #endif
2477
2478 LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
2479     armnn::IWorkloadFactory& workloadFactory,
2480     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2481 {
2482     armnn::TensorInfo inputDesc({ 2, 2 }, armnn::DataType::Float32);
2483     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2484             { 2., 3., 3., 4. }));
2485
2486     armnn::TensorInfo outputDesc({ 2, 4 }, armnn::DataType::Float32);
2487     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2488             {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
2489              -0.42734814f, -0.00478661f,  0.13455015f, -0.03560682f}));
2490     return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
2491         workloadFactory, memoryManager, input, expectedOutput);
2492 }
2493
2494 LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
2495     armnn::IWorkloadFactory& workloadFactory,
2496     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2497 {
2498     armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
2499     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2500             {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
2501              0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));
2502
2503     armnn::TensorInfo outputDesc({ 2, 16 }, armnn::DataType::Float32);
2504     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2505             {-0.00396806f, 0.029352f,     -0.00279226f, 0.0159977f,   -0.00835576f,
2506              -0.0211779f,  0.0283512f,    -0.0114597f,  0.00907307f,  -0.0244004f,
2507              -0.0152191f,  -0.0259063f,   0.00914318f,  0.00415118f,  0.017147f,
2508              0.0134203f, -0.013869f,    0.0287268f,   -0.00334693f, 0.00733398f,  -0.0287926f,
2509              -0.0186926f,   0.0193662f,   -0.0115437f,  0.00422612f,  -0.0345232f,
2510              0.00223253f,   -0.00957321f, 0.0210624f,   0.013331f,    0.0150954f,
2511              0.02168f}));
2512     return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<armnn::DataType::Float32>(
2513         workloadFactory, memoryManager, input, expectedOutput);
2514 }
2515
2516 LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
2517     armnn::IWorkloadFactory& workloadFactory,
2518     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2519 {
2520     armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::Float32);
2521     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2522             {2., 3., 3., 4.}));
2523
2524
2525     armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::Float32);
2526     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2527             {{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
2528               -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f}}));
2529
2530     return LstmNoCifgNoPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
2531         workloadFactory, memoryManager, input, expectedOutput);
2532 }
2533
2534
2535 LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
2536         armnn::IWorkloadFactory& workloadFactory,
2537         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2538 {
2539     armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
2540     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2541             {0.7f, 0.8f, 0.1f, 0.2f, 0.3f,     //batch 0
2542              0.3f, 0.2f, 0.9f, 0.8f, 0.1f}));  //batch 1
2543
2544     armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32);
2545     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2546             {  0.0244077f,  0.128027f, -0.00170918f,    //batch 0
2547              -0.00692428f, 0.0848741f,    0.063445f})); //batch 1
2548     return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>(
2549             workloadFactory, memoryManager, input, expectedOutput);
2550 }
2551
2552
2553 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
2554     armnn::IWorkloadFactory& workloadFactory,
2555     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2556 {
2557     const float qScale = 1.0f;
2558     const int32_t qOffset = 0;
2559
2560     const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
2561     const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
2562
2563     armnn::TensorInfo inputDesc({2, 2}, datatype);
2564     boost::multi_array<int16_t , 2> input = MakeTensor<int16_t , 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
2565             std::vector<float>{2., 3., 3., 4.}));
2566
2567     armnn::TensorInfo outputDesc({2, 4}, datatype);
2568     boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
2569             qOffset, std::vector<float>({{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
2570                                           -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f}})));
2571
2572     return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
2573         workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
2575 }
2576
2577 LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
2578     armnn::IWorkloadFactory& workloadFactory,
2579     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2580 {
2581     const float qScale = 1.0f;
2582     const int32_t qOffset = 0;
2583
2584     const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
2585     const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
2586
2587     armnn::TensorInfo inputDesc({ 2, 2 }, datatype);
2588     boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale, qOffset,
2589             std::vector<float>({ 2., 3., 3., 4. })));
2590
2591     armnn::TensorInfo outputDesc({ 2, 4 }, datatype);
2592     boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
2593             qOffset, std::vector<float>(
2594             {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
2595              -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f})));
2596
2597     return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<datatype>(
2598         workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
2599 }
2600
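// QSymm16 LSTM variant with peephole connections and a projection layer
// (output size 16); note the coarser quantization scale of 2.0.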
2601 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
2602     armnn::IWorkloadFactory& workloadFactory,
2603     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2604 {
2605     const float qScale = 2.0f;
2606     const int32_t qOffset = 0;
2607
2608     const armnn::DataType datatype = armnn::DataType::QuantisedSymm16;
2609     const armnn::DataType constantDatatype = armnn::DataType::QuantisedAsymm8;
2610
2611     armnn::TensorInfo inputDesc({ 2, 5 }, datatype);
2612     boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale,
2613             qOffset, std::vector<float>(
2614             {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
2615              0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f})));
2616
2617     armnn::TensorInfo outputDesc({ 2, 16 }, datatype);
2618     boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
2619             qOffset, std::vector<float>(
2620             {-0.00396806f,  0.029352f,   -0.00279226f, 0.0159977f,  -0.00835576f,
2621              -0.0211779f,   0.0283512f,  -0.0114597f,  0.00907307f, -0.0244004f,
2622              -0.0152191f,  -0.0259063f,   0.00914318f, 0.00415118f,  0.017147f,
2623               0.0134203f,  -0.013869f,    0.0287268f, -0.00334693f,  0.00733398f, -0.0287926f,
2624              -0.0186926f,   0.0193662f,  -0.0115437f,  0.00422612f, -0.0345232f,
2625               0.00223253f, -0.00957321f,  0.0210624f,  0.013331f,    0.0150954f,   0.02168f})));
2626
2627     return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<datatype>(
2628         workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, constantDatatype);
2629 }
2630
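// As the basic QSymm16 LSTM test above, but with the constant tensors also
// quantized to QSymm16 rather than QAsymm8.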
2631 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
2632     armnn::IWorkloadFactory& workloadFactory,
2633     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2634 {
2635     const float qScale = 1.0f;
2636     const int32_t qOffset = 0;
2637
2638     const armnn::DataType datatype = armnn::DataType::QuantisedSymm16; // datatype & constants set to QSymm16
2639
2640     armnn::TensorInfo inputDesc({2, 2}, datatype);
2641     boost::multi_array<int16_t, 2> input = MakeTensor<int16_t, 2>(inputDesc, QuantizedVector<int16_t>(qScale,
2642             qOffset, std::vector<float>{2., 3., 3., 4.}));
2643
2644     armnn::TensorInfo outputDesc({2, 4}, datatype);
2645     boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(outputDesc, QuantizedVector<int16_t>(qScale,
2646             qOffset, std::vector<float>({-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
2647                                          -0.0185422f,  0.11281417f,  0.24466537f, -0.1826292f})));
2648
2649     return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
2650         workloadFactory, memoryManager, input, expectedOutput, qScale, qOffset, datatype);
2651 }
2652
2653 // QuantizedLstm: runs the uint8 quantized LSTM workload against precomputed reference outputs.
2654 LayerTestResult<uint8_t, 2> QuantizedLstmTest(
2655     armnn::IWorkloadFactory& workloadFactory,
2656     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2657 {
2658     armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QuantisedAsymm8);
2659     boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, std::vector<uint8_t>(
2660         {166, 179, 50, 150}));
2661
2662     armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QuantisedAsymm8);
2663     boost::multi_array<uint8_t, 2> expectedOutput = MakeTensor<uint8_t, 2>(outputDesc, std::vector<uint8_t>(
2664         {140, 151, 146, 112, 136, 156, 142, 112 }));
2665
2666     return QuantizedLstmTestImpl(workloadFactory, memoryManager, input, expectedOutput);
2667 }
2668
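// Concatenates a 2-channel and a 1-channel 6x3 tensor along the channel axis
// into a 3-channel output. Uses sub-tensor views into the output tensor when
// the backend supports them, otherwise separate input tensors.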
2669 LayerTestResult<float,3> ConcatTest(
2670     armnn::IWorkloadFactory& workloadFactory,
2671     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2672 {
2673     unsigned int outputWidth = 3;
2674     unsigned int outputHeight = 6;
2675     unsigned int outputChannels = 3;
2676
2677     unsigned int inputWidth1 = 3;
2678     unsigned int inputHeight1 = 6;
2679     unsigned int inputChannels1 = 2;
2680
2681     unsigned int inputWidth2 = 3;
2682     unsigned int inputHeight2 = 6;
2683     unsigned int inputChannels2 = 1;
2684
2685     // Define the tensor descriptors.
2686     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
2687     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
2688     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
2689
2690     LayerTestResult<float,3> ret(outputTensorInfo);
2691
2692     ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
2693     {
2694             1.0f, 2.0f, 3.0f,
2695             4.0f, 5.0f, 6.0f,
2696             7.0f, 8.0f, 9.0f,
2697             10.0f, 11.0f, 12.0f,
2698             13.0f, 14.0f, 15.0f,
2699             16.0f, 17.0f, 18.0f,
2700
2701             19.0f, 20.0f, 21.0f,
2702             22.0f, 23.0f, 24.0f,
2703             25.0f, 26.0f, 27.0f,
2704             28.0f, 29.0f, 30.0f,
2705             31.0f, 32.0f, 33.0f,
2706             34.0f, 35.0f, 36.0f,
2707
2708             37.0f, 38.0f, 39.0f,
2709             40.0f, 41.0f, 42.0f,
2710             43.0f, 44.0f, 45.0f,
2711             46.0f, 47.0f, 48.0f,
2712             49.0f, 50.0f, 51.0f,
2713             52.0f, 53.0f, 54.0f,
2714         })
2715     );
2716
2717     auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
2718         {
2719             1.0f, 2.0f, 3.0f,
2720             4.0f, 5.0f, 6.0f,
2721             7.0f, 8.0f, 9.0f,
2722             10.0f, 11.0f, 12.0f,
2723             13.0f, 14.0f, 15.0f,
2724             16.0f, 17.0f, 18.0f,
2725
2726             19.0f, 20.0f, 21.0f,
2727             22.0f, 23.0f, 24.0f,
2728             25.0f, 26.0f, 27.0f,
2729             28.0f, 29.0f, 30.0f,
2730             31.0f, 32.0f, 33.0f,
2731             34.0f, 35.0f, 36.0f,
2732         })
2733     );
2734
2735     auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
2736         {
2737             37.0f, 38.0f, 39.0f,
2738             40.0f, 41.0f, 42.0f,
2739             43.0f, 44.0f, 45.0f,
2740             46.0f, 47.0f, 48.0f,
2741             49.0f, 50.0f, 51.0f,
2742             52.0f, 53.0f, 54.0f,
2743         })
2744     );
2745
2746     std::vector<unsigned int> wOrigin1 = {0, 0, 0}; //Extent of the window is defined by size of input[0].
2747     armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
2748
2749     std::vector<unsigned int> wOrigin2 = {2, 0, 0}; //Extent of the window is defined by size of input[1].
2750     armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
2751
2752     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2753
2754     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
2755
2756     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
2757         subTensorsSupported ?
2758             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
2759             workloadFactory.CreateTensorHandle(inputTensorInfo1);
2760
2761     std::unique_ptr<armnn::ITensorHandle> inputHandle2  =
2762         subTensorsSupported ?
2763             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
2764             workloadFactory.CreateTensorHandle(inputTensorInfo2);
2765
2766     armnn::ConcatQueueDescriptor data;
2767     armnn::WorkloadInfo info;
2768     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2769     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2770     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2771
2772     data.m_ViewOrigins.push_back(window1);
2773     data.m_ViewOrigins.push_back(window2);
2774
2775     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
2776
2777     inputHandle1->Allocate();
2778     inputHandle2->Allocate();
2779     outputHandle->Allocate();
2780
2781     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
2782     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
2783
2784     workload->PostAllocationConfigure();
2785     workload->Execute();
2786
2787     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
2788
2789     return ret;
2790 }
2791
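// Element-wise addition of two 2x2x2x3 float tensors.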
2792 LayerTestResult<float,4> AdditionTest(
2793     armnn::IWorkloadFactory& workloadFactory,
2794     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2795 {
2796     unsigned int batchSize = 2;
2797     unsigned int channels  = 2;
2798     unsigned int height    = 2;
2799     unsigned int width     = 3;
2800
2801     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
2802     armnn::TensorInfo outputTensorInfo;
2803
2804     unsigned int shape[] = {batchSize, channels, height, width};
2805
2806     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
2807     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
2808     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
2809
2810
2811     auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>(
2812         {
2813             0.0f, 2.0f, 1.0f,
2814             0.2f, 1.0f, 2.0f,
2815
2816             1.0f, 2.0f, 1.0f,
2817             0.2f, 1.0f, 2.0f,
2818
2819             0.0f, 2.0f, 1.0f,
2820             4.2f, 1.0f, 2.0f,
2821
2822             0.0f, 0.0f, 1.0f,
2823             0.2f, 1.0f, 2.0f,
2824         }));
2825
2826     auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>(
2827         {
2828             1.0f, 2.0f, 1.0f,
2829             0.0f, 1.0f, 2.0f,
2830
2831             1.0f, 2.0f, -2.0f,
2832             0.2f, 1.0f, 2.0f,
2833
2834             0.0f, 2.0f, 1.0f,
2835             4.2f, 0.0f, -3.0f,
2836
2837             0.0f, 0.0f, 1.0f,
2838             0.7f, 1.0f, 5.0f,
2839         }));
2840
2841     LayerTestResult<float,4> ret(outputTensorInfo);
2842     ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>(
2843         {
2844             1.0f, 4.0f, 2.0f,
2845             0.2f, 2.0f, 4.0f,
2846
2847             2.0f, 4.0f, -1.0f,
2848             0.4f, 2.0f, 4.0f,
2849
2850             0.0f, 4.0f, 2.0f,
2851             8.4f, 1.0f, -1.0f,
2852
2853             0.0f, 0.0f, 2.0f,
2854             0.9f, 2.0f, 7.0f,
2855         }));
2856
2857     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
2858     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
2859     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2860
2861     armnn::AdditionQueueDescriptor data;
2862     armnn::WorkloadInfo info;
2863     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2864     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2865     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2866
2867     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
2868
2869     inputHandle1->Allocate();
2870     inputHandle2->Allocate();
2871     outputHandle->Allocate();
2872
2873     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
2874     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
2875
2876     workload->PostAllocationConfigure();
2877     workload->Execute();
2878
2879     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
2880
2881     return ret;
2882 }
2883
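// Element-wise addition on 5-D tensors (depth x batch x channels x height x width).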
2884 LayerTestResult<float, 5> Addition5dTest(
2885     armnn::IWorkloadFactory& workloadFactory,
2886     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
2887 {
2888     unsigned int depth     = 2;
2889     unsigned int batchSize = 2;
2890     unsigned int channels  = 2;
2891     unsigned int height    = 2;
2892     unsigned int width     = 3;
2893
2894     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
2895     armnn::TensorInfo outputTensorInfo;
2896
2897     unsigned int shape[] = {depth, batchSize, channels, height, width};
2898
2899     inputTensorInfo1 = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
2900     inputTensorInfo2 = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
2901     outputTensorInfo = armnn::TensorInfo(5, shape, armnn::DataType::Float32);
2902
2903
2904     auto input1 = MakeTensor<float, 5>(inputTensorInfo1, std::vector<float>(
2905         {
2906             2.6f, 4.0f, 4.4f,  2.7f, 4.6f, 2.8f,
2907             2.3f, 1.9f, 3.4f,  2.9f, 2.2f, 4.5f,
2908
2909             2.8f, 1.9f, 2.3f,  2.6f, 4.7f, 3.5f,
2910             0.4f, 1.5f, 2.1f,  0.7f, 5.0f, 1.1f,
2911
2912
2913             1.0f, 2.7f, 0.0f,  0.6f, 0.8f, 0.9f,
2914             1.0f, 2.6f, 0.4f,  3.8f, 0.4f, 0.8f,
2915
2916             0.5f, 4.3f, 3.1f,  4.4f, 0.7f, 1.4f,
2917             0.4f, 4.4f, 0.7f,  0.6f, 4.7f, 1.2f,
2918
2919         }));
2920
2921     auto input2 = MakeTensor<float, 5>(inputTensorInfo2, std::vector<float>(
2922         {
2923             4.4f, 3.0f, 1.0f,  0.0f, 3.9f, 3.1f,
2924             1.7f, 2.9f, 1.3f,  0.4f, 0.4f, 4.3f,
2925
2926             4.5f, 0.2f, 2.2f,  4.1f, 3.9f, 3.0f,
2927             0.1f, 2.5f, 4.1f,  4.6f, 1.5f, 0.0f,
2928
2929
2930             0.5f, 4.9f, 2.5f,  1.5f, 3.4f, 4.5f,
2931             2.0f, 3.0f, 4.9f,  1.6f, 2.4f, 3.4f,
2932
2933             3.6f, 1.8f, 1.3f,  2.6f, 2.1f, 4.8f,
2934             2.0f, 4.3f, 4.0f,  0.2f, 0.6f, 4.4f,
2935         }));
2936
2937     LayerTestResult<float, 5> ret(outputTensorInfo);
2938     ret.outputExpected = MakeTensor<float, 5>(outputTensorInfo, std::vector<float>(
2939         {
2940             7.0f, 7.0f, 5.4f,  2.7f, 8.5f, 5.9f,
2941             4.0f, 4.8f, 4.7f,  3.3f, 2.6f, 8.8f,
2942
2943             7.3f, 2.1f, 4.5f,  6.7f, 8.6f, 6.5f,
2944             0.5f, 4.0f, 6.2f,  5.3f, 6.5f, 1.1f,
2945
2946
2947             1.5f, 7.6f, 2.5f,  2.1f, 4.2f, 5.4f,
2948             3.0f, 5.6f, 5.3f,  5.4f, 2.8f, 4.2f,
2949
2950             4.1f, 6.1f, 4.4f,  7.0f, 2.8f, 6.2f,
2951             2.4f, 8.7f, 4.7f,  0.8f, 5.3f, 5.6f,
2952         }));
2953
2954     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
2955     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
2956     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2957
2958     armnn::AdditionQueueDescriptor data;
2959     armnn::WorkloadInfo info;
2960     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
2961     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
2962     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
2963
2964     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
2965
2966     inputHandle1->Allocate();
2967     inputHandle2->Allocate();
2968     outputHandle->Allocate();
2969
2970     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0][0]);
2971     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0][0]);
2972
2973     workload->PostAllocationConfigure();
2974     workload->Execute();
2975
2976     CopyDataFromITensorHandle(&ret.output[0][0][0][0][0], outputHandle.get());
2977
2978     return ret;
2979 }
2980
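// Broadcasting addition: a {1,3,2,1} tensor plus a {1,1,2,3} tensor yields a
// {1,3,2,3} output, each input being repeated along its size-1 dimensions.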
2981 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
2982 LayerTestResult<T, 4> AdditionBroadcastTestImpl(
2983     armnn::IWorkloadFactory& workloadFactory,
2984     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2985     float qScale,
2986     int32_t qOffset)
2987 {
2988     armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, ArmnnType);
2989     armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, ArmnnType);
2990     armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, ArmnnType);
2991
2992     if (armnn::IsQuantizedType<T>())
2993     {
2994         inputTensorInfo1.SetQuantizationScale(qScale);
2995         inputTensorInfo1.SetQuantizationOffset(qOffset);
2996         inputTensorInfo2.SetQuantizationScale(qScale);
2997         inputTensorInfo2.SetQuantizationOffset(qOffset);
2998         outputTensorInfo.SetQuantizationScale(qScale);
2999         outputTensorInfo.SetQuantizationOffset(qOffset);
3000     }
3001
3002     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
3003         {
3004             0.0f,
3005             1.0f,
3006
3007             2.0f,
3008             3.0f,
3009
3010             4.0f,
3011             5.0f,
3012         }));
3013
3014     auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
3015         {
3016             0.5f, 1.5f, 2.5f,
3017             3.5f, 4.5f, 5.5f,
3018         }));
3019
3020     LayerTestResult<T,4> ret(outputTensorInfo);
3021     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
3022         {
3023             0.5f, 1.5f, 2.5f,
3024             4.5f, 5.5f, 6.5f,
3025
3026             2.5f, 3.5f, 4.5f,
3027             6.5f, 7.5f, 8.5f,
3028
3029             4.5f, 5.5f, 6.5f,
3030             8.5f, 9.5f, 10.5f,
3031         }));
3032
3033     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3034     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3035     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3036
3037     armnn::AdditionQueueDescriptor data;
3038     armnn::WorkloadInfo info;
3039     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3040     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3041     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3042
3043     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
3044
3045     inputHandle1->Allocate();
3046     inputHandle2->Allocate();
3047     outputHandle->Allocate();
3048
3049     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3050     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3051
3052     workload->PostAllocationConfigure();
3053     workload->Execute();
3054
3055     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3056
3057     return ret;
3058 }
3059
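// Scalar broadcast: a single-element {1,1,1,1} tensor is added to every
// element of a {1,3,2,3} tensor.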
3060 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
3061 LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(
3062     armnn::IWorkloadFactory& workloadFactory,
3063     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3064     float qScale,
3065     int32_t qOffset)
3066 {
3067     armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, ArmnnType);
3068     armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, ArmnnType);
3069     armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, ArmnnType);
3070
3071     if (armnn::IsQuantizedType<T>())
3072     {
3073         inputTensorInfo1.SetQuantizationScale(qScale);
3074         inputTensorInfo1.SetQuantizationOffset(qOffset);
3075         inputTensorInfo2.SetQuantizationScale(qScale);
3076         inputTensorInfo2.SetQuantizationOffset(qOffset);
3077         outputTensorInfo.SetQuantizationScale(qScale);
3078         outputTensorInfo.SetQuantizationOffset(qOffset);
3079     }
3080
3081     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
3082         {
3083              0.0f,  1.0f,  2.0f,
3084              3.0f,  4.0f,  5.0f,
3085              6.0f,  7.0f,  8.0f,
3086              9.0f, 10.0f, 11.0f,
3087             12.0f, 13.0f, 14.0f,
3088             15.0f, 16.0f, 17.0f,
3089         }));
3090
3091     auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
3092         {
3093             0.5f,
3094         }));
3095
3096     LayerTestResult<T,4> ret(outputTensorInfo);
3097     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
3098         {
3099              0.5f,  1.5f,  2.5f,
3100              3.5f,  4.5f,  5.5f,
3101              6.5f,  7.5f,  8.5f,
3102              9.5f, 10.5f, 11.5f,
3103             12.5f, 13.5f, 14.5f,
3104             15.5f, 16.5f, 17.5f,
3105         }));
3106
3107     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3108     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3109     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3110
3111     armnn::AdditionQueueDescriptor data;
3112     armnn::WorkloadInfo info;
3113     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3114     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3115     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3116
3117     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
3118
3119     inputHandle1->Allocate();
3120     inputHandle2->Allocate();
3121     outputHandle->Allocate();
3122
3123     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3124     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3125
3126     workload->PostAllocationConfigure();
3127     workload->Execute();
3128
3129     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3130
3131     return ret;
3132 }
3133
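// Concrete broadcast-addition tests for Float32, QAsymm8 and QSymm16.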
3134 LayerTestResult<float, 4> AdditionBroadcastTest(
3135     armnn::IWorkloadFactory& workloadFactory,
3136     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3137 {
3138     return AdditionBroadcastTestImpl<armnn::DataType::Float32>(
3139         workloadFactory, memoryManager, 0.0f, 0);
3140 }
3141
3142 LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(
3143     armnn::IWorkloadFactory& workloadFactory,
3144     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3145 {
3146     return AdditionBroadcastTestImpl<armnn::DataType::QuantisedAsymm8>(
3147         workloadFactory, memoryManager, 2.f, 0);
3148 }
3149
3150 LayerTestResult<int16_t, 4> AdditionBroadcastInt16Test(
3151     armnn::IWorkloadFactory& workloadFactory,
3152     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3153 {
3154     return AdditionBroadcastTestImpl<armnn::DataType::QuantisedSymm16>(
3155         workloadFactory, memoryManager, 2.f, 0);
3156 }
3157
3158 LayerTestResult<float, 4> AdditionBroadcast1ElementTest(
3159     armnn::IWorkloadFactory& workloadFactory,
3160     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3161 {
3162     return AdditionBroadcast1ElementTestImpl<armnn::DataType::Float32>(
3163         workloadFactory, memoryManager, 0.0f, 0);
3164 }
3165
3166 LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(
3167     armnn::IWorkloadFactory& workloadFactory,
3168     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3169 {
3170     return AdditionBroadcast1ElementTestImpl<armnn::DataType::QuantisedAsymm8>(
3171         workloadFactory, memoryManager, 0.1333333f, 128);
3172 }
3173
3174 LayerTestResult<int16_t, 4> AdditionBroadcast1ElementInt16Test(
3175     armnn::IWorkloadFactory& workloadFactory,
3176     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3177 {
3178     return AdditionBroadcast1ElementTestImpl<armnn::DataType::QuantisedSymm16>(
3179         workloadFactory, memoryManager, 0.1333333f, 0);
3180 }
3181
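// Runs the same addition on random data through both the factory under test
// and refWorkloadFactory; output and outputExpected hold the two results so
// the caller can compare the backends against each other.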
3182 LayerTestResult<float,4> CompareAdditionTest(
3183     armnn::IWorkloadFactory& workloadFactory,
3184     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3185     armnn::IWorkloadFactory& refWorkloadFactory)
3186 {
3187     unsigned int batchSize = 4;
3188     unsigned int channels  = 1;
3189     unsigned int height    = 2;
3190     unsigned int width     = 3;
3191
3192     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
3193     armnn::TensorInfo outputTensorInfo;
3194
3195     unsigned int shape[] = {batchSize, channels, height, width};
3196
3197     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
3198     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
3199     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
3200
3201     auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232);
3202     auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456);
3203
3204     LayerTestResult<float,4> ret(outputTensorInfo);
3205
3206     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3207     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3208     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3209
3210     std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
3211     std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2);
3212     std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);
3213
3214     armnn::AdditionQueueDescriptor data;
3215     armnn::WorkloadInfo info;
3216     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3217     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3218     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3219
3220     armnn::AdditionQueueDescriptor refData = data;
3221     armnn::WorkloadInfo refInfo = info;
3222     SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get());
3223     SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get());
3224     SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());
3225
3226     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
3227     std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo);
3228
3229     inputHandle1->Allocate();
3230     inputHandle2->Allocate();
3231     outputHandle->Allocate();
3232     inputHandle1Ref->Allocate();
3233     inputHandle2Ref->Allocate();
3234     outputHandleRef->Allocate();
3235
3236     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3237     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3238     CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
3239     CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]);
3240
3241     workload->PostAllocationConfigure();
3242     workload->Execute();
3243     workloadRef->PostAllocationConfigure();
3244     workloadRef->Execute();
3245
3246     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3247     CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());
3248
3249     return ret;
3250 }
3251
3252 namespace {
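// Shared helper: builds and runs a Division workload from raw shapes, values
// and per-tensor quantization parameters, returning actual vs expected results.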
3253 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
3254 LayerTestResult<T, 4> DivisionTestHelper(
3255     armnn::IWorkloadFactory& workloadFactory,
3256     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
3257     const unsigned int shape0[4],
3258     const std::vector<T>& values0,
3259     float scale0,
3260     int32_t offset0,
3261     const unsigned int shape1[4],
3262     const std::vector<T> & values1,
3263     float scale1,
3264     int32_t offset1,
3265     const unsigned int outShape[4],
3266     const std::vector<T> & outValues,
3267     float outScale,
3268     int32_t outOffset)
3269 {
3270     armnn::TensorInfo inputTensorInfo0(4, shape0, ArmnnType);
3271     armnn::TensorInfo inputTensorInfo1(4, shape1, ArmnnType);
3272     armnn::TensorInfo outputTensorInfo(4, outShape, ArmnnType);
3273
3274     inputTensorInfo0.SetQuantizationScale(scale0);
3275     inputTensorInfo0.SetQuantizationOffset(offset0);
3276
3277     inputTensorInfo1.SetQuantizationScale(scale1);
3278     inputTensorInfo1.SetQuantizationOffset(offset1);
3279
3280     outputTensorInfo.SetQuantizationScale(outScale);
3281     outputTensorInfo.SetQuantizationOffset(outOffset);
3282
3283     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
3284     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
3285
3286     LayerTestResult<T, 4> result(outputTensorInfo);
3287     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
3288
3289     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
3290     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3291     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3292
3293     armnn::DivisionQueueDescriptor data;
3294     armnn::WorkloadInfo info;
3295     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
3296     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
3297     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3298
3299     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDivision(data, info);
3300
3301     inputHandle0->Allocate();
3302     inputHandle1->Allocate();
3303     outputHandle->Allocate();
3304
3305     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
3306     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3307
3308     workload->PostAllocationConfigure();
3309     workload->Execute();
3310
3311     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3312
3313     return result;
3314 }
3315 } // anonymous namespace
3316
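// Float division follows IEEE 754: +/-x / +/-0 gives +/-infinity and
// 0 / 0 gives NaN, which is what the expected output below encodes.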
3317 LayerTestResult<float,4> DivisionByZeroTest(
3318     armnn::IWorkloadFactory& workloadFactory,
3319     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3320 {
3321     const unsigned int width = 2;
3322     const unsigned int height = 2;
3323     const unsigned int channelCount = 2;
3324     const unsigned int batchSize = 2;
3325
3326     unsigned int shape[] = { batchSize, channelCount, height, width };
3327
3328     std::vector<float> input0({
3329                                 1.f,  1.f,  1.f,  1.f,  0.f, 0.f, 0.f, 0.f,
3330                                -1.f, -1.f, -1.f, -1.f,  5.f, 5.f, 5.f, 5.f });
3331
3332     std::vector<float> input1({
3333                                0.f, 0.f, -0.f, -0.f,  0.f, 0.f, -0.f, -0.f,
3334                                0.f, 0.f, -0.f, -0.f,  5.f, 5.f,  5.f,  5.f });
3335
3336     std::vector<float> output({
3337                                INFINITY, INFINITY, -INFINITY, -INFINITY,  NAN, NAN, -NAN, -NAN,
3338                                -INFINITY, -INFINITY, INFINITY, INFINITY,  1, 1, 1, 1 });
3339
3340     return DivisionTestHelper<armnn::DataType::Float32>(workloadFactory,
3341                                                         memoryManager,
3342                                                         shape, input0, 1.0f, 0,
3343                                                         shape, input1, 1.0f, 0,
3344                                                         shape, output, 1.0f, 0);
3345 }
3346
3347 LayerTestResult<float,4> DivisionTest(
3348     armnn::IWorkloadFactory& workloadFactory,
3349     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3350 {
3351     const unsigned int width = 2;
3352     const unsigned int height = 2;
3353     const unsigned int channelCount = 2;
3354     const unsigned int batchSize = 2;
3355
3356     unsigned int shape[] = { batchSize, channelCount, height, width };
3357
3358     std::vector<float> input0({
3359                                       2,  2,  2,  2,    3,  3,  3,  3,
3360                                       4,  4,  4,  4,    5,  5,  5,  5 });
3361
3362     std::vector<float> input1({
3363                                       1,  1,  1,  1,    2,  2,  2,  2,
3364                                       4,  4,  4,  4,    4,  4,  4,  4 });
3365
3366     std::vector<float> output({
3367                                       2,  2,  2,  2,    1.5,  1.5,  1.5,  1.5,
3368                                       1, 1, 1, 1,  1.25, 1.25, 1.25, 1.25 });
3369
3370
3371     return DivisionTestHelper<armnn::DataType::Float32>(workloadFactory,
3372                                                         memoryManager,
3373                                                         shape, input0, 1.0f, 0,
3374                                                         shape, input1, 1.0f, 0,
3375                                                         shape, output, 1.0f, 0);
3376 }
3377
3378 LayerTestResult<float, 4> DivisionBroadcast1ElementTest(
3379     armnn::IWorkloadFactory& workloadFactory,
3380     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3381 {
3382     unsigned int shape0[] = { 1, 2, 2, 2 };
3383     std::vector<float> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
3384
3385     unsigned int shape1[] = { 1, 1, 1, 1 };
3386     std::vector<float> input1({ 2 });
3387
3388     std::vector<float> output({ 1, 2, 3, 4, 5, 6, 7, 8});
3389
3390
3391     return DivisionTestHelper<armnn::DataType::Float32>(workloadFactory,
3392                                                         memoryManager,
3393                                                         shape0, input0, 1.0f, 0,
3394                                                         shape1, input1, 1.0f, 0,
3395                                                         shape0, output, 1.0f, 0);
3396 }
3397
3398 LayerTestResult<float, 4> DivisionBroadcast1DVectorTest(
3399     armnn::IWorkloadFactory& workloadFactory,
3400     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3401 {
3402     unsigned int shape0[] = { 1, 3, 3, 2 };
3403     std::vector<float> input0({
3404                                       1,   4,       3,  8,      5, 12,
3405                                       7,   16,      9, 20,     11, 24,
3406                                       13,  28,     15, 32,     17, 36});
3407
3408     unsigned int shape1[] = { 1, 1, 1, 2 };
3409     std::vector<float> input1({ 1, 2 });
3410
3411     std::vector<float> output({
3412                                       1,   2,      3,  4,      5,  6,
3413                                       7,   8,      9, 10,     11, 12,
3414                                       13, 14,     15, 16,     17, 18});
3415
3416     return DivisionTestHelper<armnn::DataType::Float32>(workloadFactory,
3417                                                         memoryManager,
3418                                                         shape0, input0, 1.0f, 0,
3419                                                         shape1, input1, 1.0f, 0,
3420                                                         shape0, output, 1.0f, 0);
3421 }
3422
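// Quantized division: with an output scale of 0.25, the stored uint8 value 8
// decodes to the true quotient 2.0 (likewise 6 to 1.5, 4 to 1.0, 5 to 1.25).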
3423 LayerTestResult<uint8_t,4> DivisionUint8Test(
3424     armnn::IWorkloadFactory& workloadFactory,
3425     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3426 {
3427     const unsigned int width = 2;
3428     const unsigned int height = 2;
3429     const unsigned int channelCount = 2;
3430     const unsigned int batchSize = 2;
3431
3432     unsigned int shape[] = { batchSize, channelCount, height, width };
3433
3434     std::vector<uint8_t> input0({2,  2,  2,  2,    3,  3,  3,  3,
3435                                  4,  4,  4,  4,    5,  5,  5,  5 });
3436
3437     std::vector<uint8_t> input1({1,  1,  1,  1,    2,  2,  2,  2,
3438                                  4,  4,  4,  4,    4,  4,  4,  4 });
3439
3440     std::vector<uint8_t> output({8,  8,  8,  8,    6,  6,  6,  6,
3441                                  4,  4,  4,  4,    5,  5,  5,  5});
3442
3443
3444     return DivisionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
3445                                                                 memoryManager,
3446                                                                 shape, input0, 1.0f,  0,
3447                                                                 shape, input1, 1.0f,  0,
3448                                                                 shape, output, 0.25f, 0);
3449 }
3450
3451 LayerTestResult<uint8_t, 4> DivisionBroadcast1ElementUint8Test(
3452     armnn::IWorkloadFactory& workloadFactory,
3453     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3454 {
3455     unsigned int shape0[] = { 1, 2, 2, 2 };
3456     std::vector<uint8_t> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
3457
3458     unsigned int shape1[] = { 1, 1, 1, 1 };
3459     std::vector<uint8_t> input1({ 2 });
3460
3461     std::vector<uint8_t> output({ 1, 2, 3, 4, 5, 6, 7, 8});
3462
3463     return DivisionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
3464                                                                 memoryManager,
3465                                                                 shape0, input0, 1.0f, 0,
3466                                                                 shape1, input1, 1.0f, 0,
3467                                                                 shape0, output, 1.0f, 0);
3468 }
3469
3470 LayerTestResult<uint8_t, 4> DivisionBroadcast1DVectorUint8Test(
3471     armnn::IWorkloadFactory& workloadFactory,
3472     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3473 {
3474     unsigned int shape0[] = { 1, 3, 3, 2 };
3475     std::vector<uint8_t> input0({1,   4,     3,  8,      5,  12,
3476                                  7,   16,    9,  20,     11, 24,
3477                                  13,  28,    15, 32,     17, 36});
3478
3479     unsigned int shape1[] = { 1, 1, 1, 2 };
3480     std::vector<uint8_t> input1({ 1, 2 });
3481
3482     std::vector<uint8_t> output({1,   2,      3,  4,      5,  6,
3483                                  7,   8,      9, 10,     11, 12,
3484                                  13, 14,     15, 16,     17, 18});
3485
3486     return DivisionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
3487                                                                 memoryManager,
3488                                                                 shape0, input0, 1.0f, 0,
3489                                                                 shape1, input1, 1.0f, 0,
3490                                                                 shape0, output, 1.0f, 0);
3491 }
3492
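// QSymm16 version of the quantized division test; the output scale of 0.25
// encodes the quotients the same way as the uint8 variant above.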
3493 LayerTestResult<int16_t,4> DivisionInt16Test(
3494     armnn::IWorkloadFactory& workloadFactory,
3495     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3496 {
3497     unsigned int shape[] = { 2, 2, 2, 2 };
3498
3499     std::vector<int16_t> input0({2,  2,  2,  2,    3,  3,  3,  3,
3500                                  4,  4,  4,  4,    5,  5,  5,  5 });
3501
3502     std::vector<int16_t> input1({1,  1,  1,  1,    2,  2,  2,  2,
3503                                  4,  4,  4,  4,    4,  4,  4,  4 });
3504
3505     std::vector<int16_t> output({8,  8,  8,  8,    6,  6,  6,  6,
3506                                  4,  4,  4,  4,    5,  5,  5,  5});
3507
3508
3509     return DivisionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
3510                                                                 memoryManager,
3511                                                                 shape, input0, 1.0f,  0,
3512                                                                 shape, input1, 1.0f,  0,
3513                                                                 shape, output, 0.25f, 0);
3514 }
3515
3516 LayerTestResult<int16_t, 4> DivisionBroadcast1ElementInt16Test(
3517     armnn::IWorkloadFactory& workloadFactory,
3518     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3519 {
3520     unsigned int shape0[] = { 1, 2, 2, 2 };
3521     std::vector<int16_t> input0({ 2, 4, 6, 8, 10, 12, 14, 16});
3522
3523     unsigned int shape1[] = { 1, 1, 1, 1 };
3524     std::vector<int16_t> input1({ 2 });
3525
3526     std::vector<int16_t> output({ 1, 2, 3, 4, 5, 6, 7, 8});
3527
3528     return DivisionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
3529                                                                 memoryManager,
3530                                                                 shape0, input0, 1.0f, 0,
3531                                                                 shape1, input1, 1.0f, 0,
3532                                                                 shape0, output, 1.0f, 0);
3533 }
3534
3535 LayerTestResult<int16_t, 4> DivisionBroadcast1DVectorInt16Test(
3536     armnn::IWorkloadFactory& workloadFactory,
3537     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3538 {
3539     unsigned int shape0[] = { 1, 3, 3, 2 };
3540     std::vector<int16_t> input0({1,   4,     3,  8,      5,  12,
3541                                  7,   16,    9,  20,     11, 24,
3542                                  13,  28,    15, 32,     17, 36});
3543
3544     unsigned int shape1[] = { 1, 1, 1, 2 };
3545     std::vector<int16_t> input1({ 1, 2 });
3546
3547     std::vector<int16_t> output({1,   2,      3,  4,      5,  6,
3548                                  7,   8,      9, 10,     11, 12,
3549                                  13, 14,     15, 16,     17, 18});
3550
3551     return DivisionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
3552                                                                 memoryManager,
3553                                                                 shape0, input0, 1.0f, 0,
3554                                                                 shape1, input1, 1.0f, 0,
3555                                                                 shape0, output, 1.0f, 0);
3556 }
3557
3558 // Primary template is declared but deliberately not defined: only the
3559 // explicit specializations below are usable, so passing an unsupported
3560 // descriptor type fails at link time instead of recursing at run time.
3561 template<typename DescriptorType>
3562 std::unique_ptr<armnn::IWorkload> CreateWorkload(
3563     const armnn::IWorkloadFactory& workloadFactory,
3564     const armnn::WorkloadInfo& info,
3565     const DescriptorType& descriptor);
3566
3567 template<>
3568 std::unique_ptr<armnn::IWorkload> CreateWorkload<armnn::MaximumQueueDescriptor>(
3569     const armnn::IWorkloadFactory& workloadFactory,
3570     const armnn::WorkloadInfo& info,
3571     const armnn::MaximumQueueDescriptor& descriptor)
3572 {
3573     return workloadFactory.CreateMaximum(descriptor, info);
3574 }
3575
3576 template<>
3577 std::unique_ptr<armnn::IWorkload> CreateWorkload<armnn::MinimumQueueDescriptor>(
3578     const armnn::IWorkloadFactory& workloadFactory,
3579     const armnn::WorkloadInfo& info,
3580     const armnn::MinimumQueueDescriptor& descriptor)
3581 {
3582     return workloadFactory.CreateMinimum(descriptor, info);
3583 }
3584
3585 template<>
3586 std::unique_ptr<armnn::IWorkload> CreateWorkload<armnn::EqualQueueDescriptor>(
3587         const armnn::IWorkloadFactory& workloadFactory,
3588         const armnn::WorkloadInfo& info,
3589         const armnn::EqualQueueDescriptor& descriptor)
3590 {
3591     return workloadFactory.CreateEqual(descriptor, info);
3592 }
3593
3594 template<>
3595 std::unique_ptr<armnn::IWorkload> CreateWorkload<armnn::GreaterQueueDescriptor>(
3596         const armnn::IWorkloadFactory& workloadFactory,
3597         const armnn::WorkloadInfo& info,
3598         const armnn::GreaterQueueDescriptor& descriptor)
3599 {
3600     return workloadFactory.CreateGreater(descriptor, info);
3601 }
3602
3603 namespace {
3604
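// Generic helper for binary element-wise workloads. Input and output element
// types may differ (comparison ops such as Equal/Greater produce Boolean
// output); compareBoolean is set so results are checked as 0/1 flags.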
3605 template <typename Descriptor,
3606           armnn::DataType ArmnnTypeInput,
3607           armnn::DataType ArmnnTypeOutput,
3608           typename TInput = armnn::ResolveType<ArmnnTypeInput>,
3609           typename TOutput = armnn::ResolveType<ArmnnTypeOutput>>
3610 LayerTestResult<TOutput, 4> ElementwiseTestHelper(
3611     armnn::IWorkloadFactory & workloadFactory,
3612     const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
3613     const unsigned int shape0[4], std::vector<TInput> values0,
3614     const unsigned int shape1[4], std::vector<TInput> values1,
3615     const unsigned int outShape[4], std::vector<TOutput> outValues,
3616     float qScale = 0.0f, int qOffset = 0)
3617 {
3618     const uint32_t dimensionCount = 4;
3619     armnn::TensorInfo inputTensorInfo0{dimensionCount, shape0, ArmnnTypeInput};
3620     armnn::TensorInfo inputTensorInfo1{dimensionCount, shape1, ArmnnTypeInput};
3621     armnn::TensorInfo outputTensorInfo{dimensionCount, outShape, ArmnnTypeOutput};
3622
3623     auto input0 = MakeTensor<TInput, 4>(inputTensorInfo0, values0);
3624     auto input1 = MakeTensor<TInput, 4>(inputTensorInfo1, values1);
3625
3626     if (armnn::IsQuantizedType<TInput>())
3627     {
3628         inputTensorInfo0.SetQuantizationScale(qScale);
3629         inputTensorInfo0.SetQuantizationOffset(qOffset);
3630
3631         inputTensorInfo1.SetQuantizationScale(qScale);
3632         inputTensorInfo1.SetQuantizationOffset(qOffset);
3633
3634         outputTensorInfo.SetQuantizationScale(qScale);
3635         outputTensorInfo.SetQuantizationOffset(qOffset);
3636     }
3637
3638     LayerTestResult<TOutput,4> ret(outputTensorInfo);
3639
3640     if (ArmnnTypeOutput == armnn::DataType::Boolean)
3641     {
3642         ret.compareBoolean = true;
3643     }
3644
3645     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
3646     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3647     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3648
3649     Descriptor data;
3650     armnn::WorkloadInfo info;
3651     AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
3652     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3653     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3654     auto workload = CreateWorkload<Descriptor>(workloadFactory, info, data);
3655
3656     inputHandle0->Allocate();
3657     inputHandle1->Allocate();
3658     outputHandle->Allocate();
3659
3660     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
3661     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3662
3663     workload->PostAllocationConfigure();
3664     ExecuteWorkload(*workload, memoryManager);
3665
3666     CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
3667
3668     ret.outputExpected = MakeTensor<TOutput, 4>(outputTensorInfo, outValues);
3669     return ret;
3670 }
3671
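// Convenience overload for the common case where input and output element types match.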
3672 template <typename Descriptor, armnn::DataType ArmnnT, typename T = armnn::ResolveType<ArmnnT>>
3673 LayerTestResult<T, 4> ElementwiseTestHelper(
3674     armnn::IWorkloadFactory & workloadFactory,
3675     const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
3676     const unsigned int shape0[4], std::vector<T> values0,
3677     const unsigned int shape1[4], std::vector<T> values1,
3678     const unsigned int outShape[4], std::vector<T> outValues,
3679     float qScale = 0.0f, int qOffset = 0)
3680 {
3681     return ElementwiseTestHelper<Descriptor, ArmnnT, ArmnnT>
3682         (workloadFactory,
3683          memoryManager,
3684          shape0,
3685          values0,
3686          shape1,
3687          values1,
3688          outShape,
3689          outValues,
3690          qScale,
3691          qOffset);
3692 }
3693 } // anonymous namespace
3694
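// Equal: element-wise comparison of two float tensors; the Boolean result is
// returned as uint8 with 1 where the elements match and 0 elsewhere.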
3695 LayerTestResult<uint8_t, 4> EqualSimpleTest(armnn::IWorkloadFactory& workloadFactory,
3696                                             const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3697 {
3698     const unsigned int width = 2;
3699     const unsigned int height = 2;
3700     const unsigned int channelCount = 2;
3701     const unsigned int batchSize = 2;
3702
3703     unsigned int shape[] = { batchSize, channelCount, height, width };
3704
3705     std::vector<float> input0({ 1, 1, 1, 1,  5, 5, 5, 5,
3706                                 3, 3, 3, 3,  4, 4, 4, 4 });
3707
3708     std::vector<float> input1({ 1, 1, 1, 1,  3, 3, 3, 3,
3709                                 5, 5, 5, 5,  4, 4, 4, 4 });
3710
3711     std::vector<uint8_t> output({ 1, 1, 1, 1,  0, 0, 0, 0,
3712                                   0, 0, 0, 0,  1, 1, 1, 1 });
3713
3714     return ElementwiseTestHelper<armnn::EqualQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3715         workloadFactory,
3716         memoryManager,
3717         shape,
3718         input0,
3719         shape,
3720         input1,
3721         shape,
3722         output);
3723 }
3724
3725 LayerTestResult<uint8_t, 4> EqualBroadcast1ElementTest(
3726         armnn::IWorkloadFactory& workloadFactory,
3727         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3728 {
3729     unsigned int shape0[] = { 1, 2, 2, 2 };
3730     std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
3731
3732     unsigned int shape1[] = { 1, 1, 1, 1 };
3733     std::vector<float> input1({ 1 });
3734
3735     std::vector<uint8_t> output({ 1, 0, 0, 0, 0, 0, 0, 0});
3736
3737     return ElementwiseTestHelper<armnn::EqualQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3738         workloadFactory,
3739         memoryManager,
3740         shape0,
3741         input0,
3742         shape1,
3743         input1,
3744         shape0,
3745         output);
3746 }
3747
3748 LayerTestResult<uint8_t, 4> EqualBroadcast1DVectorTest(
3749         armnn::IWorkloadFactory& workloadFactory,
3750         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3751 {
3752     const unsigned int shape0[] = { 1, 2, 2, 3 };
3753     const unsigned int shape1[] = { 1, 1, 1, 3 };
3754
3755     std::vector<float> input0({ 1, 2, 3, 4, 5, 6,
3756                                 7, 8, 9, 10, 11, 12 });
3757
3758     std::vector<float> input1({ 1, 2, 3});
3759
3760     std::vector<uint8_t> output({ 1, 1, 1, 0, 0, 0,
3761                                   0, 0, 0, 0, 0, 0 });
3762
3763     return ElementwiseTestHelper<armnn::EqualQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3764         workloadFactory,
3765         memoryManager,
3766         shape0,
3767         input0,
3768         shape1,
3769         input1,
3770         shape0,
3771         output);
3772 }
3773
3774 LayerTestResult<uint8_t, 4> EqualUint8Test(
3775         armnn::IWorkloadFactory& workloadFactory,
3776         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3777 {
3778     unsigned int shape[] = { 2, 2, 2, 2 };
3779
3780     // With scale 1.0 and offset 0, the stored uint8 values below are also the dequantized values.
3781     std::vector<uint8_t> input0({ 1, 1, 1, 1, 6, 6, 6, 6,
3782                                   3, 3, 3, 3, 7, 7, 7, 7 });
3783
3784     std::vector<uint8_t> input1({ 2, 2, 2, 2, 6, 6, 6, 6,
3785                                   3, 3, 3, 3, 5, 5, 5, 5 });
3786
3787     std::vector<uint8_t> output({ 0, 0, 0, 0, 1, 1, 1, 1,
3788                                   1, 1, 1, 1, 0, 0, 0, 0 });
3789
3790     return ElementwiseTestHelper<armnn::EqualQueueDescriptor,
3791                                  armnn::DataType::QuantisedAsymm8,
3792                                  armnn::DataType::Boolean>(
3793         workloadFactory,
3794         memoryManager,
3795         shape,
3796         input0,
3797         shape,
3798         input1,
3799         shape,
3800         output,
3801         1.0f,
3802         0);
3803 }
3804
3805 LayerTestResult<uint8_t, 4> EqualBroadcast1ElementUint8Test(
3806         armnn::IWorkloadFactory& workloadFactory,
3807         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3808 {
3809     const unsigned int shape0[] = { 1, 2, 2, 3 };
3810     const unsigned int shape1[] = { 1, 1, 1, 1 };
3811
3812     std::vector<uint8_t> input0({ 1, 2, 3, 4, 5, 6,
3813                                   7, 8, 9, 10, 11, 12 });
3814
3815     std::vector<uint8_t> input1({ 1 });
3816
3817     std::vector<uint8_t> output({ 1, 0, 0, 0, 0, 0,
3818                                   0, 0, 0, 0, 0, 0 });
3819
3820     return ElementwiseTestHelper<armnn::EqualQueueDescriptor,
3821                                  armnn::DataType::QuantisedAsymm8,
3822                                  armnn::DataType::Boolean>(
3823         workloadFactory,
3824         memoryManager,
3825         shape0,
3826         input0,
3827         shape1,
3828         input1,
3829         shape0,
3830         output,
3831         1.0f,
3832         0);
3833 }
3834
3835 LayerTestResult<uint8_t, 4> EqualBroadcast1DVectorUint8Test(
3836         armnn::IWorkloadFactory& workloadFactory,
3837         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3838 {
3839     const unsigned int shape0[] = { 1, 2, 2, 3 };
3840     const unsigned int shape1[] = { 1, 1, 1, 3 };
3841
3842     std::vector<uint8_t> input0({ 1, 2, 3, 4, 5, 6,
3843                                   7, 8, 9, 10, 11, 12 });
3844
3845     std::vector<uint8_t> input1({ 1, 1, 3});
3846
3847     std::vector<uint8_t> output({ 1, 0, 1, 0, 0, 0,
3848                                   0, 0, 0, 0, 0, 0 });
3849
3850     return ElementwiseTestHelper<armnn::EqualQueueDescriptor,
3851                                  armnn::DataType::QuantisedAsymm8,
3852                                  armnn::DataType::Boolean>(
3853         workloadFactory,
3854         memoryManager,
3855         shape0,
3856         input0,
3857         shape1,
3858         input1,
3859         shape0,
3860         output,
3861         1.0f,
3862         0);
3863 }
3864
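// Greater: element-wise comparison returning 1 where input0 > input1 and 0 elsewhere.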
3865 LayerTestResult<uint8_t, 4> GreaterSimpleTest(armnn::IWorkloadFactory& workloadFactory,
3866                                               const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3867 {
3868     const unsigned int width = 2;
3869     const unsigned int height = 2;
3870     const unsigned int channelCount = 2;
3871     const unsigned int batchSize = 2;
3872
3873     unsigned int shape[] = { batchSize, channelCount, height, width };
3874
3875     std::vector<float> input0({ 1, 1, 1, 1,  5, 5, 5, 5,
3876                                 3, 3, 3, 3,  4, 4, 4, 4 });
3877
3878     std::vector<float> input1({ 1, 1, 1, 1,  3, 3, 3, 3,
3879                                 5, 5, 5, 5,  4, 4, 4, 4 });
3880
3881     std::vector<uint8_t> output({ 0, 0, 0, 0,  1, 1, 1, 1,
3882                                   0, 0, 0, 0,  0, 0, 0, 0 });
3883
3884     return ElementwiseTestHelper<armnn::GreaterQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3885         workloadFactory,
3886         memoryManager,
3887         shape,
3888         input0,
3889         shape,
3890         input1,
3891         shape,
3892         output);
3893 }
3894
3895 LayerTestResult<uint8_t, 4> GreaterBroadcast1ElementTest(
3896         armnn::IWorkloadFactory& workloadFactory,
3897         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3898 {
3899     unsigned int shape0[] = { 1, 2, 2, 2 };
3900     std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8});
3901
3902     unsigned int shape1[] = { 1, 1, 1, 1 };
3903     std::vector<float> input1({ 1 });
3904
3905     std::vector<uint8_t> output({ 0, 1, 1, 1, 1, 1, 1, 1});
3906
3907     return ElementwiseTestHelper<armnn::GreaterQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3908         workloadFactory,
3909         memoryManager,
3910         shape0,
3911         input0,
3912         shape1,
3913         input1,
3914         shape0,
3915         output);
3916 }
3917
3918 LayerTestResult<uint8_t, 4> GreaterBroadcast1DVectorTest(
3919         armnn::IWorkloadFactory& workloadFactory,
3920         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3921 {
3922     const unsigned int shape0[] = { 1, 2, 2, 3 };
3923     const unsigned int shape1[] = { 1, 1, 1, 3 };
3924
3925     std::vector<float> input0({ 1, 2.9f, 2.1f, 4, 5, 6,
3926                                 7, 8, 9, 10, 11, 12 });
3927
3928     std::vector<float> input1({ 1, 3, 2});
3929
3930     std::vector<uint8_t> output({ 0, 0, 1, 1, 1, 1,
3931                                   1, 1, 1, 1, 1, 1 });
3932
3933     return ElementwiseTestHelper<armnn::GreaterQueueDescriptor, armnn::DataType::Float32, armnn::DataType::Boolean>(
3934         workloadFactory,
3935         memoryManager,
3936         shape0,
3937         input0,
3938         shape1,
3939         input1,
3940         shape0,
3941         output);
3942 }
3943
3944 LayerTestResult<uint8_t, 4> GreaterUint8Test(
3945         armnn::IWorkloadFactory& workloadFactory,
3946         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
3947 {
3948     unsigned int shape[] = { 2, 2, 2, 2 };
3949
    // Quantization scale is 1.0 and offset is 0, so the quantized values below equal their dequantized values.
    std::vector<uint8_t> input0({ 1, 1, 1, 1, 6, 6, 6, 6,
                                  3, 3, 3, 3, 5, 5, 5, 5 });

    std::vector<uint8_t> input1({ 2, 2, 2, 2, 6, 6, 6, 6,
                                  2, 2, 2, 2, 5, 5, 5, 5 });

    std::vector<uint8_t> output({ 0, 0, 0, 0, 0, 0, 0, 0,
                                  1, 1, 1, 1, 0, 0, 0, 0 });

    return ElementwiseTestHelper<armnn::GreaterQueueDescriptor,
                                 armnn::DataType::QuantisedAsymm8,
                                 armnn::DataType::Boolean>(
        workloadFactory,
        memoryManager,
        shape,
        input0,
        shape,
        input1,
        shape,
        output,
        1.0f,
        0);
}

LayerTestResult<uint8_t, 4> GreaterBroadcast1ElementUint8Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 1 };

    std::vector<uint8_t> input0({ 1, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<uint8_t> input1({ 1 });

    std::vector<uint8_t> output({ 0, 1, 1, 1, 1, 1,
                                  1, 1, 1, 1, 1, 1 });

    return ElementwiseTestHelper<armnn::GreaterQueueDescriptor,
                                 armnn::DataType::QuantisedAsymm8,
                                 armnn::DataType::Boolean>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<uint8_t, 4> GreaterBroadcast1DVectorUint8Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<uint8_t> input0({ 1, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<uint8_t> input1({ 1, 1, 3 });

    std::vector<uint8_t> output({ 0, 1, 0, 1, 1, 1,
                                  1, 1, 1, 1, 1, 1 });

    return ElementwiseTestHelper<armnn::GreaterQueueDescriptor,
                                 armnn::DataType::QuantisedAsymm8,
                                 armnn::DataType::Boolean>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<float, 4> MaximumSimpleTest(armnn::IWorkloadFactory& workloadFactory,
                                            const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int width = 2;
    const unsigned int height = 2;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 2;

    unsigned int shape[] = { batchSize, channelCount, height, width };

    std::vector<float> input0({ 1, 1, 1, 1,  5, 5, 5, 5,
                                3, 3, 3, 3,  4, 4, 4, 4 });

    std::vector<float> input1({ 2, 2, 2, 2,  3, 3, 3, 3,
                                4, 4, 4, 4,  5, 5, 5, 5 });

    std::vector<float> output({ 2, 2, 2, 2,  5, 5, 5, 5,
                                4, 4, 4, 4,  5, 5, 5, 5 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        shape,
        input0,
        shape,
        input1,
        shape,
        output);
}

LayerTestResult<float, 4> MaximumBroadcast1ElementTest(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape0[] = { 1, 2, 2, 2 };
    std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8 });

    unsigned int shape1[] = { 1, 1, 1, 1 };
    std::vector<float> input1({ 2 });

    std::vector<float> output({ 2, 2, 3, 4, 5, 6, 7, 8 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output);
}

LayerTestResult<float, 4> MaximumBroadcast1DVectorTest(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<float> input0({ 1, 2, 3, 4, 5, 6,
                                7, 8, 9, 10, 11, 12 });

    std::vector<float> input1({ 1, 2, 3 });

    std::vector<float> output({ 1, 2, 3, 4, 5, 6,
                                7, 8, 9, 10, 11, 12 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output);
}

LayerTestResult<uint8_t, 4> MaximumUint8Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape[] = { 2, 2, 2, 2 };

    // Quantization scale is 1.0 and offset is 0, so the quantized values below equal their dequantized values.
    std::vector<uint8_t> input0({ 1, 1, 1, 1, 6, 6, 6, 6,
                                  3, 3, 3, 3, 4, 4, 4, 4 });

    std::vector<uint8_t> input1({ 2, 2, 2, 2, 3, 3, 3, 3,
                                  4, 4, 4, 4, 5, 5, 5, 5 });

    std::vector<uint8_t> output({ 2, 2, 2, 2, 6, 6, 6, 6,
                                  4, 4, 4, 4, 5, 5, 5, 5 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedAsymm8>(
        workloadFactory,
        memoryManager,
        shape,
        input0,
        shape,
        input1,
        shape,
        output,
        1.0f,
        0);
}

LayerTestResult<uint8_t, 4> MaximumBroadcast1ElementUint8Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 1 };

    std::vector<uint8_t> input0({ 1, 2, 3, 4,  5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<uint8_t> input1({ 2 });

    std::vector<uint8_t> output({ 2, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedAsymm8>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<uint8_t, 4> MaximumBroadcast1DVectorUint8Test(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<uint8_t> input0({ 1, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<uint8_t> input1({ 1, 10, 3 });

    std::vector<uint8_t> output({ 1, 10, 3, 4, 10, 6,
                                  7, 10, 9, 10, 11, 12 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedAsymm8>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MaximumInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape[] = { 2, 2, 2, 2 };

    std::vector<int16_t> input0({ 1, 1, 1, 1, 6, 6, 6, 6,
                                  3, 3, 3, 3, 4, 4, 4, 4 });

    std::vector<int16_t> input1({ 2, 2, 2, 2, 3, 3, 3, 3,
                                  4, 4, 4, 4, 5, 5, 5, 5 });

    std::vector<int16_t> output({ 2, 2, 2, 2, 6, 6, 6, 6,
                                  4, 4, 4, 4, 5, 5, 5, 5 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape,
        input0,
        shape,
        input1,
        shape,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MaximumBroadcast1ElementInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 1 };

    std::vector<int16_t> input0({ 1, 2, 3, 4,  5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<int16_t> input1({ 2 });

    std::vector<int16_t> output({ 2, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MaximumBroadcast1DVectorInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<int16_t> input0({ 1, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<int16_t> input1({ 1, 10, 3 });

    std::vector<int16_t> output({ 1, 10, 3, 4, 10, 6,
                                  7, 10, 9, 10, 11, 12 });

    return ElementwiseTestHelper<armnn::MaximumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<float, 4> MinimumBroadcast1ElementTest1(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape0[] = { 1, 2, 2, 2 };
    std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8 });

    unsigned int shape1[] = { 1, 1, 1, 1 };
    std::vector<float> input1({ 2 });

    std::vector<float> output({ 1, 2, 2, 2, 2, 2, 2, 2 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output);
}

LayerTestResult<float, 4> MinimumBroadcast1ElementTest2(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape0[] = { 1, 2, 2, 2 };
    std::vector<float> input0({ 1, 6, 3, 2, 8, 9, 1, 10 });

    unsigned int shape1[] = { 1, 1, 1, 1 };
    std::vector<float> input1({ 5 });

    std::vector<float> output({ 1, 5, 3, 2, 5, 5, 1, 5 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::Float32>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output);
}

LayerTestResult<uint8_t, 4> MinimumBroadcast1DVectorUint8Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<uint8_t> input0({ 1, 2, 3, 3, 2, 1,
                                  7, 1, 2, 3, 4, 5 });

    std::vector<uint8_t> input1({ 1, 2, 3 });

    std::vector<uint8_t> output({ 1, 2, 3, 1, 2, 1,
                                  1, 1, 2, 1, 2, 3 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::QuantisedAsymm8>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MinimumInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape[] = { 2, 2, 2, 2 };

    std::vector<int16_t> input0({ 1, 1, 1, 1, 6, 6, 6, 6,
                                  3, 3, 3, 3, 4, 4, 4, 4 });

    std::vector<int16_t> input1({ 2, 2, 2, 2, 3, 3, 3, 3,
                                  4, 4, 4, 4, 5, 5, 5, 5 });

    std::vector<int16_t> output({ 1, 1, 1, 1, 3, 3, 3, 3,
                                  3, 3, 3, 3, 4, 4, 4, 4 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape,
        input0,
        shape,
        input1,
        shape,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MinimumBroadcast1ElementInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 1 };

    std::vector<int16_t> input0({ 1, 2, 3, 4,  5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<int16_t> input1({ 2 });

    std::vector<int16_t> output({ 1, 2, 2, 2, 2, 2,
                                  2, 2, 2, 2, 2, 2 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

LayerTestResult<int16_t, 4> MinimumBroadcast1DVectorInt16Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int shape0[] = { 1, 2, 2, 3 };
    const unsigned int shape1[] = { 1, 1, 1, 3 };

    std::vector<int16_t> input0({ 1, 2, 3, 4, 5, 6,
                                  7, 8, 9, 10, 11, 12 });

    std::vector<int16_t> input1({ 1, 10, 3 });

    std::vector<int16_t> output({ 1, 2, 3, 1, 5, 3,
                                  1, 8, 3, 1, 10, 3 });

    return ElementwiseTestHelper<armnn::MinimumQueueDescriptor, armnn::DataType::QuantisedSymm16>(
        workloadFactory,
        memoryManager,
        shape0,
        input0,
        shape1,
        input1,
        shape0,
        output,
        1.0f,
        0);
}

namespace {
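// Shared implementation for the multiplication tests below. It builds the two
// input tensors and the expected output from the supplied shapes and values,
// runs a Multiplication workload through the given factory, and returns both
// the actual and the expected result for comparison by the caller.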
template<std::size_t NumDims>
LayerTestResult<float, NumDims> MultiplicationTestHelper(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    const unsigned int shape0[NumDims],
    const std::vector<float>& values0,
    const unsigned int shape1[NumDims],
    const std::vector<float>& values1,
    const unsigned int outShape[NumDims],
    const std::vector<float>& outValues)
{
    armnn::TensorInfo inputTensorInfo0{NumDims, shape0, armnn::DataType::Float32};
    armnn::TensorInfo inputTensorInfo1{NumDims, shape1, armnn::DataType::Float32};
    armnn::TensorInfo outputTensorInfo{NumDims, outShape, armnn::DataType::Float32};

    auto input0 = MakeTensor<float, NumDims>(inputTensorInfo0, values0);
    auto input1 = MakeTensor<float, NumDims>(inputTensorInfo1, values1);

    LayerTestResult<float, NumDims> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::MultiplicationQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);

    inputHandle0->Allocate();
    inputHandle1->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle0.get(), input0.origin());
    CopyDataToITensorHandle(inputHandle1.get(), input1.origin());

    workload->PostAllocationConfigure();
    workload->Execute();

    CopyDataFromITensorHandle(ret.output.origin(), outputHandle.get());

    ret.outputExpected = MakeTensor<float, NumDims>(outputTensorInfo, outValues);
    return ret;
}
} // anonymous namespace

LayerTestResult<float, 4> MultiplicationTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int width = 2;
    const unsigned int height = 2;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 2;

    unsigned int shape[] = { batchSize, channelCount, height, width };

    std::vector<float> input0({
        1,  1,  1,  1,    2,  2,  2,  2,
        3,  3,  3,  3,    4,  4,  4,  4 });

    std::vector<float> input1({
        2,  2,  2,  2,    3,  3,  3,  3,
        4,  4,  4,  4,    5,  5,  5,  5 });

    std::vector<float> output({
        2,  2,  2,  2,    6,  6,  6,  6,
        12, 12, 12, 12,  20, 20, 20, 20 });

    return MultiplicationTestHelper<4>(workloadFactory,
                                       memoryManager,
                                       shape,
                                       input0,
                                       shape,
                                       input1,
                                       shape,
                                       output);
}

LayerTestResult<float, 5> Multiplication5dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    const unsigned int width = 3;
    const unsigned int height = 2;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 2;
    const unsigned int depth = 2;

    unsigned int shape[] = { depth, batchSize, channelCount, height, width };

    std::vector<float> input0({
        1.80f, 0.20f, 2.30f,  1.30f, 2.10f, 1.00f,
        2.60f, 0.60f, 2.10f,  2.30f, 2.30f, 2.00f,

        2.50f, 1.00f, 2.90f,  3.10f, 1.50f, 2.40f,
        2.80f, 1.10f, 1.00f,  3.20f, 1.00f, 2.30f,


        0.30f, 2.20f, 1.00f,  0.20f, 1.60f, 1.40f,
        0.80f, 3.20f, 0.10f,  0.10f, 3.10f, 2.10f,

        1.50f, 2.40f, 1.40f,  0.70f, 2.40f, 1.40f,
        1.60f, 1.20f, 1.90f,  0.80f, 0.00f, 0.10f,
    });

    std::vector<float> input1({
        0.70f, 1.00f, 2.90f,  2.20f, 3.10f, 2.80f,
        1.80f, 2.00f, 0.50f,  2.30f, 1.20f, 2.70f,

        2.40f, 0.20f, 3.20f,  1.60f, 0.20f, 2.50f,
        2.30f, 0.70f, 2.70f,  1.80f, 2.90f, 2.70f,


        3.20f, 3.20f, 0.70f,  1.90f, 2.70f, 2.50f,
        2.40f, 0.90f, 2.30f,  1.80f, 2.50f, 2.00f,

        1.60f, 2.20f, 1.60f,  2.00f, 0.30f, 3.20f,
        0.40f, 3.00f, 2.60f,  0.30f, 0.00f, 2.50f,
    });

    std::vector<float> output({
        1.26f, 0.20f, 6.67f,  2.86f, 6.51f, 2.80f,
        4.68f, 1.20f, 1.05f,  5.29f, 2.76f, 5.40f,

        6.00f, 0.20f, 9.28f,  4.96f, 0.30f, 6.00f,
        6.44f, 0.77f, 2.70f,  5.76f, 2.90f, 6.21f,


        0.96f, 7.04f, 0.70f,  0.38f, 4.32f, 3.50f,
        1.92f, 2.88f, 0.23f,  0.18f, 7.75f, 4.20f,

        2.40f, 5.28f, 2.24f,  1.40f, 0.72f, 4.48f,
        0.64f, 3.60f, 4.94f,  0.24f, 0.00f, 0.25f,
    });

    return MultiplicationTestHelper<5>(workloadFactory,
                                       memoryManager,
                                       shape,
                                       input0,
                                       shape,
                                       input1,
                                       shape,
                                       output);
}

LayerTestResult<float, 4> MultiplicationBroadcast1ElementTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape0[] = { 1, 2, 2, 2 };
    std::vector<float> input0({ 1, 2, 3, 4, 5, 6, 7, 8 });

    unsigned int shape1[] = { 1, 1, 1, 1 };
    std::vector<float> input1({ 2 });

    std::vector<float> output({ 2, 4, 6, 8, 10, 12, 14, 16 });

    return MultiplicationTestHelper<4>(workloadFactory,
                                       memoryManager,
                                       shape0,
                                       input0,
                                       shape1,
                                       input1,
                                       shape0,
                                       output);
}

LayerTestResult<float, 4> MultiplicationBroadcast1DVectorTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    unsigned int shape0[] = { 1, 3, 3, 2 };
    std::vector<float> input0({
        1,   2,      3,  4,      5,  6,
        7,   8,      9, 10,     11, 12,
        13, 14,     15, 16,     17, 18 });

    unsigned int shape1[] = { 1, 1, 1, 2 };
    std::vector<float> input1({ 1, 2 });

    std::vector<float> output({
        1,   4,      3,  8,      5, 12,
        7,  16,      9, 20,     11, 24,
        13, 28,     15, 32,     17, 36 });

    return MultiplicationTestHelper<4>(workloadFactory,
                                       memoryManager,
                                       shape0,
                                       input0,
                                       shape1,
                                       input1,
                                       shape0,
                                       output);
}

LayerTestResult<float, 4> CompareMultiplicationTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    const unsigned int width = 16;
    const unsigned int height = 32;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 5;

    armnn::TensorInfo inputTensorInfo0;
    armnn::TensorInfo inputTensorInfo1;
    armnn::TensorInfo outputTensorInfo;

    constexpr unsigned int shape[] = { batchSize, channelCount, height, width };

    inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    LayerTestResult<float, 4> comparisonResult(outputTensorInfo);

    auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992);
    auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::MultiplicationQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    armnn::MultiplicationQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get());
    SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo);

    inputHandle0->Allocate();
    inputHandle1->Allocate();
    outputHandle->Allocate();
    inputHandle0Ref->Allocate();
    inputHandle1Ref->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);

    workload->PostAllocationConfigure();
    workload->Execute();
    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();
    CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get());

    return comparisonResult;
}

LayerTestResult<float, 4> CompareBatchNormTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    const unsigned int width     = 2;
    const unsigned int height    = 3;
    const unsigned int channels  = 5;
    const unsigned int batchSize = 3;

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo tensorInfo;

    constexpr unsigned int shape[]       = {batchSize, channels, height, width};
    constexpr unsigned int tensorShape[] = {channels};

    inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);

    auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);

    auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
    auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
    auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
    auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);

    LayerTestResult<float, 4> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::BatchNormalizationQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);

    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Mean             = &meanTensor;
    data.m_Variance         = &varianceTensor;
    data.m_Beta             = &betaTensor;
    data.m_Gamma            = &gammaTensor;
    data.m_Parameters.m_Eps = 0.01f;

    armnn::BatchNormalizationQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);

    inputHandle->Allocate();
    outputHandle->Allocate();
    inputHandleRef->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    workload->PostAllocationConfigure();
    workload->Execute();
    workloadRef->PostAllocationConfigure();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

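// Runs a Permute workload that rearranges inputData according to the given
// mappings. On return outputData holds the permuted elements and
// inputTensorInfo has been updated in place to describe the permuted shape.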
template<typename T>
void PermuteTensorData(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::PermutationVector& mappings,
        armnn::TensorInfo& inputTensorInfo,
        const T* inputData,
        std::vector<T>& outputData)
{
    BOOST_ASSERT_MSG(inputData != nullptr, "inputData must not be null");
    if (inputData == nullptr)
    {
        // A nullptr is an error in the test. By returning without doing the permutation
        // we expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    armnn::TensorInfo outputTensorInfo = armnnUtils::Permuted(inputTensorInfo, mappings);

    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::PermuteQueueDescriptor queueDescriptor;
    queueDescriptor.m_Parameters = armnn::PermuteDescriptor{mappings};
    armnn::WorkloadInfo workloadInfo;
    AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePermute(queueDescriptor, workloadInfo);

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), inputData);

    workload->PostAllocationConfigure();
    workload->Execute();

    outputData.resize(outputTensorInfo.GetNumElements());
    CopyDataFromITensorHandle(&outputData[0], outputHandle.get());
    inputTensorInfo = outputTensorInfo;
}

armnn::OriginsDescriptor CreateDescriptorForConcatenation(
        const std::vector<armnn::TensorInfo>& inputTensorInfos,
        unsigned int concatDim)
{
    std::vector<armnn::TensorShape> shapes;
    shapes.reserve(inputTensorInfos.size());
    for (const armnn::TensorInfo& it: inputTensorInfos)
    {
        shapes.push_back(it.GetShape());
    }

    return armnn::CreateDescriptorForConcatenation(shapes.begin(),
                                                   shapes.end(),
                                                   concatDim);
}
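
// Illustrative sketch of the helper above (not exercised directly by the
// tests in this file): concatenating two 2x3 tensors along dimension 0 could
// be described as follows. The resulting descriptor holds one origin per
// view, here { 0, 0 } for the first input and { 2, 0 } for the second.
//
//     std::vector<armnn::TensorInfo> infos = {
//         armnn::TensorInfo({ 2, 3 }, armnn::DataType::Float32),
//         armnn::TensorInfo({ 2, 3 }, armnn::DataType::Float32)
//     };
//     armnn::OriginsDescriptor desc = CreateDescriptorForConcatenation(infos, 0);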

//
// Concatenation is only supported for the N and C dimensions of NCHW tensors and for
// the innermost dimension. For tensors with fewer than 4 dimensions we therefore need
// to make sure that the concat dimension is either the 3rd slowest iterating one or
// the innermost dimension.
//

bool NeedPermuteForConcat(
        const std::vector<armnn::TensorInfo>& inputTensorInfos,
        unsigned int concatDim)
{
    // See note above. Additionally we expect the input shapes to have the
    // same number of dimensions.
    unsigned int nDimensions = 0;

    // Determine the number of dimensions as well as sanity check them
    // against test implementation issues.
    for (auto&& tensorInfo : inputTensorInfos)
    {
        if (!nDimensions)
        {
            nDimensions = tensorInfo.GetShape().GetNumDimensions();
        }
        else
        {
            BOOST_ASSERT_MSG(nDimensions == tensorInfo.GetShape().GetNumDimensions(),
                "Input shapes must have the same number of dimensions");
        }
    }

    return (nDimensions < 3 || (nDimensions == 3 && (nDimensions-concatDim) < 3 && (nDimensions-concatDim) != 1));
}
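
// Worked examples for the predicate above (illustrative only):
//   2-D inputs, any concat dimension -> true  (fewer than 3 dimensions)
//   3-D inputs, concatDim == 0       -> false (3 - 0 == 3, not less than 3)
//   3-D inputs, concatDim == 1       -> true  (3 - 1 == 2: neither the 3rd
//                                             slowest nor the innermost)
//   3-D inputs, concatDim == 2       -> false (3 - 2 == 1, the innermost)
//   4-D inputs                       -> false (handled without a permute)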

armnn::TensorShape ExpandTensorShapeTo3dForPermute(const armnn::TensorShape& inputShape)
{
    unsigned int numDims = inputShape.GetNumDimensions();
    if (numDims >= 3)
    {
        // Nothing to do if the inputShape has at least 3 dimensions.
        return inputShape;
    }

    std::vector<unsigned int> newDims(size_t(3), 1u);
    unsigned int expandedBy = 3 - numDims;
    for (unsigned int i = 0; i < numDims; ++i)
    {
        newDims[expandedBy+i] = inputShape[i];
    }
    return armnn::TensorShape(3u, &newDims[0]);
}
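
// For example, a 2-D shape { 4, 5 } becomes { 1, 4, 5 } and a 1-D shape { 7 }
// becomes { 1, 1, 7 }: the original dimensions keep their order and every
// newly added leading dimension has size 1.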

void Generate3dPermuteVectorForConcat(
        unsigned int numDimensions,
        unsigned int& concatDim,
        std::pair<armnn::PermutationVector, armnn::PermutationVector>& permutations)
{
    BOOST_ASSERT_MSG(numDimensions <= 3,
       "Only dimensions 1, 2 and 3 are supported by this helper");
    unsigned int expandedBy = 3 - numDimensions;
    unsigned int expandedConcatAxis = concatDim + expandedBy;

    if (expandedConcatAxis == 2)
    {
        concatDim = 0;
        armnn::PermutationVector forwardPermutation({1, 2, 0});
        armnn::PermutationVector reversePermutation({2, 0, 1});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else if (expandedConcatAxis == 1)
    {
        concatDim = 0;
        armnn::PermutationVector forwardPermutation({2, 0, 1});
        armnn::PermutationVector reversePermutation({1, 2, 0});
        permutations = std::make_pair(forwardPermutation, reversePermutation);
    }
    else
    {
        BOOST_ASSERT(expandedConcatAxis == 0);
        concatDim = 0;
    }
}
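
// For example, concatenating 2-D tensors along dimension 1 expands to a 3-D
// concatenation along axis 2, so the forward permutation {1, 2, 0} moves that
// axis to position 0 (where concatenation is supported) and the reverse
// permutation {2, 0, 1} restores the original layout afterwards.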

//
// Permutes the input tensors so that we can do a supported concatenation.
// Tensors with fewer than 3 dimensions are treated as 3-D by adding dummy
// 1-sized dimensions at the front. Finally this function computes what the
// output shape of the permuted concatenated tensor is going to be.
//
template <typename T>
void PermuteInputsForConcat(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        std::vector<armnn::TensorInfo>& inputTensorInfos,
        std::vector<T*>& inputData,
        std::vector<std::vector<T>>& inputDataStorage,
        armnn::PermutationVector& permuteVector,
        unsigned int& concatDim,
        armnn::TensorInfo& outputTensorInfo)
{
    BOOST_ASSERT_MSG(inputTensorInfos.size() > 1,
        "Expecting more than one tensor to be concatenated here");

    unsigned int numDims = 0;
    unsigned int nthInput = 0;
    const armnn::PermutationVector identity({0, 1, 2});

    std::pair<armnn::PermutationVector, armnn::PermutationVector> permutations =
        std::make_pair(identity, identity);

    inputDataStorage.resize(inputData.size());

    for (auto&& tensorInfo : inputTensorInfos)
    {
        if (numDims == 0)
        {
            numDims = tensorInfo.GetShape().GetNumDimensions();
            Generate3dPermuteVectorForConcat(numDims, concatDim, permutations);

            // Store the reverse permutation.
            permuteVector = permutations.second;
            BOOST_ASSERT_MSG(!permuteVector.IsEqual(identity),
                "Test logic error, we don't need a permutation, so we shouldn't arrive here");
        }
        else
        {
            BOOST_ASSERT_MSG(numDims == tensorInfo.GetShape().GetNumDimensions(),
                "All inputs must have the same number of dimensions");
        }

        armnn::TensorInfo newTensorInfo = tensorInfo;
        newTensorInfo.SetShape(ExpandTensorShapeTo3dForPermute(tensorInfo.GetShape()));

        PermuteTensorData<T>(workloadFactory,
                             memoryManager,
                             permutations.first,
                             newTensorInfo,
                             inputData[nthInput],
                             inputDataStorage[nthInput]);

        inputData[nthInput] = inputDataStorage[nthInput].data();
        inputTensorInfos[nthInput] = newTensorInfo;

        ++nthInput;
    }

    outputTensorInfo.SetShape(
        armnnUtils::Permuted(
            ExpandTensorShapeTo3dForPermute(outputTensorInfo.GetShape()),
            permutations.first));
}

//
// This is the counterpart of PermuteInputsForConcat(...): it permutes back
// the output of the concatenation so that we can check it against an
// expected output.
//
template <typename T>
void PermuteOutputForConcat(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::TensorInfo& tensorInfo,
        const armnn::PermutationVector& permuteVector,
        std::unique_ptr<armnn::ITensorHandle>&& inputDataHandle,
        T* data)
{
    BOOST_ASSERT_MSG(data != nullptr, "data must not be null");
    if (data == nullptr)
    {
        // A nullptr is an error in the test. By returning without doing the permutation
        // we expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    armnn::TensorInfo resultTensorInfo = tensorInfo;
    std::vector<T> inputData(tensorInfo.GetNumElements());
    std::vector<T> outputData;

    CopyDataFromITensorHandle(&inputData[0], inputDataHandle.get());

    PermuteTensorData<T>(workloadFactory,
                         memoryManager,
                         permuteVector,
                         resultTensorInfo,
                         &inputData[0],
                         outputData);

    ::memcpy(data, &outputData[0], sizeof(T)*outputData.size());
}

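// Core concatenation driver used by the Concatenation*TestImpl functions
// below: it permutes the inputs first if the requested axis is unsupported,
// runs a Concat workload (optionally through sub-tensor views of the output
// when useSubtensor is set), and permutes the result back before copying it
// into 'output'.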
template <typename T>
void Concatenate(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
    std::initializer_list<const armnn::TensorInfo> inputTensorInfosOrig,
    std::initializer_list<T*> inputsOrig,
    const armnn::TensorInfo& outputTensorInfoOrig,
    T* output,
    unsigned int concatDim,
    bool useSubtensor)
{
    BOOST_ASSERT_MSG(output != nullptr, "output must not be null");
    if (output == nullptr)
    {
        // A nullptr is an error in the test. By returning without doing the concatenation
        // we expect the caller to fail the test. It still makes sense to report this as
        // an assert for Debug builds.
        return;
    }

    // Saves a copy of the parameters which we might need to change.
    std::vector<armnn::TensorInfo> inputTensorInfos(inputTensorInfosOrig.begin(), inputTensorInfosOrig.end());
    std::vector<T*> inputs             = inputsOrig;
    armnn::TensorInfo outputTensorInfo = outputTensorInfoOrig;

    armnn::PermutationVector permuteVector{0, 1, 2};

    // Holds and automatically releases memory for the reshaped input data.
    std::vector<std::vector<T>> tmpInputDataStorage;

    const size_t inputCount = inputTensorInfos.size();

    bool needPermuteForConcat = NeedPermuteForConcat(inputTensorInfos, concatDim);

    if (needPermuteForConcat)
    {
        //
        // We need to permute the inputs, because concatenation along
        // the requested axis is not supported.
        //
        PermuteInputsForConcat<T>(workloadFactory,
                                  memoryManager,
                                  inputTensorInfos,
                                  inputs,
                                  tmpInputDataStorage,
                                  permuteVector,
                                  concatDim,
                                  outputTensorInfo);
    }

    armnn::WorkloadInfo workloadInfo;

    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
    inputHandles.reserve(inputCount);

    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::ConcatQueueDescriptor queueDescriptor;
    armnn::OriginsDescriptor viewsDescriptor = CreateDescriptorForConcatenation(inputTensorInfos, concatDim);
    queueDescriptor.m_Parameters = viewsDescriptor;

    if (useSubtensor)
    {
        queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
        for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
        {
            queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
                viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
        }

        outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

        const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            const armnn::TensorInfo& inputTensorInfo = inputTensorInfos[i];
            std::unique_ptr<armnn::ITensorHandle> inputHandle =
                subTensorsSupported ?
                    workloadFactory.CreateSubTensorHandle(*outputHandle,
                                                          inputTensorInfo.GetShape(),
                                                          queueDescriptor.m_ViewOrigins[i].m_Origin.data()) :
                    workloadFactory.CreateTensorHandle(inputTensorInfo);

            inputHandles.emplace_back(std::move(inputHandle));
        }
    }
    else
    {
        for (unsigned int i = 0; i < inputCount; ++i)
        {
            std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfos[i]);
            inputHandles.emplace_back(std::move(inputHandle));
        }
    }

    for (unsigned int i = 0; i < inputCount; ++i)
    {
        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos[i], inputHandles[i].get());
    }

    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(queueDescriptor, workloadInfo);

    for (auto& inputHandle : inputHandles)
    {
        inputHandle->Allocate();
    }

    outputHandle->Allocate();

    unsigned int nextInputId = 0;
    for (auto& inputHandle : inputHandles)
    {
        CopyDataToITensorHandle(inputHandle.get(), inputs[nextInputId]);
        ++nextInputId;
    }

    workload->PostAllocationConfigure();
    workload->Execute();

    if (needPermuteForConcat)
    {
        PermuteOutputForConcat<T>(workloadFactory,
                                  memoryManager,
                                  outputTensorInfo,
                                  permuteVector,
                                  std::move(outputHandle),
                                  output);
    }
    else
    {
        CopyDataFromITensorHandle(output, outputHandle.get());
    }
}
5182
5183 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5184 LayerTestResult<T, 1> Concatenation1dTestImpl(
5185     armnn::IWorkloadFactory& workloadFactory,
5186     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5187     float qScale,
5188     int32_t qOffset)
5189 {
5190     armnn::TensorInfo inputTensorInfo({ 3 }, ArmnnType, qScale, qOffset);
5191
5192     auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
5193     auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
5194     auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));
5195
5196     armnn::TensorInfo outputTensorInfo({ 9 }, ArmnnType, qScale, qOffset);
5197
5198     LayerTestResult<T, 1> result(outputTensorInfo);
5199
5200     std::vector<T> output;
5201     output.resize(outputTensorInfo.GetNumElements());
5202     Concatenate<T>(workloadFactory, memoryManager,
5203                    { inputTensorInfo, inputTensorInfo, inputTensorInfo },
5204                    { input0.data(), input1.data(), input2.data() },
5205                    outputTensorInfo,
5206                    output.data(),
5207                    0,
5208                    true);
5209
5210     result.output = MakeTensor<T, 1>(outputTensorInfo, output);
5211     result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5212         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
5213     }));
5214
5215     return result;
5216 }
5217
5218 LayerTestResult<float, 1> Concatenation1dTest(
5219     armnn::IWorkloadFactory& workloadFactory,
5220     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5221 {
5222     return Concatenation1dTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
5223 }
5224
5225 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5226 LayerTestResult<T, 2> Concatenation2dTestImpl(
5227     armnn::IWorkloadFactory& workloadFactory,
5228     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5229     const armnn::TensorInfo& outputTensorInfo,
5230     unsigned int dimension,
5231     const float qScale,
5232     const int32_t qOffset)
5233 {
5234     armnn::TensorInfo inputTensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
5235
5236     auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5237         // Batch 0
5238         1.0f, 2.0f, 3.0f,
5239
5240         // Batch 1
5241         10.0f, 11.0f, 12.0f,
5242     }));
5243
5244     auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5245         // Batch 0
5246         4.0f, 5.0f, 6.0f,
5247
5248         // Batch 1
5249         13.0f, 14.0f, 15.0f,
5250     }));
5251
5252     auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5253         // Batch 0
5254         7.0f, 8.0f, 9.0f,
5255
5256         // Batch 1
5257         16.0f, 17.0f, 18.0f,
5258     }));
5259
5260     LayerTestResult<T, 2> result(outputTensorInfo);
5261
5262     std::vector<T> output;
5263     output.resize(outputTensorInfo.GetNumElements());
5264     Concatenate<T>(workloadFactory, memoryManager,
5265                    { inputTensorInfo, inputTensorInfo, inputTensorInfo },
5266                    { input0.data(), input1.data(), input2.data() },
5267                    outputTensorInfo,
5268                    output.data(),
5269                    dimension,
5270                    true);
5271
5272     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
5273     return result;
5274 }
5275
5276 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5277 LayerTestResult<T, 2> Concatenation2dDim0TestImpl(
5278     armnn::IWorkloadFactory& workloadFactory,
5279     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5280     float qScale,
5281     int32_t qOffset)
5282 {
5283     armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
5284
5285     LayerTestResult<T, 2> result = Concatenation2dTestImpl<ArmnnType>(
5286         workloadFactory, memoryManager, outputTensorInfo, 0, qScale, qOffset);
5287
5288     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5289         // Batch 0
5290         1.0f, 2.0f, 3.0f,
5291
5292         // Batch 1
5293         10.0f, 11.0f, 12.0f,
5294
5295         // Batch 2
5296         4.0f, 5.0f, 6.0f,
5297
5298         // Batch 3
5299         13.0f, 14.0f, 15.0f,
5300
5301         // Batch 4
5302         7.0f, 8.0f, 9.0f,
5303
5304         // Batch 5
5305         16.0f, 17.0f, 18.0f,
5306     }));
5307
5308     return result;
5309 }
5310
5311 LayerTestResult<float, 2> Concatenation2dDim0Test(
5312     armnn::IWorkloadFactory& workloadFactory,
5313     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5314 {
5315     return Concatenation2dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
5316 }
5317
5318 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5319 LayerTestResult<T, 2> Concatenation2dDim1TestImpl(
5320     armnn::IWorkloadFactory& workloadFactory,
5321     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5322     float qScale,
5323     int32_t qOffset)
5324 {
5325     armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
5326
5327     LayerTestResult<T, 2> result = Concatenation2dTestImpl<ArmnnType>(
5328         workloadFactory, memoryManager, outputTensorInfo, 1, qScale, qOffset);
5329
5330     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5331         // Batch 0
5332         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
5333
5334         // Batch 1
5335         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
5336     }));
5337
5338     return result;
5339 }
5340
5341 LayerTestResult<float, 2> Concatenation2dDim1Test(
5342     armnn::IWorkloadFactory& workloadFactory,
5343     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5344 {
5345     return Concatenation2dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
5346 }
5347
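// As Concatenation2dDim0TestImpl, but the inputs differ in size along the
// concatenation axis (2, 3 and 1 rows respectively).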
5348 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5349 LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(
5350     armnn::IWorkloadFactory& workloadFactory,
5351     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5352     float qScale,
5353     int32_t qOffset)
5354 {
5355     armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
5356     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5357         // Batch 0
5358         1.0f, 2.0f, 3.0f,
5359
5360         // Batch 1
5361         10.0f, 11.0f, 12.0f,
5362     }));
5363
5364     armnn::TensorInfo input1TensorInfo({ 3, 3 }, ArmnnType, qScale, qOffset);
5365     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5366         // Batch 0
5367         4.0f, 5.0f, 6.0f,
5368
5369         // Batch 1
5370         13.0f, 14.0f, 15.0f,
5371
5372         // Batch 2
5373         7.0f, 8.0f, 9.0f,
5374     }));
5375
5376     armnn::TensorInfo input2TensorInfo({ 1, 3 }, ArmnnType, qScale, qOffset);
5377     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5378         // Batch 0
5379         16.0f, 17.0f, 18.0f,
5380     }));
5381
5382     armnn::TensorInfo outputTensorInfo({ 6, 3 }, ArmnnType, qScale, qOffset);
5383     LayerTestResult<T, 2> result(outputTensorInfo);
5384
5385     std::vector<T> output;
5386     output.resize(outputTensorInfo.GetNumElements());
5387     Concatenate<T>(workloadFactory, memoryManager,
5388                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
5389                    { input0.data(), input1.data(), input2.data() },
5390                    outputTensorInfo,
5391                    output.data(),
5392                    0,
5393                    true);
5394
5395     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
5396     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5397         // Batch 0
5398         1.0f, 2.0f, 3.0f,
5399
5400         // Batch 1
5401         10.0f, 11.0f, 12.0f,
5402
5403         // Batch 2
5404         4.0f, 5.0f, 6.0f,
5405
5406         // Batch 3
5407         13.0f, 14.0f, 15.0f,
5408
5409         // Batch 4
5410         7.0f, 8.0f, 9.0f,
5411
5412         // Batch 5
5413         16.0f, 17.0f, 18.0f,
5414     }));
5415
5416     return result;
5417 }
5418
5419 LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(
5420     armnn::IWorkloadFactory& workloadFactory,
5421     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5422 {
5423     return Concatenation2dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(
5424         workloadFactory, memoryManager, 0.0f, 0);
5425 }
5426
5427 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5428 LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(
5429     armnn::IWorkloadFactory& workloadFactory,
5430     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5431     float qScale,
5432     int32_t qOffset)
5433 {
5434     armnn::TensorInfo input0TensorInfo({ 2, 3 }, ArmnnType, qScale, qOffset);
5435     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5436         // Batch 0
5437         1.0f, 2.0f, 3.0f,
5438
5439         // Batch 1
5440         10.0f, 11.0f, 12.0f,
5441     }));
5442
5443     armnn::TensorInfo input1TensorInfo({ 2, 5 }, ArmnnType, qScale, qOffset);
5444     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5445         // Batch 0
5446         4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
5447
5448         // Batch 1
5449         13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
5450     }));
5451
5452     armnn::TensorInfo input2TensorInfo({ 2, 1 }, ArmnnType, qScale, qOffset);
5453     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5454         // Batch 0
5455         9.0f,
5456
5457         // Batch 1
5458         18.0f
5459     }));
5460
5461     armnn::TensorInfo outputTensorInfo({ 2, 9 }, ArmnnType, qScale, qOffset);
5462     LayerTestResult<T, 2> result(outputTensorInfo);
5463
5464     std::vector<T> output;
5465     output.resize(outputTensorInfo.GetNumElements());
5466     Concatenate<T>(workloadFactory, memoryManager,
5467                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
5468                    { input0.data(), input1.data(), input2.data() },
5469                    outputTensorInfo,
5470                    output.data(),
5471                    1,
5472                    true);
5473
5474     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
5475     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5476         // Batch 0
5477         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
5478
5479         // Batch 1
5480         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
5481     }));
5482
5483     return result;
5484 }
5485
5486 LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(
5487     armnn::IWorkloadFactory& workloadFactory,
5488     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5489 {
5490     return Concatenation2dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(
5491         workloadFactory, memoryManager, 0.0f, 0);
5492 }
5493
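// Helper: concatenates three 2x3x2 inputs along 'dimension'; useSubtensor exercises
// the sub-tensor optimization path where the backend supports it.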
5494 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5495 LayerTestResult<T, 3> Concatenation3dTestImpl(
5496     armnn::IWorkloadFactory& workloadFactory,
5497     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5498     const armnn::TensorInfo& outputTensorInfo,
5499     unsigned int dimension,
5500     bool useSubtensor,
5501     float qScale,
5502     int32_t qOffset)
5503 {
5504     armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
5505
5506     auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5507         // Batch 0, Channel 0
5508         1.0f, 2.0f,
5509
5510         // Batch 0, Channel 1
5511         3.0f, 4.0f,
5512
5513         // Batch 0, Channel 2
5514         5.0f, 6.0f,
5515
5516         // Batch 1, Channel 0
5517         19.0f, 20.0f,
5518
5519         // Batch 1, Channel 1
5520         21.0f, 22.0f,
5521
5522         // Batch 1, Channel 2
5523         23.0f, 24.0f
5524     }));
5525
5526     auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5527         // Batch 0, Channel 0
5528         7.0f, 8.0f,
5529
5530         // Batch 0, Channel 1
5531         9.0f, 10.0f,
5532
5533         // Batch 0, Channel 2
5534         11.0f, 12.0f,
5535
5536         // Batch 1, Channel 0
5537         25.0f, 26.0f,
5538
5539         // Batch 1, Channel 1
5540         27.0f, 28.0f,
5541
5542         // Batch 1, Channel 2
5543         29.0f, 30.0f
5544     }));
5545
5546     auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5547         // Batch 0, Channel 0
5548         13.0f, 14.0f,
5549
5550         // Batch 0, Channel 1
5551         15.0f, 16.0f,
5552
5553         // Batch 0, Channel 2
5554         17.0f, 18.0f,
5555
5556         // Batch 1, Channel 0
5557         31.0f, 32.0f,
5558
5559         // Batch 1, Channel 1
5560         33.0f, 34.0f,
5561
5562         // Batch 1, Channel 2
5563         35.0f, 36.0f
5564     }));
5565
5566     LayerTestResult<T, 3> result(outputTensorInfo);
5567
5568     std::vector<T> output;
5569     output.resize(outputTensorInfo.GetNumElements());
5570     Concatenate<T>(workloadFactory, memoryManager,
5571                    { inputTensorInfo, inputTensorInfo, inputTensorInfo },
5572                    { input0.data(), input1.data(), input2.data() },
5573                    outputTensorInfo,
5574                    output.data(),
5575                    dimension,
5576                    useSubtensor);
5577
5578     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
5579     return result;
5580 }
5581
5582 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5583 LayerTestResult<T, 3> Concatenation3dDim0TestImpl(
5584     armnn::IWorkloadFactory& workloadFactory,
5585     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5586     float qScale,
5587     int32_t qOffset)
5588 {
5589     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
5590
5591     LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
5592         workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
5593
5594     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5595         // Batch 0, Channel 0
5596         1.0f, 2.0f,
5597
5598         // Batch 0, Channel 1
5599         3.0f, 4.0f,
5600
5601         // Batch 0, Channel 2
5602         5.0f, 6.0f,
5603
5604         // Batch 1, Channel 0
5605         19.0f, 20.0f,
5606
5607         // Batch 1, Channel 1
5608         21.0f, 22.0f,
5609
5610         // Batch 1, Channel 2
5611         23.0f, 24.0f,
5612
5613         // Batch 2, Channel 0
5614         7.0f, 8.0f,
5615
5616         // Batch 2, Channel 1
5617         9.0f, 10.0f,
5618
5619         // Batch 2, Channel 2
5620         11.0f, 12.0f,
5621
5622         // Batch 3, Channel 0
5623         25.0f, 26.0f,
5624
5625         // Batch 3, Channel 1
5626         27.0f, 28.0f,
5627
5628         // Batch 3, Channel 2
5629         29.0f, 30.0f,
5630
5631         // Batch 4, Channel 0
5632         13.0f, 14.0f,
5633
5634         // Batch 4, Channel 1
5635         15.0f, 16.0f,
5636
5637         // Batch 4, Channel 2
5638         17.0f, 18.0f,
5639
5640         // Batch 5, Channel 0
5641         31.0f, 32.0f,
5642
5643         // Batch 5, Channel 1
5644         33.0f, 34.0f,
5645
5646         // Batch 5, Channel 2
5647         35.0f, 36.0f
5648     }));
5649
5650     return result;
5651 }
5652
5653 LayerTestResult<float, 3> Concatenation3dDim0Test(
5654     armnn::IWorkloadFactory& workloadFactory,
5655     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5656 {
5657     return Concatenation3dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
5658 }
5659
5660 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5661 LayerTestResult<T, 3> Concatenation3dDim1TestImpl(
5662     armnn::IWorkloadFactory& workloadFactory,
5663     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5664     float qScale,
5665     int32_t qOffset)
5666 {
5667     armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, ArmnnType, qScale, qOffset);
5668
5669     LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
5670         workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
5671
5672     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5673         // Batch 0, Channel 0
5674         1.0f, 2.0f,
5675
5676         // Batch 0, Channel 1
5677         3.0f, 4.0f,
5678
5679         // Batch 0, Channel 2
5680         5.0f, 6.0f,
5681
5682         // Batch 0, Channel 3
5683         7.0f, 8.0f,
5684
5685         // Batch 0, Channel 4
5686         9.0f, 10.0f,
5687
5688         // Batch 0, Channel 5
5689         11.0f, 12.0f,
5690
5691         // Batch 0, Channel 6
5692         13.0f, 14.0f,
5693
5694         // Batch 0, Channel 7
5695         15.0f, 16.0f,
5696
5697         // Batch 0, Channel 8
5698         17.0f, 18.0f,
5699
5700         // Batch 1, Channel 0
5701         19.0f, 20.0f,
5702
5703         // Batch 1, Channel 1
5704         21.0f, 22.0f,
5705
5706         // Batch 1, Channel 2
5707         23.0f, 24.0f,
5708
5709         // Batch 1, Channel 3
5710         25.0f, 26.0f,
5711
5712         // Batch 1, Channel 4
5713         27.0f, 28.0f,
5714
5715         // Batch 1, Channel 5
5716         29.0f, 30.0f,
5717
5718         // Batch 1, Channel 6
5719         31.0f, 32.0f,
5720
5721         // Batch 1, Channel 7
5722         33.0f, 34.0f,
5723
5724         // Batch 1, Channel 8
5725         35.0f, 36.0f
5726     }));
5727
5728     return result;
5729 }
5730
5731 LayerTestResult<float, 3> Concatenation3dDim1Test(
5732     armnn::IWorkloadFactory& workloadFactory,
5733     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5734 {
5735     return Concatenation3dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
5736 }
5737
5738 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5739 LayerTestResult<T, 3> Concatenation3dDim2TestImpl(
5740     armnn::IWorkloadFactory& workloadFactory,
5741     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5742     bool useSubtensor,
5743     float qScale,
5744     int32_t qOffset)
5745 {
5746     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
5747
5748     LayerTestResult<T, 3> result = Concatenation3dTestImpl<ArmnnType>(
5749         workloadFactory, memoryManager, outputTensorInfo, 2, useSubtensor, qScale, qOffset);
5750
5751     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5752         // Batch 0, Channel 0
5753         1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
5754
5755         // Batch 0, Channel 1
5756         3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
5757
5758         // Batch 0, Channel 2
5759         5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
5760
5761         // Batch 1, Channel 0
5762         19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
5763
5764         // Batch 1, Channel 1
5765         21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
5766
5767         // Batch 1, Channel 2
5768         23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
5769     }));
5770
5771     return result;
5772 }
5773
5774 LayerTestResult<float, 3> Concatenation3dDim2Test(
5775     armnn::IWorkloadFactory& workloadFactory,
5776     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5777     bool useSubtensor)
5778 {
5779     return Concatenation3dDim2TestImpl<armnn::DataType::Float32>(
5780         workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
5781 }
5782
5783 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5784 LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(
5785     armnn::IWorkloadFactory& workloadFactory,
5786     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5787     float qScale,
5788     int32_t qOffset)
5789 {
5790     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
5791     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5792             // Batch 0, Channel 0
5793             1.0f, 2.0f,
5794
5795             // Batch 0, Channel 1
5796             3.0f, 4.0f,
5797
5798             // Batch 0, Channel 2
5799             5.0f, 6.0f,
5800
5801             // Batch 1, Channel 0
5802             19.0f, 20.0f,
5803
5804             // Batch 1, Channel 1
5805             21.0f, 22.0f,
5806
5807             // Batch 1, Channel 2
5808             23.0f, 24.0f
5809     }));
5810
5811     armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, ArmnnType, qScale, qOffset);
5812     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5813             // Batch 0, Channel 0
5814             7.0f, 8.0f,
5815
5816             // Batch 0, Channel 1
5817             9.0f, 10.0f,
5818
5819             // Batch 0, Channel 2
5820             11.0f, 12.0f,
5821     }));
5822
5823     armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, ArmnnType, qScale, qOffset);
5824     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5825             // Batch 0, Channel 0
5826             25.0f, 26.0f,
5827
5828             // Batch 0, Channel 1
5829             27.0f, 28.0f,
5830
5831             // Batch 0, Channel 2
5832             29.0f, 30.0f,
5833
5834             // Batch 1, Channel 0
5835             13.0f, 14.0f,
5836
5837             // Batch 1, Channel 1
5838             15.0f, 16.0f,
5839
5840             // Batch 1, Channel 2
5841             17.0f, 18.0f,
5842
5843             // Batch 2, Channel 0
5844             31.0f, 32.0f,
5845
5846             // Batch 2, Channel 1
5847             33.0f, 34.0f,
5848
5849             // Batch 2, Channel 2
5850             35.0f, 36.0f
5851     }));
5852
5853     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, ArmnnType, qScale, qOffset);
5854     LayerTestResult<T, 3> result(outputTensorInfo);
5855
5856     std::vector<T> output;
5857     output.resize(outputTensorInfo.GetNumElements());
5858     Concatenate<T>(workloadFactory, memoryManager,
5859                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
5860                    { input0.data(), input1.data(), input2.data() },
5861                    outputTensorInfo,
5862                    output.data(),
5863                    0,
5864                    true);
5865
5866     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
5867     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
5868         // Batch 0, Channel 0
5869         1.0f, 2.0f,
5870
5871         // Batch 0, Channel 1
5872         3.0f, 4.0f,
5873
5874         // Batch 0, Channel 2
5875         5.0f, 6.0f,
5876
5877         // Batch 1, Channel 0
5878         19.0f, 20.0f,
5879
5880         // Batch 1, Channel 1
5881         21.0f, 22.0f,
5882
5883         // Batch 1, Channel 2
5884         23.0f, 24.0f,
5885
5886         // Batch 2, Channel 0
5887         7.0f, 8.0f,
5888
5889         // Batch 2, Channel 1
5890         9.0f, 10.0f,
5891
5892         // Batch 2, Channel 2
5893         11.0f, 12.0f,
5894
5895         // Batch 3, Channel 0
5896         25.0f, 26.0f,
5897
5898         // Batch 3, Channel 1
5899         27.0f, 28.0f,
5900
5901         // Batch 3, Channel 2
5902         29.0f, 30.0f,
5903
5904         // Batch 4, Channel 0
5905         13.0f, 14.0f,
5906
5907         // Batch 4, Channel 1
5908         15.0f, 16.0f,
5909
5910         // Batch 4, Channel 2
5911         17.0f, 18.0f,
5912
5913         // Batch 5, Channel 0
5914         31.0f, 32.0f,
5915
5916         // Batch 5, Channel 1
5917         33.0f, 34.0f,
5918
5919         // Batch 5, Channel 2
5920         35.0f, 36.0f
5921     }));
5922
5923     return result;
5924 }
5925
5926 LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(
5927     armnn::IWorkloadFactory& workloadFactory,
5928     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
5929 {
5930     return Concatenation3dDim0DiffInputDimsTestImpl<armnn::DataType::Float32>(
5931         workloadFactory, memoryManager, 0.0f, 0);
5932 }
5933
5934 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
5935 LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(
5936     armnn::IWorkloadFactory& workloadFactory,
5937     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
5938     float qScale,
5939     int32_t qOffset)
5940 {
5941     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
5942     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5943         // Batch 0, Channel 0
5944         1.0f, 2.0f,
5945
5946         // Batch 0, Channel 1
5947         3.0f, 4.0f,
5948
5949         // Batch 0, Channel 2
5950         5.0f, 6.0f,
5951
5952         // Batch 1, Channel 0
5953         19.0f, 20.0f,
5954
5955         // Batch 1, Channel 1
5956         21.0f, 22.0f,
5957
5958         // Batch 1, Channel 2
5959         23.0f, 24.0f
5960     }));
5961
5962     armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, ArmnnType, qScale, qOffset);
5963     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5964         // Batch 0, Channel 0
5965         7.0f, 8.0f,
5966
5967         // Batch 0, Channel 1
5968         9.0f, 10.0f,
5969
5970         // Batch 0, Channel 2
5971         11.0f, 12.0f,
5972
5973         // Batch 0, Channel 3
5974         25.0f, 26.0f,
5975
5976         // Batch 1, Channel 0
5977         27.0f, 28.0f,
5978
5979         // Batch 1, Channel 1
5980         29.0f, 30.0f,
5981
5982         // Batch 1, Channel 2
5983         13.0f, 14.0f,
5984
5985         // Batch 1, Channel 3
5986         15.0f, 16.0f,
5987     }));
5988
5989     armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, ArmnnType, qScale, qOffset);
5990     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
5991         // Batch 0, Channel 0
5992         17.0f, 18.0f,
5993
5994         // Batch 1, Channel 0
5995         31.0f, 32.0f,
5996     }));
5997
5998     armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, ArmnnType, qScale, qOffset);
5999     LayerTestResult<T, 3> result(outputTensorInfo);
6000
6001     std::vector<T> output;
6002     output.resize(outputTensorInfo.GetNumElements());
6003     Concatenate<T>(workloadFactory, memoryManager,
6004                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
6005                    { input0.data(), input1.data(), input2.data() },
6006                    outputTensorInfo,
6007                    output.data(),
6008                    1,
6009                    true);
6010
6011     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
6012     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6013         // Batch 0, Channel 0
6014         1.0f, 2.0f,
6015
6016         // Batch 0, Channel 1
6017         3.0f, 4.0f,
6018
6019         // Batch 0, Channel 2
6020         5.0f, 6.0f,
6021
6022         // Batch 0, Channel 3
6023         7.0f, 8.0f,
6024
6025         // Batch 0, Channel 4
6026         9.0f, 10.0f,
6027
6028         // Batch 0, Channel 5
6029         11.0f, 12.0f,
6030
6031         // Batch 0, Channel 6
6032         25.0f, 26.0f,
6033
6034         // Batch 0, Channel 7
6035         17.0f, 18.0f,
6036
6037         // Batch 1, Channel 0
6038         19.0f, 20.0f,
6039
6040         // Batch 1, Channel 1
6041         21.0f, 22.0f,
6042
6043         // Batch 1, Channel 2
6044         23.0f, 24.0f,
6045
6046         // Batch 1, Channel 3
6047         27.0f, 28.0f,
6048
6049         // Batch 1, Channel 4
6050         29.0f, 30.0f,
6051
6052         // Batch 1, Channel 5
6053         13.0f, 14.0f,
6054
6055         // Batch 1, Channel 6
6056         15.0f, 16.0f,
6057
6058         // Batch 1, Channel 7
6059         31.0f, 32.0f,
6060     }));
6061
6062     return result;
6063 }
6064
6065 LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(
6066     armnn::IWorkloadFactory& workloadFactory,
6067     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6068 {
6069     return Concatenation3dDim1DiffInputDimsTestImpl<armnn::DataType::Float32>(
6070         workloadFactory, memoryManager, 0.0f, 0);
6071 }
6072
6073 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6074 LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(
6075     armnn::IWorkloadFactory& workloadFactory,
6076     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6077     bool useSubtensor,
6078     float qScale,
6079     int32_t qOffset)
6080 {
6081     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, ArmnnType, qScale, qOffset);
6082     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
6083         // Batch 0, Channel 0
6084         1.0f, 2.0f,
6085
6086         // Batch 0, Channel 1
6087         3.0f, 4.0f,
6088
6089         // Batch 0, Channel 2
6090         5.0f, 6.0f,
6091
6092         // Batch 1, Channel 0
6093         19.0f, 20.0f,
6094
6095         // Batch 1, Channel 1
6096         21.0f, 22.0f,
6097
6098         // Batch 1, Channel 2
6099         23.0f, 24.0f
6100     }));
6101
6102     armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, ArmnnType, qScale, qOffset);
6103     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
6104         // Batch 0, Channel 0
6105         7.0f,
6106
6107         // Batch 0, Channel 1
6108         9.0f,
6109
6110         // Batch 0, Channel 2
6111         11.0f,
6112
6113         // Batch 1, Channel 0
6114         25.0f,
6115
6116         // Batch 1, Channel 1
6117         27.0f,
6118
6119         // Batch 1, Channel 2
6120         29.0f
6121     }));
6122
6123     armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, ArmnnType, qScale, qOffset);
6124     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
6125         // Batch 0, Channel 0
6126         13.0f, 14.0f, 50.0f,
6127
6128         // Batch 0, Channel 1
6129         15.0f, 16.0f, 51.0f,
6130
6131         // Batch 0, Channel 2
6132         17.0f, 18.0f, 52.0f,
6133
6134         // Batch 1, Channel 0
6135         31.0f, 32.0f, 53.0f,
6136
6137         // Batch 1, Channel 1
6138         33.0f, 34.0f, 54.0f,
6139
6140         // Batch 1, Channel 2
6141         35.0f, 36.0f, 55.0f,
6142     }));
6143
6144     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, ArmnnType, qScale, qOffset);
6145     LayerTestResult<T, 3> result(outputTensorInfo);
6146
6147     std::vector<T> output;
6148     output.resize(outputTensorInfo.GetNumElements());
6149     Concatenate<T>(workloadFactory, memoryManager,
6150                    { input0TensorInfo, input1TensorInfo, input2TensorInfo },
6151                    { input0.data(), input1.data(), input2.data() },
6152                    outputTensorInfo,
6153                    output.data(),
6154                    2,
6155                    useSubtensor);
6156
6157     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
6158     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6159         // Batch 0, Channel 0
6160         1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
6161
6162         // Batch 0, Channel 1
6163         3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
6164
6165         // Batch 0, Channel 2
6166         5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
6167
6168         // Batch 1, Channel 0
6169         19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
6170
6171         // Batch 1, Channel 1
6172         21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
6173
6174         // Batch 1, Channel 2
6175         23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
6176     }));
6177
6178     return result;
6179 }
6180
6181 LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(
6182     armnn::IWorkloadFactory& workloadFactory,
6183     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6184     bool useSubtensor)
6185 {
6186     return Concatenation3dDim2DiffInputDimsTestImpl<armnn::DataType::Float32>(
6187         workloadFactory, memoryManager, useSubtensor, 0.0f, 0);
6188 }
6189
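// Helper: concatenates three 1x3x2x2 inputs along 'dimension' and fills in the
// computed output for the 4d concatenation tests.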
6190 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6191 LayerTestResult<T, 4> Concatenation4dTestImpl(
6192     armnn::IWorkloadFactory& workloadFactory,
6193     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6194     const armnn::TensorInfo& outputTensorInfo,
6195     unsigned int dimension,
6196     bool useSubtensor,
6197     float qScale,
6198     int32_t qOffset)
6199 {
6200     armnn::TensorInfo inputTensorInfo({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6201
6202     auto input0 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6203         1.0f, 2.0f,
6204         3.0f, 4.0f,
6205         5.0f, 6.0f,
6206         7.0f, 8.0f,
6207         9.0f, 10.0f,
6208         11.0f, 12.0f
6209     }));
6210
6211     auto input1 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6212         11.0f, 12.0f,
6213         13.0f, 14.0f,
6214         15.0f, 16.0f,
6215         17.0f, 18.0f,
6216         19.0f, 20.0f,
6217         21.0f, 22.0f
6218     }));
6219
6220     auto input2 = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6221         21.0f, 22.0f,
6222         23.0f, 24.0f,
6223         25.0f, 26.0f,
6224         27.0f, 28.0f,
6225         29.0f, 30.0f,
6226         31.0f, 32.0f
6227     }));
6228
6229     LayerTestResult<T, 4> result(outputTensorInfo);
6230
6231     std::vector<T> output;
6232     output.resize(outputTensorInfo.GetNumElements());
6233
6234     Concatenate<T>(workloadFactory,
6235                    memoryManager,
6236                    {inputTensorInfo, inputTensorInfo, inputTensorInfo},
6237                    {input0.data(), input1.data(), input2.data()},
6238                    outputTensorInfo,
6239                    output.data(),
6240                    dimension,
6241                    useSubtensor);
6242
6243     result.output = MakeTensor<T, 4>(outputTensorInfo, output);
6244     return result;
6245 }
6246
6247 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6248 LayerTestResult<T, 4> Concatenation4dDim0TestImpl(
6249     armnn::IWorkloadFactory& workloadFactory,
6250     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6251     float qScale,
6252     int32_t qOffset)
6253 {
6254     armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6255
6256     LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
6257         workloadFactory, memoryManager, outputTensorInfo, 0, true, qScale, qOffset);
6258
6259     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6260         1.0f, 2.0f,
6261         3.0f, 4.0f,
6262         5.0f, 6.0f,
6263         7.0f, 8.0f,
6264         9.0f, 10.0f,
6265         11.0f, 12.0f,
6266
6267         11.0f, 12.0f,
6268         13.0f, 14.0f,
6269         15.0f, 16.0f,
6270         17.0f, 18.0f,
6271         19.0f, 20.0f,
6272         21.0f, 22.0f,
6273
6274         21.0f, 22.0f,
6275         23.0f, 24.0f,
6276         25.0f, 26.0f,
6277         27.0f, 28.0f,
6278         29.0f, 30.0f,
6279         31.0f, 32.0f
6280     }));
6281     return result;
6282 }
6283
6284 LayerTestResult<float, 4> Concatenation4dDim0Test(
6285     armnn::IWorkloadFactory& workloadFactory,
6286     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6287 {
6288     return Concatenation4dDim0TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
6289 }
6290
6291 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6292 LayerTestResult<T, 4> Concatenation4dDim1TestImpl(
6293     armnn::IWorkloadFactory& workloadFactory,
6294     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6295     float qScale,
6296     int32_t qOffset)
6297 {
6298     armnn::TensorInfo outputTensorInfo({ 1, 9, 2, 2 }, ArmnnType, qScale, qOffset);
6299
6300     LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
6301         workloadFactory, memoryManager, outputTensorInfo, 1, true, qScale, qOffset);
6302
6303     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6304         1.0f, 2.0f,
6305         3.0f, 4.0f,
6306         5.0f, 6.0f,
6307         7.0f, 8.0f,
6308         9.0f, 10.0f,
6309         11.0f, 12.0f,
6310
6311         11.0f, 12.0f,
6312         13.0f, 14.0f,
6313         15.0f, 16.0f,
6314         17.0f, 18.0f,
6315         19.0f, 20.0f,
6316         21.0f, 22.0f,
6317
6318         21.0f, 22.0f,
6319         23.0f, 24.0f,
6320         25.0f, 26.0f,
6321         27.0f, 28.0f,
6322         29.0f, 30.0f,
6323         31.0f, 32.0f
6324     }));
6325
6326     return result;
6327 }
6328
6329 LayerTestResult<float, 4> Concatenation4dDim1Test(
6330     armnn::IWorkloadFactory& workloadFactory,
6331     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6332 {
6333     return Concatenation4dDim1TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
6334 }
6335
6336 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6337 LayerTestResult<T, 4> Concatenation4dDim2TestImpl(
6338     armnn::IWorkloadFactory& workloadFactory,
6339     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6340     float qScale,
6341     int32_t qOffset)
6342 {
6343     armnn::TensorInfo outputTensorInfo({ 1, 3, 6, 2 }, ArmnnType, qScale, qOffset);
6344
6345     LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
6346         workloadFactory, memoryManager, outputTensorInfo, 2, true, qScale, qOffset);
6347
6348     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6349         1.0f, 2.0f,
6350         3.0f, 4.0f,
6351         11.0f, 12.0f,
6352         13.0f, 14.0f,
6353         21.0f, 22.0f,
6354         23.0f, 24.0f,
6355
6356         5.0f, 6.0f,
6357         7.0f, 8.0f,
6358         15.0f, 16.0f,
6359         17.0f, 18.0f,
6360         25.0f, 26.0f,
6361         27.0f, 28.0f,
6362
6363         9.0f, 10.0f,
6364         11.0f, 12.0f,
6365         19.0f, 20.0f,
6366         21.0f, 22.0f,
6367         29.0f, 30.0f,
6368         31.0f, 32.0f
6369     }));
6370
6371     return result;
6372 }
6373
6374 LayerTestResult<float, 4> Concatenation4dDim2Test(
6375     armnn::IWorkloadFactory& workloadFactory,
6376     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6377 {
6378     return Concatenation4dDim2TestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
6379 }
6380
6381 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6382 LayerTestResult<T, 4> Concatenation4dDim3TestImpl(
6383     armnn::IWorkloadFactory& workloadFactory,
6384     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6385     float qScale,
6386     int32_t qOffset,
6387     bool useSubtensor)
6388 {
6389     armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 6 }, ArmnnType, qScale, qOffset);
6390
6391     LayerTestResult<T, 4> result = Concatenation4dTestImpl<ArmnnType>(
6392         workloadFactory, memoryManager, outputTensorInfo, 3, useSubtensor, qScale, qOffset);
6393
6394     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6395         1.0f, 2.0f,
6396         11.0f, 12.0f,
6397         21.0f, 22.0f,
6398         3.0f, 4.0f,
6399         13.0f, 14.0f,
6400         23.0f, 24.0f,
6401
6402         5.0f, 6.0f,
6403         15.0f, 16.0f,
6404         25.0f, 26.0f,
6405         7.0f, 8.0f,
6406         17.0f, 18.0f,
6407         27.0f, 28.0f,
6408
6409         9.0f, 10.0f,
6410         19.0f, 20.0f,
6411         29.0f, 30.0f,
6412         11.0f, 12.0f,
6413         21.0f, 22.0f,
6414         31.0f, 32.0f
6415     }));
6416
6417     return result;
6418 }
6419
6420 LayerTestResult<float, 4> Concatenation4dDim3Test(
6421     armnn::IWorkloadFactory& workloadFactory,
6422     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6423     bool useSubtensor)
6424 {
6425     return Concatenation4dDim3TestImpl<armnn::DataType::Float32>(
6426         workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
6427 }
6428
6429 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6430 LayerTestResult<T, 4> Concatenation4dDiffShapeDim0TestImpl(
6431     armnn::IWorkloadFactory& workloadFactory,
6432     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6433     float qScale,
6434     int32_t qOffset)
6435 {
6436     unsigned int dimension = 0;
6437     armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6438
6439     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
6440         1.0f, 2.0f,
6441         3.0f, 4.0f,
6442         5.0f, 6.0f,
6443         7.0f, 8.0f,
6444         9.0f, 10.0f,
6445         11.0f, 12.0f
6446     }));
6447
6448     armnn::TensorInfo inputTensorInfo1({ 2, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6449
6450     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
6451         11.0f, 12.0f,
6452         13.0f, 14.0f,
6453         15.0f, 16.0f,
6454         17.0f, 18.0f,
6455         19.0f, 20.0f,
6456         21.0f, 22.0f,
6457
6458         21.0f, 22.0f,
6459         23.0f, 24.0f,
6460         25.0f, 26.0f,
6461         27.0f, 28.0f,
6462         29.0f, 30.0f,
6463         31.0f, 32.0f
6465     }));
6466
6467     armnn::TensorInfo outputTensorInfo({ 3, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6468
6469     LayerTestResult<T, 4> result(outputTensorInfo);
6470
6471     std::vector<T> output;
6472     output.resize(outputTensorInfo.GetNumElements());
6473     Concatenate<T>(workloadFactory,
6474                    memoryManager,
6475                    {inputTensorInfo0, inputTensorInfo1},
6476                    {input0.data(), input1.data()},
6477                    outputTensorInfo,
6478                    output.data(),
6479                    dimension,
6480                    true);
6481
6482     result.output = MakeTensor<T, 4>(outputTensorInfo, output);
6483     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6484         1.0f, 2.0f,
6485         3.0f, 4.0f,
6486         5.0f, 6.0f,
6487         7.0f, 8.0f,
6488         9.0f, 10.0f,
6489         11.0f, 12.0f,
6490
6491         11.0f, 12.0f,
6492         13.0f, 14.0f,
6493         15.0f, 16.0f,
6494         17.0f, 18.0f,
6495         19.0f, 20.0f,
6496         21.0f, 22.0f,
6497
6498         21.0f, 22.0f,
6499         23.0f, 24.0f,
6500         25.0f, 26.0f,
6501         27.0f, 28.0f,
6502         29.0f, 30.0f,
6503         31.0f, 32.0f
6504     }));
6505
6506     return result;
6507 }
6508
6509 LayerTestResult<float, 4> Concatenation4dDiffShapeDim0Test(
6510     armnn::IWorkloadFactory& workloadFactory,
6511     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6512 {
6513     return Concatenation4dDiffShapeDim0TestImpl<armnn::DataType::Float32>(
6514         workloadFactory, memoryManager, 0.0f, 0);
6515 }
6516
6517 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6518 LayerTestResult<T, 4> Concatenation4dDiffShapeDim1TestImpl(
6519     armnn::IWorkloadFactory& workloadFactory,
6520     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6521     float qScale,
6522     int32_t qOffset)
6523 {
6524     unsigned int dimension = 1;
6525     armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6526
6527     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
6528         1.0f, 2.0f,
6529         3.0f, 4.0f,
6530         5.0f, 6.0f,
6531         7.0f, 8.0f,
6532         9.0f, 10.0f,
6533         11.0f, 12.0f
6534     }));
6535
6536     armnn::TensorInfo inputTensorInfo1({ 1, 2, 2, 2 }, ArmnnType, qScale, qOffset);
6537
6538     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
6539         11.0f, 12.0f,
6540         13.0f, 14.0f,
6541         15.0f, 16.0f,
6542         17.0f, 18.0f,
6544     }));
6545
6546     armnn::TensorInfo outputTensorInfo({ 1, 5, 2, 2 }, ArmnnType, qScale, qOffset);
6547
6548     LayerTestResult<T, 4> result(outputTensorInfo);
6549
6550     std::vector<T> output;
6551     output.resize(outputTensorInfo.GetNumElements());
6552     Concatenate<T>(workloadFactory,
6553                    memoryManager,
6554                    {inputTensorInfo0, inputTensorInfo1},
6555                    {input0.data(), input1.data()},
6556                    outputTensorInfo,
6557                    output.data(),
6558                    dimension,
6559                    true);
6560
6561     result.output = MakeTensor<T, 4>(outputTensorInfo, output);
6562     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6563         1.0f, 2.0f,
6564         3.0f, 4.0f,
6565         5.0f, 6.0f,
6566         7.0f, 8.0f,
6567         9.0f, 10.0f,
6568         11.0f, 12.0f,
6569         11.0f, 12.0f,
6570         13.0f, 14.0f,
6571         15.0f, 16.0f,
6572         17.0f, 18.0f
6573     }));
6574
6575     return result;
6576 }
6577
6578 LayerTestResult<float, 4> Concatenation4dDiffShapeDim1Test(
6579     armnn::IWorkloadFactory& workloadFactory,
6580     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6581 {
6582     return Concatenation4dDiffShapeDim1TestImpl<armnn::DataType::Float32>(
6583         workloadFactory, memoryManager, 0.0f, 0);
6584 }
6585
6586 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6587 LayerTestResult<T, 4> Concatenation4dDiffShapeDim2TestImpl(
6588     armnn::IWorkloadFactory& workloadFactory,
6589     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6590     float qScale,
6591     int32_t qOffset)
6592 {
6593     unsigned int dimension = 2;
6594     armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6595
6596     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
6597         1.0f, 2.0f,
6598         3.0f, 4.0f,
6599         5.0f, 6.0f,
6600         7.0f, 8.0f,
6601         9.0f, 10.0f,
6602         11.0f, 12.0f
6603     }));
6604
6605     armnn::TensorInfo inputTensorInfo1({ 1, 3, 3, 2 }, ArmnnType, qScale, qOffset);
6606
6607     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
6608         11.0f, 12.0f,
6609         13.0f, 14.0f,
6610         15.0f, 16.0f,
6611         17.0f, 18.0f,
6612         19.0f, 20.0f,
6613         21.0f, 22.0f,
6614         23.0f, 24.0f,
6615         25.0f, 26.0f,
6616         27.0f, 28.0f
6617     }));
6618
6619     armnn::TensorInfo outputTensorInfo({ 1, 3, 5, 2 }, ArmnnType, qScale, qOffset);
6620
6621     LayerTestResult<T, 4> result(outputTensorInfo);
6622
6623     std::vector<T> output;
6624     output.resize(outputTensorInfo.GetNumElements());
6625     Concatenate<T>(workloadFactory,
6626                    memoryManager,
6627                    {inputTensorInfo0, inputTensorInfo1},
6628                    {input0.data(), input1.data()},
6629                    outputTensorInfo,
6630                    output.data(),
6631                    dimension,
6632                    true);
6633
6634     result.output = MakeTensor<T, 4>(outputTensorInfo, output);
6635     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6636         1.0f, 2.0f,
6637         3.0f, 4.0f,
6638         11.0f, 12.0f,
6639         13.0f, 14.0f,
6640         15.0f, 16.0f,
6641
6642         5.0f, 6.0f,
6643         7.0f, 8.0f,
6644         17.0f, 18.0f,
6645         19.0f, 20.0f,
6646         21.0f, 22.0f,
6647
6648         9.0f, 10.0f,
6649         11.0f, 12.0f,
6650         23.0f, 24.0f,
6651         25.0f, 26.0f,
6652         27.0f, 28.0f
6653     }));
6654
6655     return result;
6656 }
6657
6658 LayerTestResult<float, 4> Concatenation4dDiffShapeDim2Test(
6659     armnn::IWorkloadFactory& workloadFactory,
6660     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6661 {
6662     return Concatenation4dDiffShapeDim2TestImpl<armnn::DataType::Float32>(
6663         workloadFactory, memoryManager, 0.0f, 0);
6664 }
6665
6666 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6667 LayerTestResult<T, 4> Concatenation4dDiffShapeDim3TestImpl(
6668     armnn::IWorkloadFactory& workloadFactory,
6669     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6670     float qScale,
6671     int32_t qOffset,
6672     bool useSubtensor)
6673 {
6674     unsigned int dimension = 3;
6675     armnn::TensorInfo inputTensorInfo0({ 1, 3, 2, 2 }, ArmnnType, qScale, qOffset);
6676
6677     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, QuantizedVector<T>(qScale, qOffset, {
6678         1.0f, 2.0f,
6679         3.0f, 4.0f,
6680         5.0f, 6.0f,
6681         7.0f, 8.0f,
6682         9.0f, 10.0f,
6683         11.0f, 12.0f
6684     }));
6685
6686     armnn::TensorInfo inputTensorInfo1({ 1, 3, 2, 3 }, ArmnnType, qScale, qOffset);
6687
6688     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset, {
6689         11.0f, 12.0f, 13.0f,
6690         14.0f, 15.0f, 16.0f,
6691
6692         17.0f, 18.0f, 19.0f,
6693         20.0f, 21.0f, 22.0f,
6694
6695         23.0f, 24.0f, 25.0f,
6696         26.0f, 27.0f, 28.0f
6697     }));
6698
6699     armnn::TensorInfo outputTensorInfo({ 1, 3, 2, 5 }, ArmnnType, qScale, qOffset);
6700
6701     LayerTestResult<T, 4> result(outputTensorInfo);
6702
6703     std::vector<T> output;
6704     output.resize(outputTensorInfo.GetNumElements());
6705     Concatenate<T>(workloadFactory,
6706                    memoryManager,
6707                    {inputTensorInfo0, inputTensorInfo1},
6708                    {input0.data(), input1.data()},
6709                    outputTensorInfo,
6710                    output.data(),
6711                    dimension,
6712                    useSubtensor);
6713
6714     result.output = MakeTensor<T, 4>(outputTensorInfo, output);
6715     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
6716         1.0f, 2.0f, 11.0f, 12.0f, 13.0f,
6717         3.0f, 4.0f, 14.0f, 15.0f, 16.0f,
6718         5.0f, 6.0f, 17.0f, 18.0f, 19.0f,
6719         7.0f, 8.0f, 20.0f, 21.0f, 22.0f,
6720         9.0f, 10.0f, 23.0f, 24.0f, 25.0f,
6721         11.0f, 12.0f, 26.0f, 27.0f, 28.0f
6722     }));
6723
6724     return result;
6725 }
6726
6727 LayerTestResult<float, 4> Concatenation4dDiffShapeDim3Test(
6728     armnn::IWorkloadFactory& workloadFactory,
6729     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6730     bool useSubtensor)
6731 {
6732     return Concatenation4dDiffShapeDim3TestImpl<armnn::DataType::Float32>(
6733         workloadFactory, memoryManager, 0.0f, 0, useSubtensor);
6734 }
6735
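// Runs a FakeQuantization workload over a 3x2 float tensor: values in the range
// [min, max] = [-10, 10] are mapped onto the 256 quantization levels, so -10 -> 0,
// 0 -> 128 and 10 -> 255 in the expected output.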
6736 LayerTestResult<float, 2> FakeQuantizationTest(
6737     armnn::IWorkloadFactory& workloadFactory,
6738     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
6739 {
6740     constexpr unsigned int width = 2;
6741     constexpr unsigned int height = 3;
6742
6743     const armnn::TensorInfo tensorInfo({ height, width },
6744         armnn::DataType::Float32);
6745     auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
6746        -10.0f,  -5.0f,
6747          0.0f,   5.0f,
6748         10.0f,  10.0f
6749     }));
6750
6751     LayerTestResult<float, 2> ret(tensorInfo);
6752
6753     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
6755     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
6756
6757     armnn::FakeQuantizationQueueDescriptor data;
6758     armnn::WorkloadInfo info;
6759
6760     AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
6761     AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
6762     float min = -10.f;
6763     float max = 10.f;
6764
6765     data.m_Parameters.m_Min = min;
6766     data.m_Parameters.m_Max = max;
6767
6768     armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
6769     armnn::FakeQuantizationQueueDescriptor refData = data;
6770     armnn::WorkloadInfo refInfo = info;
6771     SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
6772
6773     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
6774
6775     inputHandle->Allocate();
6776     outputHandle->Allocate();
6777
6778     CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
6779
6780     workload->PostAllocationConfigure();
6781     workload->Execute();
6782
6783     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
6784
6785     ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
6786         0.0f,     63.0f,
6787         128.0f,   191.0f,
6788         255.0f,   255.0f
6789     }));
6790     return ret;
6791 }
6792
6793 namespace
6794 {
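// Shared implementation for the L2 Normalization tests: runs the workload for the
// given shape, layout and quantization parameters and compares the result against
// expectedOutputValues.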
6795 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
6796 LayerTestResult<T, 4> L2NormalizationTestImpl(
6797     armnn::IWorkloadFactory& workloadFactory,
6798     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6799     const armnn::TensorShape& inputOutputTensorShape,
6800     float scale,
6801     int32_t offset,
6802     const std::vector<float>& inputValues,
6803     float outScale,
6804     int32_t outOffset,
6805     const std::vector<float>& expectedOutputValues,
6806     const armnn::DataLayout layout,
6807     float epsilon = 1e-12f)
6808 {
6809     const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, ArmnnType, scale, offset);
6810     const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, ArmnnType, outScale, outOffset);
6811
6812     // At this point, permute the input data into NHWC if that layout is required.
6813     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
6814     std::vector<float> inputData = inputValues;
6815     if (layout == armnn::DataLayout::NHWC)
6816     {
6817         std::vector<float> tmp(inputData.size());
6818         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
6819         inputData = tmp;
6820     }
6821
6822     auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(
6823                                                          inputTensorInfo.GetQuantizationScale(),
6824                                                          inputTensorInfo.GetQuantizationOffset(),
6825                                                          inputData));
6826
6827     std::vector<float> expectedOutputData = expectedOutputValues;
6828     if (layout == armnn::DataLayout::NHWC)
6829     {
6830         std::vector<float> tmp(expectedOutputData.size());
6831         armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, expectedOutputData.data(), tmp.data(),
6832                             sizeof(float));
6833         expectedOutputData = tmp;
6834     }
6835
6836     LayerTestResult<T, 4> result(outputTensorInfo);
6837     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(
6838                                                                outputTensorInfo.GetQuantizationScale(),
6839                                                                outputTensorInfo.GetQuantizationOffset(),
6840                                                                expectedOutputData));
6841
6842     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
6843     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
6844
6845     armnn::L2NormalizationQueueDescriptor descriptor;
6846     descriptor.m_Parameters.m_Eps = epsilon;
6847     descriptor.m_Parameters.m_DataLayout = layout;
6848     armnn::WorkloadInfo info;
6849
6850     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
6851     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
6852
6853     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
6854
6855     inputHandle->Allocate();
6856     outputHandle->Allocate();
6857
6858     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
6859
6860     workload->PostAllocationConfigure();
6861     ExecuteWorkload(*workload, memoryManager);
6862
6863     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
6864
6865     return result;
6866 }
6867
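// Returns the reciprocal of the L2 norm, 1 / sqrt(sum of squared elements); used to
// build expected outputs for the L2 Normalization tests.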
6868 float CalcInvL2Norm(std::initializer_list<float> elements)
6869 {
6870     const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
6871         [](float acc, float element) { return acc + element * element; });
6872     return 1.0f / sqrtf(reduction);
6873 }
6874
6875 } // anonymous namespace
6876
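// Pads a 3x3 input with two elements of customPaddingValue on every side, producing
// a 7x7 output.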
6877 template<armnn::DataType ArmnnType, typename T>
6878 LayerTestResult<T, 2> Pad2dTestCommon(
6879     armnn::IWorkloadFactory& workloadFactory,
6880     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6881     float qScale,
6882     int32_t qOffset,
6883     const float customPaddingValue)
6884 {
6885     const armnn::TensorShape inputShape{ 3, 3 };
6886     const armnn::TensorShape outputShape{ 7, 7 };
6887
6888     const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
6889     const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
6890
6891     std::vector<T> inputValues(
6892     QuantizedVector<T>(qScale, qOffset,
6893     {
6894       // Height (3) x Width (3)
6895       4, 8, 6,
6896       7, 4, 4,
6897       3, 2, 4
6898     }));
6899
6900     const auto p = customPaddingValue;
6901     std::vector<T> expectedOutputValues(
6902     QuantizedVector<T>(qScale, qOffset,
6903     {
6904       // Height (7) x Width (7): the input centred in a border of padding value 'p'
6905       p, p, p, p, p, p, p,
6906       p, p, p, p, p, p, p,
6907       p, p, 4, 8, 6, p, p,
6908       p, p, 7, 4, 4, p, p,
6909       p, p, 3, 2, 4, p, p,
6910       p, p, p, p, p, p, p,
6911       p, p, p, p, p, p, p
6912     }));
6913
6914     auto inputTensor = MakeTensor<T, 2>(inputTensorInfo, std::vector<T>(inputValues));
6915
6916     LayerTestResult<T, 2> result(outputTensorInfo);
6917     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, std::vector<T>(expectedOutputValues));
6918
6919     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
6920     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
6921
6922     armnn::PadQueueDescriptor descriptor;
6923
6924     std::vector<std::pair<unsigned int, unsigned int>> padList;
6925     padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
6926     padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
6927
6928     descriptor.m_Parameters.m_PadList = padList;
6929     descriptor.m_Parameters.m_PadValue = customPaddingValue;
6930     armnn::WorkloadInfo info;
6931
6932     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
6933     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
6934
6935     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
6936
6937     inputHandle->Allocate();
6938     outputHandle->Allocate();
6939
6940     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
6941
6942     workload->PostAllocationConfigure();
6943     workload->Execute();
6944
6945     CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
6946
6947     return result;
6948 }
6949
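// Pads a 2x2x2 input to 3x5x6 with zeros: (0,1) on the channel axis, (2,1) on
// height and (2,2) on width.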
6950 template<armnn::DataType ArmnnType, typename T>
6951 LayerTestResult<T, 3> Pad3dTestCommon(
6952     armnn::IWorkloadFactory& workloadFactory,
6953     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
6954     float qScale,
6955     int32_t qOffset)
6956 {
6957     const armnn::TensorShape inputShape{ 2, 2, 2 };
6958     const armnn::TensorShape outputShape{ 3, 5, 6 };
6959
6960     const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
6961     const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
6962
6963     std::vector<T> inputValues(
6964       QuantizedVector<T>(qScale,qOffset,
6965     {
6966         // Channel 0, Height (2) x Width (2)
6967         0, 4,
6968         2, 5,
6969
6970         // Channel 1, Height (2) x Width (2)
6971         6, 1,
6972         5, 2
6973     }));
6974
6975     std::vector<T> expectedOutputValues(
6976       QuantizedVector<T>(qScale,qOffset,
6977     {
6978
6979         0, 0, 0, 0, 0, 0,
6980         0, 0, 0, 0, 0, 0,
6981         0, 0, 0, 4, 0, 0,
6982         0, 0, 2, 5, 0, 0,
6983         0, 0, 0, 0, 0, 0,
6984
6985         0, 0, 0, 0, 0, 0,
6986         0, 0, 0, 0, 0, 0,
6987         0, 0, 6, 1, 0, 0,
6988         0, 0, 5, 2, 0, 0,
6989         0, 0, 0, 0, 0, 0,
6990
6991         0, 0, 0, 0, 0, 0,
6992         0, 0, 0, 0, 0, 0,
6993         0, 0, 0, 0, 0, 0,
6994         0, 0, 0, 0, 0, 0,
6995         0, 0, 0, 0, 0, 0
6996
6997     }));
6998
6999     auto inputTensor = MakeTensor<T, 3>(inputTensorInfo, std::vector<T>(inputValues));
7000
7001     LayerTestResult<T, 3> result(outputTensorInfo);
7002     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, std::vector<T>(expectedOutputValues));
7003
7004     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
7005     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
7006
7007     armnn::PadQueueDescriptor descriptor;
7008
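         // (before, after) padding counts per dimension: 2+0+1 = 3, 2+2+1 = 5, 2+2+2 = 6, matching the 3x5x6 output shape.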
7009     std::vector<std::pair<unsigned int, unsigned int>> padList;
7010     padList.push_back(std::pair<unsigned int, unsigned int>(0,1));
7011     padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
7012     padList.push_back(std::pair<unsigned int, unsigned int>(2,2));
7013
7014     descriptor.m_Parameters.m_PadList = padList;
7015     armnn::WorkloadInfo info;
7016
7017     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
7018     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
7019
7020     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
7021
7022     inputHandle->Allocate();
7023     outputHandle->Allocate();
7024
7025     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0]);
7026
7027     workload->PostAllocationConfigure();
7028     workload->Execute();
7029
7030     CopyDataFromITensorHandle(&result.output[0][0][0], outputHandle.get());
7031
7032     return result;
7033 }
7034
7035 template<armnn::DataType ArmnnType, typename T>
7036 LayerTestResult<T, 4> Pad4dTestCommon(
7037     armnn::IWorkloadFactory& workloadFactory,
7038     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7039     float qScale,
7040     int32_t qOffset)
7041 {
7042     const armnn::TensorShape inputShape{ 2, 2, 3, 2 };
7043     const armnn::TensorShape outputShape{ 4, 5, 7, 4 };
7044
7045     const armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType, qScale, qOffset);
7046     const armnn::TensorInfo outputTensorInfo(outputShape, ArmnnType, qScale, qOffset);
7047
7048     std::vector<T> inputValues(
7049       QuantizedVector<T>(qScale,qOffset,
7050     {
7051         // Batch 0, Channel 0, Height (3) x Width (2)
7052         0, 1,
7053         2, 3,
7054         4, 5,
7055
7056         // Batch 0, Channel 1, Height (3) x Width (2)
7057         6, 7,
7058         8, 9,
7059         10, 11,
7060
7061         // Batch 1, Channel 0, Height (3) x Width (2)
7062         12, 13,
7063         14, 15,
7064         16, 17,
7065
7066         // Batch 1, Channel 1, Height (3) x Width (2)
7067         18, 19,
7068         20, 21,
7069         22, 23
7070     }));
7071
7072     std::vector<T> expectedOutputValues(
7073       QuantizedVector<T>(qScale,qOffset,
7074     {
7075         0, 0, 0, 0,
7076         0, 0, 0, 0,
7077         0, 0, 0, 0,
7078         0, 0, 0, 0,
7079         0, 0, 0, 0,
7080         0, 0, 0, 0,
7081         0, 0, 0, 0,
7082
7083         0, 0, 0, 0,
7084         0, 0, 0, 0,
7085         0, 0, 0, 0,
7086         0, 0, 0, 0,
7087         0, 0, 0, 0,
7088         0, 0, 0, 0,
7089         0, 0, 0, 0,
7090
7091         0, 0, 0, 0,
7092         0, 0, 0, 0,
7093         0, 0, 0, 0,
7094         0, 0, 0, 0,
7095         0, 0, 0, 0,
7096         0, 0, 0, 0,
7097         0, 0, 0, 0,
7098
7099         0, 0, 0, 0,
7100         0, 0, 0, 0,
7101         0, 0, 0, 0,
7102         0, 0, 0, 0,
7103         0, 0, 0, 0,
7104         0, 0, 0, 0,
7105         0, 0, 0, 0,
7106
7107         0, 0, 0, 0,
7108         0, 0, 0, 0,
7109         0, 0, 0, 0,
7110         0, 0, 0, 0,
7111         0, 0, 0, 0,
7112         0, 0, 0, 0,
7113         0, 0, 0, 0,
7114
7115         0, 0, 0, 0,
7116         0, 0, 0, 0,
7117         0, 0, 0, 0,
7118         0, 0, 0, 0,
7119         0, 0, 0, 0,
7120         0, 0, 0, 0,
7121         0, 0, 0, 0,
7122
7123         0, 0, 0, 0,
7124         0, 0, 0, 0,
7125         0, 0, 0, 0,
7126         0, 0, 0, 0,
7127         0, 0, 0, 0,
7128         0, 0, 0, 0,
7129         0, 0, 0, 0,
7130
7131         0, 0, 0, 0,
7132         0, 0, 0, 0,
7133         0, 0, 0, 0,
7134         0, 0, 1, 0,
7135         0, 2, 3, 0,
7136         0, 4, 5, 0,
7137         0, 0, 0, 0,
7138
7139         0, 0, 0, 0,
7140         0, 0, 0, 0,
7141         0, 0, 0, 0,
7142         0, 6, 7, 0,
7143         0, 8, 9, 0,
7144         0, 10, 11, 0,
7145         0, 0, 0, 0,
7146
7147         0, 0, 0, 0,
7148         0, 0, 0, 0,
7149         0, 0, 0, 0,
7150         0, 0, 0, 0,
7151         0, 0, 0, 0,
7152         0, 0, 0, 0,
7153         0, 0, 0, 0,
7154
7155         0, 0, 0, 0,
7156         0, 0, 0, 0,
7157         0, 0, 0, 0,
7158         0, 0, 0, 0,
7159         0, 0, 0, 0,
7160         0, 0, 0, 0,
7161         0, 0, 0, 0,
7162
7163         0, 0, 0, 0,
7164         0, 0, 0, 0,
7165         0, 0, 0, 0,
7166         0, 0, 0, 0,
7167         0, 0, 0, 0,
7168         0, 0, 0, 0,
7169         0, 0, 0, 0,
7170
7171         0, 0, 0, 0,
7172         0, 0, 0, 0,
7173         0, 0, 0, 0,
7174         0, 12, 13, 0,
7175         0, 14, 15, 0,
7176         0, 16, 17, 0,
7177         0, 0, 0, 0,
7178
7179         0, 0, 0, 0,
7180         0, 0, 0, 0,
7181         0, 0, 0, 0,
7182         0, 18, 19, 0,
7183         0, 20, 21, 0,
7184         0, 22, 23, 0,
7185         0, 0, 0, 0,
7186
7187         0, 0, 0, 0,
7188         0, 0, 0, 0,
7189         0, 0, 0, 0,
7190         0, 0, 0, 0,
7191         0, 0, 0, 0,
7192         0, 0, 0, 0,
7193         0, 0, 0, 0,
7194
7195         0, 0, 0, 0,
7196         0, 0, 0, 0,
7197         0, 0, 0, 0,
7198         0, 0, 0, 0,
7199         0, 0, 0, 0,
7200         0, 0, 0, 0,
7201         0, 0, 0, 0,
7202
7203         0, 0, 0, 0,
7204         0, 0, 0, 0,
7205         0, 0, 0, 0,
7206         0, 0, 0, 0,
7207         0, 0, 0, 0,
7208         0, 0, 0, 0,
7209         0, 0, 0, 0,
7210
7211         0, 0, 0, 0,
7212         0, 0, 0, 0,
7213         0, 0, 0, 0,
7214         0, 0, 0, 0,
7215         0, 0, 0, 0,
7216         0, 0, 0, 0,
7217         0, 0, 0, 0,
7218
7219         0, 0, 0, 0,
7220         0, 0, 0, 0,
7221         0, 0, 0, 0,
7222         0, 0, 0, 0,
7223         0, 0, 0, 0,
7224         0, 0, 0, 0,
7225         0, 0, 0, 0,
7226
7227         0, 0, 0, 0,
7228         0, 0, 0, 0,
7229         0, 0, 0, 0,
7230         0, 0, 0, 0,
7231         0, 0, 0, 0,
7232         0, 0, 0, 0,
7233         0, 0, 0, 0
7234     }));
7235
7236     auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(inputValues));
7237
7238     LayerTestResult<T, 4> result(outputTensorInfo);
7239     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, std::vector<T>(expectedOutputValues));
7240
7241     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
7242     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
7243
7244     armnn::PadQueueDescriptor descriptor;
7245
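         // (before, after) padding counts per dimension: 2+1+1 = 4, 2+2+1 = 5, 3+3+1 = 7, 2+1+1 = 4, matching the 4x5x7x4 output shape.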
7246     std::vector<std::pair<unsigned int, unsigned int>> padList;
7247     padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
7248     padList.push_back(std::pair<unsigned int, unsigned int>(2,1));
7249     padList.push_back(std::pair<unsigned int, unsigned int>(3,1));
7250     padList.push_back(std::pair<unsigned int, unsigned int>(1,1));
7251
7252     descriptor.m_Parameters.m_PadList = padList;
7253     armnn::WorkloadInfo info;
7254
7255     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
7256     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
7257
7258     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePad(descriptor, info);
7259
7260     inputHandle->Allocate();
7261     outputHandle->Allocate();
7262
7263     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0][0][0]);
7264
7265     workload->PostAllocationConfigure();
7266     workload->Execute();
7267
7268     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
7269
7270     return result;
7271 }
7272
7273 LayerTestResult<uint8_t, 2> PadUint82dTest(
7274     armnn::IWorkloadFactory& workloadFactory,
7275     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7276 {
7277     return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
7278 }
7279
7280 LayerTestResult<uint8_t, 2> PadUint82dCustomPaddingTest(
7281     armnn::IWorkloadFactory& workloadFactory,
7282     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7283 {
7284     return Pad2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0, 1.0f);
7285 }
7286
7287 LayerTestResult<uint8_t, 3> PadUint83dTest(
7288     armnn::IWorkloadFactory& workloadFactory,
7289     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7290 {
7291     return Pad3dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
7292 }
7293
7294 LayerTestResult<uint8_t, 4> PadUint84dTest(
7295     armnn::IWorkloadFactory& workloadFactory,
7296     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7297 {
7298     return Pad4dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
7299 }
7300
7301
7302 template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 2>
7303 Pad2dTestCommon<armnn::DataType::QuantisedSymm16>(
7304     armnn::IWorkloadFactory& workloadFactory,
7305     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7306     float qScale,
7307     int32_t qOffset,
7308     const float customPaddingValue);
7309
7310 template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 3>
7311 Pad3dTestCommon<armnn::DataType::QuantisedSymm16>(
7312     armnn::IWorkloadFactory& workloadFactory,
7313     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7314     float qScale,
7315     int32_t qOffset);
7316
7317 template LayerTestResult<typename armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
7318 Pad4dTestCommon<armnn::DataType::QuantisedSymm16>(
7319     armnn::IWorkloadFactory& workloadFactory,
7320     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7321     float qScale,
7322     int32_t qOffset);
7323
7324 LayerTestResult<float, 2> PadFloat322dTest(
7325     armnn::IWorkloadFactory& workloadFactory,
7326     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7327 {
7328     return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
7329 }
7330
7331 LayerTestResult<float, 2> PadFloat322dCustomPaddingTest(
7332     armnn::IWorkloadFactory& workloadFactory,
7333     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7334 {
7335     return Pad2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0, 1.0f);
7336 }
7337
7338 LayerTestResult<float, 3> PadFloat323dTest(
7339     armnn::IWorkloadFactory& workloadFactory,
7340     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7341 {
7342     return Pad3dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
7343 }
7344
7345 LayerTestResult<float, 4> PadFloat324dTest(
7346     armnn::IWorkloadFactory& workloadFactory,
7347     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7348 {
7349     return Pad4dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
7350 }
7351
7352 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7353 LayerTestResult<T, 4> L2NormalizationEpsilonTestCommon(
7354         armnn::IWorkloadFactory& workloadFactory,
7355         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7356         float scale,
7357         int32_t offset,
7358         float outScale,
7359         int32_t outOffset,
7360         const armnn::DataLayout layout,
7361         float epsilon)
7362 {
7363     // Width: 1
7364     // Height: 1
7365     // Channels: 3
7366     // BatchSize: 1
7367     unsigned int numberOfBatches = 1;
7368     unsigned int numberOfChannels = 3;
7369     unsigned int height = 1;
7370     unsigned int width = 1;
7371
7372     const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
7373             numberOfBatches, numberOfChannels, height, width, layout);
7374
7375     // 0.00000001^2 + 0.00000002^2 + 0.00000003^2 < 1e-12
7376     std::vector<float> inputValues
7377     {
7378         // Batch 0, Channel 0, Height (1) x Width (1)
7379         0.00000001f,
7380
7381         // Batch 0, Channel 1, Height (1) x Width (1)
7382         0.00000002f,
7383
7384         // Batch 0, Channel 2, Height (1) x Width (1)
7385         0.00000003f,
7386     };
7387
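         // The sum of squares is below epsilon, so epsilon dominates the denominator
         // and the expected inverse L2 norm is simply 1 / sqrt(epsilon).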
7388     const float approxInvL2Norm = 1.f / sqrtf(epsilon);
7389     std::vector<float> expectedOutputValues
7390     {
7391         // Batch 0, Channel 0, Height (1) x Width (1)
7392         0.00000001f * approxInvL2Norm,
7393         0.00000002f * approxInvL2Norm,
7394         0.00000003f * approxInvL2Norm,
7395     };
7396
7397     return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
7398                                               inputValues, outScale, outOffset, expectedOutputValues, layout,
7399                                               epsilon);
7400 }
7401
7402
7403 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7404 LayerTestResult<T, 4> L2Normalization1dTestCommon(
7405         armnn::IWorkloadFactory& workloadFactory,
7406         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7407         float scale,
7408         int32_t offset,
7409         float outScale,
7410         int32_t outOffset,
7411         const armnn::DataLayout layout)
7412 {
7413     // Width: 1
7414     // Height: 1
7415     // Channels: 10
7416     // BatchSize: 1
7417     unsigned int numberOfBatches = 1;
7418     unsigned int numberOfChannels = 10;
7419     unsigned int height = 1;
7420     unsigned int width = 1;
7421
7422
7423     const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
7424             numberOfBatches, numberOfChannels, height, width, layout);
7425     std::vector<float> inputValues
7426     {
7427         // Batch 0, Channel 0, Height (1) x Width (1)
7428         1.0f,
7429
7430         // Batch 0, Channel 1, Height (1) x Width (1)
7431         2.0f,
7432
7433         // Batch 0, Channel 2, Height (1) x Width (1)
7434         3.0f,
7435
7436         // Batch 0, Channel 3, Height (1) x Width (1)
7437         4.0f,
7438
7439         // Batch 0, Channel 4, Height (1) x Width (1)
7440         5.0f,
7441
7442         // Batch 0, Channel 5, Height (1) x Width (1)
7443         6.0f,
7444
7445         // Batch 0, Channel 6, Height (1) x Width (1)
7446         7.0f,
7447
7448         // Batch 0, Channel 7, Height (1) x Width (1)
7449         8.0f,
7450
7451         // Batch 0, Channel 8, Height (1) x Width (1)
7452         9.0f,
7453
7454         // Batch 0, Channel 9, Height (1) x Width (1)
7455         10.0f
7456     };
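         // 1 / sqrt(1^2 + 2^2 + ... + 10^2) = 1 / sqrt(385)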
7457     const float approxInvL2Norm = 0.050964719f;
7458     std::vector<float> expectedOutputValues
7459     {
7460         // Batch 0, Channel 0, Height (1) x Width (1)
7461         1.0f * approxInvL2Norm,
7462         2.0f * approxInvL2Norm,
7463         3.0f * approxInvL2Norm,
7464         4.0f * approxInvL2Norm,
7465         5.0f * approxInvL2Norm,
7466         6.0f * approxInvL2Norm,
7467         7.0f * approxInvL2Norm,
7468         8.0f * approxInvL2Norm,
7469         9.0f * approxInvL2Norm,
7470         10.0f * approxInvL2Norm
7471     };
7472
7473
7474     return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
7475                                               inputValues, outScale, outOffset, expectedOutputValues, layout);
7476 }
7477
7478 LayerTestResult<float, 4> L2NormalizationDefaultEpsilonTest(
7479         armnn::IWorkloadFactory& workloadFactory,
7480         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7481         const armnn::DataLayout layout)
7482 {
7483     // Dummy descriptor to get the default value of epsilon.
7484     armnn::L2NormalizationDescriptor descriptor;
7485
7486     return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
7487                                                                       layout, descriptor.m_Eps);
7488 }
7489
7490 LayerTestResult<float, 4> L2NormalizationNonDefaultEpsilonTest(
7491         armnn::IWorkloadFactory& workloadFactory,
7492         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7493         const armnn::DataLayout layout)
7494 {
7495     return L2NormalizationEpsilonTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
7496                                                                       layout, 1e-9f);
7497 }
7498
7499 LayerTestResult<float, 4> L2Normalization1dTest(
7500     armnn::IWorkloadFactory& workloadFactory,
7501     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7502     const armnn::DataLayout layout)
7503 {
7504     return L2Normalization1dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0, layout);
7505 }
7506
7507 LayerTestResult<int16_t, 4> L2Normalization1dInt16Test(
7508     armnn::IWorkloadFactory& workloadFactory,
7509     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7510     const armnn::DataLayout layout)
7511 {
7512     return L2Normalization1dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
7513                                                                          layout);
7514 }
7515
7516 LayerTestResult<uint8_t, 4> L2Normalization1dUint8Test(
7517     armnn::IWorkloadFactory& workloadFactory,
7518     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7519     const armnn::DataLayout layout)
7520 {
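         // An output scale of 1/128 with offset 128 maps the normalized range [-1, 1] onto [0, 255].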
7521     return L2Normalization1dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
7522                                                                          1.f/128, 128, layout);
7523 }
7524
7525 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7526 LayerTestResult<T, 4> L2Normalization2dTestCommon(
7527     armnn::IWorkloadFactory& workloadFactory,
7528     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7529     float scale,
7530     int32_t offset,
7531     float outScale,
7532     int32_t outOffset,
7533     const armnn::DataLayout layout)
7534 {
7535     // Width: 5
7536     // Height: 1
7537     // Channels: 2
7538     // BatchSize: 1
7539     unsigned int numberOfBatches = 1;
7540     unsigned int numberOfChannels = 2;
7541     unsigned int height = 1;
7542     unsigned int width = 5;
7543
7544     const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
7545             numberOfBatches, numberOfChannels, height, width, layout);
7546     std::vector<float> inputValues
7547     {
7548         // Batch 0, Channel 0, Height (1) x Width (5)
7549         1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
7550
7551         // Batch 0, Channel 1, Height (1) x Width (5)
7552         2.0f, 4.0f, 6.0f, 8.0f, 10.0f
7553     };
7554     std::vector<float> expectedOutputValues
7555     {
7556         // Batch 0, Channel 0, Height (1) x Width (5)
7557         1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
7558         3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
7559         5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
7560         7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
7561         9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
7562
7563         // Batch 0, Channel 1, Height (1) x Width (5)
7564         2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
7565         4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
7566         6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
7567         8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
7568         10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
7569     };
7570
7571     return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
7572                                               inputValues, outScale, outOffset, expectedOutputValues, layout);
7573 }
7574
7575 LayerTestResult<float, 4> L2Normalization2dTest(
7576     armnn::IWorkloadFactory& workloadFactory,
7577     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7578     const armnn::DataLayout layout)
7579 {
7580     return L2Normalization2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
7581                                                                  layout);
7582 }
7583
7584 LayerTestResult<int16_t, 4> L2Normalization2dInt16Test(
7585     armnn::IWorkloadFactory& workloadFactory,
7586     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7587     const armnn::DataLayout layout)
7588 {
7589     return L2Normalization2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
7590                                                                          layout);
7591 }
7592
7593 LayerTestResult<uint8_t, 4> L2Normalization2dUint8Test(
7594     armnn::IWorkloadFactory& workloadFactory,
7595     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7596     const armnn::DataLayout layout)
7597 {
7598     return L2Normalization2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
7599                                                                          1.f/128, 128, layout);
7600 }
7601
7602 LayerTestResult<float, 2> L2Normalization2dShapeTest(
7603     armnn::IWorkloadFactory& workloadFactory,
7604     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
7605 {
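         // Rank-2 variant: with NHWC the trailing dimension (size 2) acts as the channel axis, so each pair is normalized.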
7606     const armnn::DataLayout layout = armnn::DataLayout::NHWC;
7607     const armnn::TensorShape inputOutputTensorShape = armnn::TensorShape({ 5, 2 });
7608
7609     std::vector<float> inputData
7610     {
7611         1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f
7612     };
7613     std::vector<float> expectedOutputData
7614     {
7615         1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
7616         2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
7617         3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
7618         4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
7619         5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
7620         6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
7621         7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
7622         8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
7623         9.0f  * CalcInvL2Norm({ 9.0f, 10.0f }),
7624         10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
7625     };
7626
7627     const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
7628     const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
7629
7630     auto inputTensor = MakeTensor<float, 2>(inputTensorInfo, QuantizedVector<float>(
7631                                                              inputTensorInfo.GetQuantizationScale(),
7632                                                              inputTensorInfo.GetQuantizationOffset(),
7633                                                              inputData));
7634
7635     LayerTestResult<float, 2> result(outputTensorInfo);
7636     result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, QuantizedVector<float>(
7637                                                                    outputTensorInfo.GetQuantizationScale(),
7638                                                                    outputTensorInfo.GetQuantizationOffset(),
7639                                                                    expectedOutputData));
7640
7641     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
7642     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
7643
7644     armnn::L2NormalizationQueueDescriptor descriptor;
7645     descriptor.m_Parameters.m_Eps = 1e-12f;
7646     descriptor.m_Parameters.m_DataLayout = layout;
7647     armnn::WorkloadInfo info;
7648
7649     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
7650     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
7651
7652     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
7653
7654     inputHandle->Allocate();
7655     outputHandle->Allocate();
7656
7657     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
7658
7659     workload->PostAllocationConfigure();
7660     ExecuteWorkload(*workload, memoryManager);
7661
7662     CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
7663
7664     return result;
7665 }
7666
7667 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7668 LayerTestResult<T, 4> L2Normalization3dTestCommon(
7669     armnn::IWorkloadFactory& workloadFactory,
7670     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7671     float scale,
7672     int32_t offset,
7673     float outScale,
7674     int32_t outOffset,
7675     const armnn::DataLayout layout)
7676 {
7677     // Width: 3
7678     // Height: 4
7679     // Channels: 2
7680     // BatchSize: 1
7681     unsigned int numberOfBatches = 1;
7682     unsigned int numberOfChannels = 2;
7683     unsigned int height = 4;
7684     unsigned int width = 3;
7685
7686     const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
7687             numberOfBatches, numberOfChannels, height, width, layout);
7688     std::vector<float> inputValues
7689     {
7690         // Batch 0, Channel 0, Height (4) x Width (3)
7691         119.0f,  21.0f, 150.0f,
7692         149.0f,  32.0f, 179.0f,
7693          15.0f, 227.0f, 141.0f,
7694         147.0f, 199.0f, 220.0f,
7695
7696         // Batch 0, Channel 1, Height (4) x Width (3)
7697         110.0f, 140.0f,  73.0f,
7698         211.0f, 212.0f,  89.0f,
7699          24.0f, 138.0f, 188.0f,
7700         162.0f,  12.0f, 161.0f
7701     };
7702     std::vector<float> expectedOutputValues
7703     {
7704         // Batch 0, Channel 0, Height (4) x Width (3)
7705         119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
7706         21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
7707         150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
7708         149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
7709         32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
7710         179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
7711         15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
7712         227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
7713         141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
7714         147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
7715         199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
7716         220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
7717
7718         // Batch 0, Channel 1, Height (4) x Width (3)
7719         110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
7720         140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
7721         73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
7722         211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
7723         212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
7724         89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
7725         24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
7726         138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
7727         188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
7728         162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
7729         12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
7730         161.0f * CalcInvL2Norm({ 220.0f, 161.0f })
7731     };
7732
7733     return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
7734                                               inputValues, outScale, outOffset, expectedOutputValues, layout);
7735 }
7736
7737 LayerTestResult<float, 4> L2Normalization3dTest(
7738     armnn::IWorkloadFactory& workloadFactory,
7739     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7740     const armnn::DataLayout layout)
7741 {
7742     return L2Normalization3dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
7743                                                                  layout);
7744 }
7745
7746 LayerTestResult<int16_t, 4> L2Normalization3dInt16Test(
7747     armnn::IWorkloadFactory& workloadFactory,
7748     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7749     const armnn::DataLayout layout)
7750 {
7751     return L2Normalization3dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
7752                                                                          layout);
7753 }
7754
7755 LayerTestResult<uint8_t, 4> L2Normalization3dUint8Test(
7756     armnn::IWorkloadFactory& workloadFactory,
7757     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7758     const armnn::DataLayout layout)
7759 {
7760     return L2Normalization3dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
7761                                                                          1.f/128, 128, layout);
7762 }
7763
7764 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7765 LayerTestResult<T, 4> L2Normalization4dTestCommon(
7766     armnn::IWorkloadFactory& workloadFactory,
7767     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7768     float scale,
7769     int32_t offset,
7770     float outScale,
7771     int32_t outOffset,
7772     const armnn::DataLayout layout)
7773 {
7774     // Width: 3
7775     // Height: 4
7776     // Channels: 3
7777     // BatchSize: 2
7778     unsigned int numberOfBatches = 2;
7779     unsigned int numberOfChannels = 3;
7780     unsigned int height = 4;
7781     unsigned int width = 3;
7782
7783     const armnn::TensorShape inputOutputShape = armnnUtils::GetTensorShape(
7784             numberOfBatches, numberOfChannels, height, width, layout);
7785     std::vector<float> inputValues
7786     {
7787         // Batch 0, Channel 0, Height (4) x Width (3)
7788         235.0f,  46.0f, 178.0f,
7789         100.0f, 123.0f,  19.0f,
7790         172.0f,  74.0f, 250.0f,
7791           6.0f, 195.0f,  80.0f,
7792
7793         // Batch 0, Channel 1, Height (4) x Width (3)
7794         113.0f,  95.0f, 202.0f,
7795          77.0f, 114.0f,  71.0f,
7796         122.0f, 246.0f, 166.0f,
7797          82.0f,  28.0f,  37.0f,
7798
7799         // Batch 0, Channel 2, Height (4) x Width (3)
7800          56.0f, 170.0f, 162.0f,
7801         194.0f,  89.0f, 254.0f,
7802          12.0f, 209.0f, 200.0f,
7803           1.0f,  64.0f,  54.0f,
7804
7805         // Batch 1, Channel 0, Height (4) x Width (3)
7806          67.0f,  90.0f,  49.0f,
7807           7.0f, 163.0f,  18.0f,
7808          25.0f, 117.0f, 103.0f,
7809         247.0f,  59.0f, 189.0f,
7810
7811         // Batch 1, Channel 1, Height (4) x Width (3)
7812         239.0f, 104.0f, 199.0f,
7813          17.0f, 124.0f, 153.0f,
7814         222.0f, 217.0f,  75.0f,
7815          32.0f, 126.0f,  21.0f,
7816
7817         // Batch 1, Channel 2, Height (4) x Width (3)
7818          97.0f, 145.0f, 215.0f,
7819         115.0f, 116.0f, 238.0f,
7820         226.0f,  16.0f, 132.0f,
7821          92.0f, 125.0f,  88.0f
7822     };
7823     std::vector<float> expectedOutputValues
7824     {
7825         // Batch 0, Channel 0, Height (4) x Width (3)
7826         235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
7827         46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
7828         178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
7829         100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
7830         123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
7831         19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
7832         172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
7833         74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
7834         250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
7835         6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
7836         195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
7837         80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
7838
7839         // Batch 0, Channel 1, Height (4) x Width (3)
7840         113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
7841         95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
7842         202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
7843         77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
7844         114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
7845         71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
7846         122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
7847         246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
7848         166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
7849         82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
7850         28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
7851         37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
7852
7853         // Batch 0, Channel 2, Height (4) x Width (3)
7854         56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
7855         170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
7856         162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
7857         194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
7858         89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
7859         254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
7860         12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
7861         209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
7862         200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
7863         1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
7864         64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
7865         54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
7866
7867         // Batch 1, Channel 0, Height (4) x Width (3)
7868         67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
7869         90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
7870         49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
7871         7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
7872         163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
7873         18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
7874         25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
7875         117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
7876         103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
7877         247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
7878         59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
7879         189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
7880
7881         // Batch 1, Channel 1, Height (4) x Width (3)
7882         239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
7883         104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
7884         199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
7885         17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
7886         124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
7887         153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
7888         222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
7889         217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
7890         75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
7891         32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
7892         126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
7893         21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
7894
7895         // Batch 1, Channel 2, Height (4) x Width (3)
7896         97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
7897         145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
7898         215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
7899         115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
7900         116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
7901         238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
7902         226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
7903         16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
7904         132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
7905         92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
7906         125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
7907         88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f })
7908     };
7909
7910     return L2NormalizationTestImpl<ArmnnType>(workloadFactory, memoryManager, inputOutputShape, scale, offset,
7911                                               inputValues, outScale, outOffset, expectedOutputValues, layout);
7912 }
7913
7914 LayerTestResult<float, 4> L2Normalization4dTest(
7915     armnn::IWorkloadFactory& workloadFactory,
7916     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7917     const armnn::DataLayout layout)
7918 {
7919     return L2Normalization4dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.f, 0, 0.f, 0,
7920                                                                  layout);
7921 }
7922
7923 LayerTestResult<int16_t, 4> L2Normalization4dInt16Test(
7924     armnn::IWorkloadFactory& workloadFactory,
7925     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7926     const armnn::DataLayout layout)
7927 {
7928     return L2Normalization4dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.f, 0, 1.f, 0,
7929                                                                          layout);
7930 }
7931
7932 LayerTestResult<uint8_t, 4> L2Normalization4dUint8Test(
7933     armnn::IWorkloadFactory& workloadFactory,
7934     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7935     const armnn::DataLayout layout)
7936 {
7937     return L2Normalization4dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.f, 0,
7938                                                                          1.f/128, 128, layout);
7939 }
7940
7941 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
7942 LayerTestResult<T, 4> ConstantTestImpl(
7943     armnn::IWorkloadFactory& workloadFactory,
7944     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
7945     float qScale,
7946     int32_t qOffset)
7947 {
7948     constexpr unsigned int inputWidth = 3;
7949     constexpr unsigned int inputHeight = 4;
7950     constexpr unsigned int inputChannels = 3;
7951     constexpr unsigned int inputBatchSize = 2;
7952
7953     constexpr unsigned int outputWidth = inputWidth;
7954     constexpr unsigned int outputHeight = inputHeight;
7955     constexpr unsigned int outputChannels = inputChannels;
7956     constexpr unsigned int outputBatchSize = inputBatchSize;
7957
7958     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
7959                                         ArmnnType, qScale, qOffset);
7960
7961     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
7962                                          ArmnnType, qScale, qOffset);
7963
7964     // Set quantization parameters if the requested type is a quantized type.
7965     if(armnn::IsQuantizedType<T>())
7966     {
7967         inputTensorInfo.SetQuantizationScale(qScale);
7968         inputTensorInfo.SetQuantizationOffset(qOffset);
7969         outputTensorInfo.SetQuantizationScale(qScale);
7970         outputTensorInfo.SetQuantizationOffset(qOffset);
7971     }
7972
7973     auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
7974         QuantizedVector<T>(qScale, qOffset, {
7975         // Batch 0, Channel 0
7976         235.0f,  46.0f, 178.0f,
7977         100.0f, 123.0f,  19.0f,
7978         172.0f,  74.0f, 250.0f,
7979           6.0f, 195.0f,  80.0f,
7980
7981         // Batch 0, Channel 1
7982         113.0f,  95.0f, 202.0f,
7983          77.0f, 114.0f,  71.0f,
7984         122.0f, 246.0f, 166.0f,
7985          82.0f,  28.0f,  37.0f,
7986
7987         // Batch 0, Channel 2
7988          56.0f, 170.0f, 162.0f,
7989         194.0f,  89.0f, 254.0f,
7990          12.0f, 209.0f, 200.0f,
7991           1.0f,  64.0f,  54.0f,
7992
7993         // Batch 1, Channel 0
7994          67.0f,  90.0f,  49.0f,
7995           7.0f, 163.0f,  18.0f,
7996          25.0f, 117.0f, 103.0f,
7997         247.0f,  59.0f, 189.0f,
7998
7999         // Batch 1, Channel 1
8000         239.0f, 104.0f, 199.0f,
8001          17.0f, 124.0f, 153.0f,
8002         222.0f, 217.0f, 75.0f,
8003          32.0f, 126.0f, 21.0f,
8004
8005         // Batch 1, Channel 2
8006          97.0f, 145.0f, 215.0f,
8007         115.0f, 116.0f, 238.0f,
8008         226.0f,  16.0f, 132.0f,
8009          92.0f, 125.0f,  88.0f,
8010     })));
8011
8012     LayerTestResult<T, 4> result(outputTensorInfo);
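         // The Constant workload simply emits the tensor held in m_LayerOutput, so the expected output equals the input.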
8013     result.outputExpected = input;
8014
8015     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8016
8017     armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
8018     AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
8019
8020     armnn::ConstantQueueDescriptor descriptor;
8021     descriptor.m_LayerOutput = &constantTensor;
8022
8023     armnn::WorkloadInfo info;
8024     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
8025
8026     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
8027
8028     outputHandle->Allocate();
8029
8030     workload->PostAllocationConfigure();
8031     workload->Execute();
8032
8033     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
8034     return result;
8035 }
8036
8037 LayerTestResult<float, 4> ConstantTest(
8038     armnn::IWorkloadFactory& workloadFactory,
8039     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8040 {
8041     return ConstantTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager, 0.0f, 0);
8042 }
8043
8044 LayerTestResult<int16_t, 4> ConstantInt16SimpleQuantizationScaleNoOffsetTest(
8045     armnn::IWorkloadFactory& workloadFactory,
8046     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8047 {
8048     return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 1.0f, 0);
8049 }
8050
8051 LayerTestResult<uint8_t, 4> ConstantUint8SimpleQuantizationScaleNoOffsetTest(
8052     armnn::IWorkloadFactory& workloadFactory,
8053     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8054 {
8055     return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 1.0f, 0);
8056 }
8057
8058 LayerTestResult<uint8_t, 3> ConcatUint8DifferentQParamsTest(
8059         armnn::IWorkloadFactory& workloadFactory,
8060         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8061 {
8062     unsigned int outputWidth = 3;
8063     unsigned int outputHeight = 6;
8064     unsigned int outputChannels = 3;
8065
8066     unsigned int inputWidth1 = 3;
8067     unsigned int inputHeight1 = 6;
8068     unsigned int inputChannels1 = 2;
8069
8070     unsigned int inputWidth2 = 3;
8071     unsigned int inputHeight2 = 6;
8072     unsigned int inputChannels2 = 1;
8073
8074     // Defines the tensor descriptors.
8075     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
8076     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
8077     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
8078
8079     // Quantized input1 tensor. Range [-3, 1]
8080     const float inputScale1 = 0.015686f;
8081     const int32_t inputOffset1 = 192;
8082
8083     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
8084     {
8085         1, 2, 3,
8086         4, 5, 6,
8087         7, 8, 9,
8088         10, 11, 12,
8089         13, 14, 15,
8090         16, 17, 18,
8091
8092         19, 20, 21,
8093         22, 23, 24,
8094         25, 26, 27,
8095         28, 29, 30,
8096         31, 32, 33,
8097         34, 35, 36,
8098     })
8099     );
8100
8101     // Quantized input2 tensor. Range [-1, 4]
8102     const float inputScale2 = 0.019608f;
8103     const int32_t inputOffset2 = 50;
8104
8105     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
8106     {
8107         37, 38, 39,
8108         40, 41, 42,
8109         43, 44, 45,
8110         46, 47, 48,
8111         49, 50, 51,
8112         52, 53, 54,
8113     })
8114     );
8115
8116     // Output has the same quantization parameters as input1,
8117     // so only the requantization of input2 is required.
8118     const float outputScale = 0.015686f;
8119     const int32_t outputOffset = 192;
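         // input2 is effectively requantized into the output space:
         // q_out = round(inputScale2 * (q_in - inputOffset2) / outputScale) + outputOffset, e.g. 37 -> 176.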
8120
8121     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
8122
8123     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
8124     {
8125         1, 2, 3,
8126         4, 5, 6,
8127         7, 8, 9,
8128         10, 11, 12,
8129         13, 14, 15,
8130         16, 17, 18,
8131
8132         19, 20, 21,
8133         22, 23, 24,
8134         25, 26, 27,
8135         28, 29, 30,
8136         31, 32, 33,
8137         34, 35, 36,
8138
8139         176, 177, 178,
8140         179, 181, 182,
8141         183, 184, 186,
8142         187, 188, 189,
8143         191, 192, 193,
8144         195, 196, 197,
8145     })
8146     );
8147
8148     outputTensorInfo.SetQuantizationScale(outputScale);
8149     outputTensorInfo.SetQuantizationOffset(outputOffset);
8150     inputTensorInfo1.SetQuantizationScale(inputScale1);
8151     inputTensorInfo1.SetQuantizationOffset(inputOffset1);
8152     inputTensorInfo2.SetQuantizationScale(inputScale2);
8153     inputTensorInfo2.SetQuantizationOffset(inputOffset2);
8154
8155     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
8156     armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
8157
8158     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
8159     armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
8160
8161     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8162
8163     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
8164
8165     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
8166             subTensorsSupported ?
8167             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
8168             workloadFactory.CreateTensorHandle(inputTensorInfo1);
8169
8170     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
8171             subTensorsSupported ?
8172             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
8173             workloadFactory.CreateTensorHandle(inputTensorInfo2);
8174
8175     armnn::ConcatQueueDescriptor data;
8176     armnn::WorkloadInfo info;
8177     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
8178     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
8179     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8180
8181     data.m_ViewOrigins.push_back(window1);
8182     data.m_ViewOrigins.push_back(window2);
8183
8184     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
8185
8186     inputHandle1->Allocate();
8187     inputHandle2->Allocate();
8188     outputHandle->Allocate();
8189
8190     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
8191     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
8192
8193     workload->PostAllocationConfigure();
8194     workload->Execute();
8195
8196     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
8197
8198     return ret;
8199 }
8200
8201 LayerTestResult<uint8_t, 3> ConcatUint8Test(
8202     armnn::IWorkloadFactory& workloadFactory,
8203     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8204 {
8205     unsigned int outputWidth = 3;
8206     unsigned int outputHeight = 6;
8207     unsigned int outputChannels = 3;
8208
8209     unsigned int inputWidth1 = 3;
8210     unsigned int inputHeight1 = 6;
8211     unsigned int inputChannels1 = 2;
8212
8213     unsigned int inputWidth2 = 3;
8214     unsigned int inputHeight2 = 6;
8215     unsigned int inputChannels2 = 1;
8216
8217     // Defines the tensor descriptors.
8218     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
8219     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
8220     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
8221
8222     // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
8223     const float scale = 0.13497836f;
8224     const int32_t offset = -7;
8225
8226     outputTensorInfo.SetQuantizationScale(scale);
8227     outputTensorInfo.SetQuantizationOffset(offset);
8228     inputTensorInfo1.SetQuantizationScale(scale);
8229     inputTensorInfo1.SetQuantizationOffset(offset);
8230     inputTensorInfo2.SetQuantizationScale(scale);
8231     inputTensorInfo2.SetQuantizationOffset(offset);
8232
8233     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
8234
8235     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
8236         {
8237             1, 2, 3,
8238             4, 5, 6,
8239             7, 8, 9,
8240             10, 11, 12,
8241             13, 14, 15,
8242             16, 17, 18,
8243
8244             19, 20, 21,
8245             22, 23, 24,
8246             25, 26, 27,
8247             28, 29, 30,
8248             31, 32, 33,
8249             34, 35, 36,
8250
8251             37, 38, 39,
8252             40, 41, 42,
8253             43, 44, 45,
8254             46, 47, 48,
8255             49, 50, 51,
8256             52, 53, 54,
8257         })
8258     );
8259
8260     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
8261     {
8262         1, 2, 3,
8263         4, 5, 6,
8264         7, 8, 9,
8265         10, 11, 12,
8266         13, 14, 15,
8267         16, 17, 18,
8268
8269         19, 20, 21,
8270         22, 23, 24,
8271         25, 26, 27,
8272         28, 29, 30,
8273         31, 32, 33,
8274         34, 35, 36,
8275     })
8276     );
8277
8278     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
8279     {
8280         37, 38, 39,
8281         40, 41, 42,
8282         43, 44, 45,
8283         46, 47, 48,
8284         49, 50, 51,
8285         52, 53, 54,
8286     })
8287     );
8288
8289     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; //Extent of the window is defined by size of input[0].
8290     armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
8291
8292     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; //Extent of the window is defined by size of input[1].
8293     armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
8294
8295
8296     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8297
8298     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
8299
8300     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
8301         subTensorsSupported ?
8302             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
8303             workloadFactory.CreateTensorHandle(inputTensorInfo1);
8304
8305     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
8306         subTensorsSupported ?
8307             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
8308             workloadFactory.CreateTensorHandle(inputTensorInfo2);
8309
8310
8311     armnn::ConcatQueueDescriptor data;
8312     armnn::WorkloadInfo info;
8313     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
8314     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
8315     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8316
8317     data.m_ViewOrigins.push_back(window1);
8318     data.m_ViewOrigins.push_back(window2);
8319
8320     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
8321
8322     inputHandle1->Allocate();
8323     inputHandle2->Allocate();
8324     outputHandle->Allocate();
8325
8326     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
8327     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
8328
8329     workload->PostAllocationConfigure();
8330     workload->Execute();
8331
8332     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
8333
8334     return ret;
8335 }
8336
8337 LayerTestResult<uint16_t, 3> ConcatUint16Test(
8338         armnn::IWorkloadFactory& workloadFactory,
8339         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8340 {
8341     unsigned int outputWidth = 3;
8342     unsigned int outputHeight = 6;
8343     unsigned int outputChannels = 3;
8344
8345     unsigned int inputWidth1 = 3;
8346     unsigned int inputHeight1 = 6;
8347     unsigned int inputChannels1 = 2;
8348
8349     unsigned int inputWidth2 = 3;
8350     unsigned int inputHeight2 = 6;
8351     unsigned int inputChannels2 = 1;
8352
8353     // Defines the tensor descriptors.
8354     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedSymm16);
8355     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedSymm16);
8356     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedSymm16);
8357
8358     // Arbitrary scale and offsets. They don't really matter as the Concat operator doesn't dequantize/quantize them.
8359     const float scale = 0.13497836f;
8360     const int32_t offset = -7;
8361
8362     outputTensorInfo.SetQuantizationScale(scale);
8363     outputTensorInfo.SetQuantizationOffset(offset);
8364     inputTensorInfo1.SetQuantizationScale(scale);
8365     inputTensorInfo1.SetQuantizationOffset(offset);
8366     inputTensorInfo2.SetQuantizationScale(scale);
8367     inputTensorInfo2.SetQuantizationOffset(offset);
8368
8369     LayerTestResult<uint16_t, 3> ret(outputTensorInfo);
8370
8371     ret.outputExpected = MakeTensor<uint16_t, 3>(outputTensorInfo, std::vector<uint16_t>(
8372     {
8373         1, 2, 3,
8374         4, 5, 6,
8375         7, 8, 9,
8376         10, 11, 12,
8377         13, 14, 15,
8378         16, 17, 18,
8379
8380         19, 20, 21,
8381         22, 23, 24,
8382         25, 26, 27,
8383         28, 29, 30,
8384         31, 32, 33,
8385         34, 35, 36,
8386
8387         37, 38, 39,
8388         40, 41, 42,
8389         43, 44, 45,
8390         46, 47, 48,
8391         49, 50, 51,
8392         52, 53, 54,
8393     }));
8394
8395     auto input1 = MakeTensor<uint16_t, 3>(inputTensorInfo1, std::vector<uint16_t>(
8396     {
8397         1, 2, 3,
8398         4, 5, 6,
8399         7, 8, 9,
8400         10, 11, 12,
8401         13, 14, 15,
8402         16, 17, 18,
8403
8404         19, 20, 21,
8405         22, 23, 24,
8406         25, 26, 27,
8407         28, 29, 30,
8408         31, 32, 33,
8409         34, 35, 36,
8410     }));
8411
8412     auto input2 = MakeTensor<uint16_t, 3>(inputTensorInfo2, std::vector<uint16_t>(
8413     {
8414         37, 38, 39,
8415         40, 41, 42,
8416         43, 44, 45,
8417         46, 47, 48,
8418         49, 50, 51,
8419         52, 53, 54,
8420     }));
8421
8422     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by the size of input[0].
8423     armnn::ConcatQueueDescriptor::ViewOrigin window1(wOrigin1);
8424
8425     std::vector<unsigned int> wOrigin2 = { 2, 0, 0 }; // Extent of the window is defined by the size of input[1].
8426     armnn::ConcatQueueDescriptor::ViewOrigin window2(wOrigin2);
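    // With these origins, input1 occupies channels 0-1 of the 3-channel output and
    // input2 occupies channel 2 (the origins are in { channel, height, width }
    // order, matching the 3D tensor infos above).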
8427
8428
8429     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8430
8431     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
8432
8433     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
8434             subTensorsSupported ?
8435             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
8436             workloadFactory.CreateTensorHandle(inputTensorInfo1);
8437
8438     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
8439             subTensorsSupported ?
8440             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
8441             workloadFactory.CreateTensorHandle(inputTensorInfo2);
8442
8443
8444     armnn::ConcatQueueDescriptor data;
8445     armnn::WorkloadInfo info;
8446     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
8447     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
8448     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8449
8450     data.m_ViewOrigins.push_back(window1);
8451     data.m_ViewOrigins.push_back(window2);
8452
8453     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConcat(data, info);
8454
8455     inputHandle1->Allocate();
8456     inputHandle2->Allocate();
8457     outputHandle->Allocate();
8458
8459     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
8460     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
8461
8462     workload->PostAllocationConfigure();
8463     workload->Execute();
8464
8465     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
8466
8467     return ret;
8468 }
8469
8470 namespace
8471 {
8472 template <typename T>
8473 LayerTestResult<T, 4> AdditionQuantizeTestHelper(
8474     armnn::IWorkloadFactory& workloadFactory,
8475     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
8476     const unsigned int shape0[4],
8477     const std::vector<T>& values0,
8478     float scale0,
8479     int32_t offset0,
8480     const unsigned int shape1[4],
8481     const std::vector<T> & values1,
8482     float scale1,
8483     int32_t offset1,
8484     const unsigned int outShape[4],
8485     const std::vector<T> & outValues,
8486     float outScale,
8487     int32_t outOffset)
8488 {
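    // Derive the quantized data type from the template argument: uint8_t selects
    // QuantisedAsymm8, any other type (int16_t in these tests) QuantisedSymm16.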
8489     auto dataType = (std::is_same<T, uint8_t>::value ?
8490                      armnn::DataType::QuantisedAsymm8 :
8491                      armnn::DataType::QuantisedSymm16);
8492
8493     armnn::TensorInfo inputTensorInfo0(4, shape0, dataType);
8494     armnn::TensorInfo inputTensorInfo1(4, shape1, dataType);
8495     armnn::TensorInfo outputTensorInfo(4, outShape, dataType);
8496
8497     inputTensorInfo0.SetQuantizationScale(scale0);
8498     inputTensorInfo0.SetQuantizationOffset(offset0);
8499
8500     inputTensorInfo1.SetQuantizationScale(scale1);
8501     inputTensorInfo1.SetQuantizationOffset(offset1);
8502
8503     outputTensorInfo.SetQuantizationScale(outScale);
8504     outputTensorInfo.SetQuantizationOffset(outOffset);
8505
8506     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
8507     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
8508
8509     LayerTestResult<T, 4> result(outputTensorInfo);
8510     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
8511
8512     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
8513     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
8514     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8515
8516     armnn::AdditionQueueDescriptor data;
8517     armnn::WorkloadInfo info;
8518     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
8519     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
8520     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8521
8522     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
8523
8524     inputHandle0->Allocate();
8525     inputHandle1->Allocate();
8526     outputHandle->Allocate();
8527
8528     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
8529     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
8530
8531     workload->PostAllocationConfigure();
8532     workload->Execute();
8533
8534     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
8535
8536     return result;
8537 }
8538 } // anonymous namespace
8539
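// For reference, the expected values in the quantized addition tests below can be
// reproduced by dequantizing each input as real = scale * (quantized - offset),
// adding, and requantizing as quantized = real / scale + offset, clamped to the
// data type's range. E.g. the first element of AdditionUint8Test:
// 7 * (63 - 3) = 420, 7 * (21 - 3) = 126, and (420 + 126) / 7 + 3 = 81.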
8540 LayerTestResult<uint8_t, 4> AdditionUint8Test(
8541     armnn::IWorkloadFactory& workloadFactory,
8542     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8543 {
8544     const unsigned int shape0[] = { 1, 2, 2, 3 };
8545     const unsigned int shape1[] = { 1, 2, 2, 3 };
8546
8547     std::vector<uint8_t> input0(
8548     {
8549         63,  35,  77,  70,  56, 112, //  420, 224,  518,  469,  371, 763
8550         203,  28, 252, 168, 245,  91  // 1400, 175, 1743, 1155, 1694, 616
8551     });
8552
8553     std::vector<uint8_t> input1(
8554     {
8555         21,   7, 175, 231, 175, 210, // 126,   28, 1204, 1596, 1204, 1449
8556         126, 161,  63,  21, 105, 126  // 861, 1106,  420,  126,  714,  861
8557     });
8558
8559     std::vector<uint8_t> output(
8560     {
8561         81,  39, 249, 255, 228, 255, //  546,  252, 1722, 2065(clamped), 1575, 2212(clamped)
8562         255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477
8563     });
8564
8565     return AdditionQuantizeTestHelper(workloadFactory,
8566                                       memoryManager,
8567                                       shape0, input0, 7.0f, 3,
8568                                       shape1, input1, 7.0f, 3,
8569                                       shape0, output, 7.0f, 3);
8570 }
8571
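// Unlike the Uint8 variant above, QuantisedSymm16 is a symmetric encoding with a
// fixed zero offset, and its +/-32767 range means none of the sums below saturate.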
8572 LayerTestResult<int16_t, 4> AdditionInt16Test(
8573     armnn::IWorkloadFactory& workloadFactory,
8574     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8575 {
8576     const unsigned int shape0[] = { 1, 2, 2, 3 };
8577     const unsigned int shape1[] = { 1, 2, 2, 3 };
8578
8579     std::vector<int16_t> input0(
8580         {
8581             63,  35,  77,  70,  56, 112, //  441, 245,  539,  490,  392,  784
8582             203,  28, 252, 168, 245,  91  // 1421, 196, 1764, 1176, 1715, 637
8583         });
8584
8585     std::vector<int16_t> input1(
8586         {
8587             21,   7, 175, 231, 175, 210, // 147,   49, 1225, 1617, 1225, 1470
8588             126, 161,  63,  21, 105, 126  // 882, 1127,  441,  147,  735,  882
8589         });
8590
8591     std::vector<int16_t> output(
8592         {
8593             84,  42, 252, 301, 231, 322, //  588,  294, 1764, 2107, 1617, 2254
8594             329, 189, 315, 189, 350, 217, // 2303, 1323, 2205, 1323, 2450, 1519
8595         });
8596
8597     return AdditionQuantizeTestHelper(workloadFactory,
8598                                       memoryManager,
8599                                       shape0, input0, 7.0f, 0,
8600                                       shape1, input1, 7.0f, 0,
8601                                       shape0, output, 7.0f, 0);
8602 }
8603
8604 namespace
8605 {
8606 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
8607 LayerTestResult<T, 4> MultiplicationQuantizeTestHelper(
8608     armnn::IWorkloadFactory& workloadFactory,
8609     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
8610     const unsigned int shape0[4],
8611     const std::vector<T> & values0,
8612     float scale0,
8613     int32_t offset0,
8614     const unsigned int shape1[4],
8615     const std::vector<T> & values1,
8616     float scale1,
8617     int32_t offset1,
8618     const unsigned int outShape[4],
8619     const std::vector<T> & outValues,
8620     float outScale,
8621     int32_t outOffset)
8622 {
8623     armnn::TensorInfo inputTensorInfo0(4, shape0, ArmnnType);
8624     armnn::TensorInfo inputTensorInfo1(4, shape1, ArmnnType);
8625     armnn::TensorInfo outputTensorInfo(4, outShape, ArmnnType);
8626
8627     inputTensorInfo0.SetQuantizationScale(scale0);
8628     inputTensorInfo0.SetQuantizationOffset(offset0);
8629
8630     inputTensorInfo1.SetQuantizationScale(scale1);
8631     inputTensorInfo1.SetQuantizationOffset(offset1);
8632
8633     outputTensorInfo.SetQuantizationScale(outScale);
8634     outputTensorInfo.SetQuantizationOffset(outOffset);
8635
8636     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
8637     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
8638
8639     LayerTestResult<T, 4> result(outputTensorInfo);
8640     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
8641
8642     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
8643     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
8644     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8645
8646     armnn::MultiplicationQueueDescriptor data;
8647     armnn::WorkloadInfo info;
8648     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
8649     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
8650     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8651
8652     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
8653
8654     inputHandle0->Allocate();
8655     inputHandle1->Allocate();
8656     outputHandle->Allocate();
8657
8658     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
8659     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
8660
8661     workload->PostAllocationConfigure();
8662     workload->Execute();
8663
8664     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
8665
8666     return result;
8667 }
8668 } // anonymous namespace
8669
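// As with addition, the expected quantized products can be reproduced by hand,
// e.g. the first element of MultiplicationUint8Test: 4 * (62 - 1) = 244,
// 3 * (126 + 2) = 384, and 244 * 384 = 93696, which requantizes to
// 93696 / 1366.255 - 5 = 64 after rounding; products that requantize outside
// [0, 255] are clamped, e.g. 684 * 555 = 379620 -> 255.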
8670 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(
8671     armnn::IWorkloadFactory& workloadFactory,
8672     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8673 {
8674     unsigned int batchSize = 1;
8675     unsigned int channels = 2;
8676     unsigned int height = 2;
8677     unsigned int width = 3;
8678     const unsigned int shape[] = { batchSize, channels, height, width };
8679
8680     // See dequantized values to the right.
8681     std::vector<uint8_t> input0({
8682          62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
8683         188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
8684     });
8685
8686     // See dequantized values to the right.
8687     std::vector<uint8_t> input1({
8688         126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
8689          48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
8690     });
8691
8692     // See dequantized values to the right.
8693     std::vector<uint8_t> output(
8694     {
8695          64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
8696          77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
8697     });
8698
8699     // Scale/offset chosen to have output values out of range.
8700     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
8701                                                                               memoryManager,
8702                                                                               shape,
8703                                                                               input0,
8704                                                                               4.0f,
8705                                                                               1,
8706                                                                               shape,
8707                                                                               input1,
8708                                                                               3.0f,
8709                                                                               -2,
8710                                                                               shape,
8711                                                                               output,
8712                                                                               1366.255f,
8713                                                                               -5);
8714 }
8715
8716 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1ElementUint8Test(
8717     armnn::IWorkloadFactory& workloadFactory,
8718     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8719 {
8720     const unsigned int shape0[] = { 1, 2, 2, 3 };
8721     const unsigned int shape1[] = { 1, 1, 1, 1 };
8722
8723     std::vector<uint8_t> input0({
8724         1, 2, 3,    4,  5,  6,
8725         7, 8, 9,   10, 11, 12
8726     });
8727
8728     std::vector<uint8_t> input1({2});
8729
8730     std::vector<uint8_t> output({
8731         2,  4,   6,     8, 10, 12,
8732         14, 16, 18,    20, 22, 24
8733     });
8734
8735     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
8736                                                                               memoryManager,
8737                                                                               shape0,
8738                                                                               input0,
8739                                                                               1.0f,
8740                                                                               0,
8741                                                                               shape1,
8742                                                                               input1,
8743                                                                               1.0f,
8744                                                                               0,
8745                                                                               shape0,
8746                                                                               output,
8747                                                                               1.0f,
8748                                                                               0);
8749 }
8750
8751 LayerTestResult<uint8_t, 4> MultiplicationBroadcast1DVectorUint8Test(
8752     armnn::IWorkloadFactory& workloadFactory,
8753     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8754 {
8755     const unsigned int shape0[] = { 1, 2, 2, 3 };
8756     const unsigned int shape1[] = { 1, 1, 1, 3 };
8757
8758     std::vector<uint8_t> input0({
8759         1, 2, 3,    4,  5,  6,
8760         7, 8, 9,   10, 11, 12
8761     });
8762
8763     std::vector<uint8_t> input1({1, 2, 3});
8764
8765     std::vector<uint8_t> output({
8766         1,  4,   9,     4, 10, 18,
8767         7, 16,  27,    10, 22, 36
8768     });
8769
8770     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
8771                                                                               memoryManager,
8772                                                                               shape0,
8773                                                                               input0,
8774                                                                               1.0f,
8775                                                                               0,
8776                                                                               shape1,
8777                                                                               input1,
8778                                                                               1.0f,
8779                                                                               0,
8780                                                                               shape0,
8781                                                                               output,
8782                                                                               1.0f,
8783                                                                               0);
8784 }
8785
8786 LayerTestResult<int16_t, 4> MultiplicationInt16Test(
8787     armnn::IWorkloadFactory& workloadFactory,
8788     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8789 {
8790     const unsigned int shape[] = { 1, 2, 2, 3 };
8791
8792     std::vector<int16_t> input0(
8793     {
8794         6,   7,  8,  9, 10, 11,
8795         12, 13, 14, 15, 16, 17
8796     });
8797
8798     std::vector<int16_t> input1(
8799     {
8800         1, 2, 3,  4,  5,  6,
8801         7, 8, 9, 10, 11, 12
8802     });
8803
8804     std::vector<int16_t> output(
8805     {
8806         6,   14,  24,  36,  50,  66,
8807         84, 104, 126, 150, 176, 204
8808     });
8809
8810     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
8811                                                                               memoryManager,
8812                                                                               shape,
8813                                                                               input0,
8814                                                                               1.0f,
8815                                                                               0,
8816                                                                               shape,
8817                                                                               input1,
8818                                                                               1.0f,
8819                                                                               0,
8820                                                                               shape,
8821                                                                               output,
8822                                                                               1.0f,
8823                                                                               0);
8824 }
8825
8826 LayerTestResult<int16_t, 4> MultiplicationBroadcast1ElementInt16Test(
8827     armnn::IWorkloadFactory& workloadFactory,
8828     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8829 {
8830     const unsigned int shape0[] = { 1, 2, 2, 3 };
8831     const unsigned int shape1[] = { 1, 1, 1, 1 };
8832
8833     std::vector<int16_t> input0(
8834     {
8835         1, 2, 3,  4,  5,  6,
8836         7, 8, 9, 10, 11, 12
8837     });
8838
8839     std::vector<int16_t> input1({2});
8840
8841     std::vector<int16_t> output(
8842     {
8843         2,   4,  6,  8, 10, 12,
8844         14, 16, 18, 20, 22, 24
8845     });
8846
8847     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
8848                                                                               memoryManager,
8849                                                                               shape0,
8850                                                                               input0,
8851                                                                               1.0f,
8852                                                                               0,
8853                                                                               shape1,
8854                                                                               input1,
8855                                                                               1.0f,
8856                                                                               0,
8857                                                                               shape0,
8858                                                                               output,
8859                                                                               1.0f,
8860                                                                               0);
8861 }
8862
8863 LayerTestResult<int16_t, 4> MultiplicationBroadcast1DVectorInt16Test(
8864     armnn::IWorkloadFactory& workloadFactory,
8865     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8866 {
8867     const unsigned int shape0[] = { 1, 2, 2, 3 };
8868     const unsigned int shape1[] = { 1, 1, 1, 3 };
8869
8870     std::vector<int16_t> input0(
8871     {
8872         1, 2, 3,  4,  5,  6,
8873         7, 8, 9, 10, 11, 12
8874     });
8875
8876     std::vector<int16_t> input1({1, 2, 3});
8877
8878     std::vector<int16_t> output(
8879     {
8880         1,  4,  9,  4, 10, 18,
8881         7, 16, 27, 10, 22, 36
8882     });
8883
8884     return MultiplicationQuantizeTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
8885                                                                               memoryManager,
8886                                                                               shape0,
8887                                                                               input0,
8888                                                                               1.0f,
8889                                                                               0,
8890                                                                               shape1,
8891                                                                               input1,
8892                                                                               1.0f,
8893                                                                               0,
8894                                                                               shape0,
8895                                                                               output,
8896                                                                               1.0f,
8897                                                                               0);
8898 }
8899
8900 namespace
8901 {
8902 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
8903 LayerTestResult<T, 4> SubtractionTestHelper(
8904     armnn::IWorkloadFactory& workloadFactory,
8905     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
8906     const unsigned int shape0[4],
8907     const std::vector<T>& values0,
8908     float scale0,
8909     int32_t offset0,
8910     const unsigned int shape1[4],
8911     const std::vector<T> & values1,
8912     float scale1,
8913     int32_t offset1,
8914     const unsigned int outShape[4],
8915     const std::vector<T> & outValues,
8916     float outScale,
8917     int32_t outOffset)
8918 {
8919     armnn::TensorInfo inputTensorInfo0(4, shape0, ArmnnType);
8920     armnn::TensorInfo inputTensorInfo1(4, shape1, ArmnnType);
8921     armnn::TensorInfo outputTensorInfo(4, outShape, ArmnnType);
8922
8923     inputTensorInfo0.SetQuantizationScale(scale0);
8924     inputTensorInfo0.SetQuantizationOffset(offset0);
8925
8926     inputTensorInfo1.SetQuantizationScale(scale1);
8927     inputTensorInfo1.SetQuantizationOffset(offset1);
8928
8929     outputTensorInfo.SetQuantizationScale(outScale);
8930     outputTensorInfo.SetQuantizationOffset(outOffset);
8931
8932     auto input0 = MakeTensor<T, 4>(inputTensorInfo0, values0);
8933     auto input1 = MakeTensor<T, 4>(inputTensorInfo1, values1);
8934
8935     LayerTestResult<T, 4> result(outputTensorInfo);
8936     result.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outValues);
8937
8938     std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
8939     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
8940     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
8941
8942     armnn::SubtractionQueueDescriptor data;
8943     armnn::WorkloadInfo info;
8944     AddInputToWorkload(data,  info, inputTensorInfo0, inputHandle0.get());
8945     AddInputToWorkload(data,  info, inputTensorInfo1, inputHandle1.get());
8946     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
8947
8948     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSubtraction(data, info);
8949
8950     inputHandle0->Allocate();
8951     inputHandle1->Allocate();
8952     outputHandle->Allocate();
8953
8954     CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
8955     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
8956
8957     workload->PostAllocationConfigure();
8958     workload->Execute();
8959
8960     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
8961
8962     return result;
8963 }
8964 } // anonymous namespace
8965
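// Worked example for SubtractionUint8Test below: 0.5 * (10 - 2) = 4 and
// 1.0 * (1 - 0) = 1, so the first expected output is (4 - 1) / 1.0 + 0 = 3.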
8966 LayerTestResult<uint8_t, 4> SubtractionUint8Test(
8967     armnn::IWorkloadFactory& workloadFactory,
8968     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8969 {
8970     const unsigned int shape0[] = { 1, 1, 2, 2 };
8971     const unsigned int shape1[] = { 1, 1, 2, 2 };
8972
8973     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
8974     std::vector<uint8_t> input1({ 1, 2, 1, 2 });
8975     std::vector<uint8_t> output({ 3, 3, 5, 5 });
8976
8977     return SubtractionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
8978                                                                    memoryManager,
8979                                                                    shape0, input0, 0.5f, 2,
8980                                                                    shape1, input1, 1.0f, 0,
8981                                                                    shape0, output, 1.0f, 0);
8982 }
8983
8984 LayerTestResult<uint8_t, 4> SubtractionBroadcast1ElementUint8Test(
8985     armnn::IWorkloadFactory& workloadFactory,
8986     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
8987 {
8988     const unsigned int shape0[] = { 1, 1, 2, 2 };
8989     const unsigned int shape1[] = { 1, 1, 1, 1 };
8990
8991     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
8992     std::vector<uint8_t> input1({ 2 });
8993     std::vector<uint8_t> output({ 5, 6, 7, 8 });
8994
8995     return SubtractionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
8996                                                                    memoryManager,
8997                                                                    shape0, input0, 0.5f, 2,
8998                                                                    shape1, input1, 1.0f, 0,
8999                                                                    shape0, output, 1.0f, 3);
9000 }
9001
9002 LayerTestResult<uint8_t, 4> SubtractionBroadcastUint8Test(
9003     armnn::IWorkloadFactory& workloadFactory,
9004     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9005 {
9006     const unsigned int shape0[] = { 1, 1, 2, 2 };
9007     const unsigned int shape1[] = { 1, 1, 1, 2 };
9008
9009     std::vector<uint8_t> input0({ 10, 12, 14, 16 });
9010     std::vector<uint8_t> input1({ 2, 1 });
9011     std::vector<uint8_t> output({ 8, 11, 12, 15 });
9012
9013     return SubtractionTestHelper<armnn::DataType::QuantisedAsymm8>(workloadFactory,
9014                                                                    memoryManager,
9015                                                                    shape0, input0, 1.0f, 0,
9016                                                                    shape1, input1, 1.0f, 0,
9017                                                                    shape0, output, 1.0f, 0);
9018 }
9019
9020 LayerTestResult<float, 4> SubtractionTest(
9021     armnn::IWorkloadFactory& workloadFactory,
9022     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9023 {
9024     const unsigned int shape0[] = { 1, 1, 2, 2 };
9025     const unsigned int shape1[] = { 1, 1, 2, 2 };
9026
9027     std::vector<float> input0({ 1,  2, 3, 4 });
9028     std::vector<float> input1({ 1, -1, 0, 2 });
9029     std::vector<float> output({ 0,  3, 3, 2 });
9030
9031     return SubtractionTestHelper<armnn::DataType::Float32>(workloadFactory,
9032                                                            memoryManager,
9033                                                            shape0, input0, 1.0f, 0,
9034                                                            shape1, input1, 1.0f, 0,
9035                                                            shape0, output, 1.0f, 0);
9036 }
9037
9038 LayerTestResult<float, 4> SubtractionBroadcast1ElementTest(
9039     armnn::IWorkloadFactory& workloadFactory,
9040     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9041 {
9042     const unsigned int shape0[] = { 1, 1, 2, 2 };
9043     const unsigned int shape1[] = { 1, 1, 1, 1 };
9044
9045     std::vector<float> input0({ 1,  2, 3, 4 });
9046     std::vector<float> input1({ 10 });
9047     std::vector<float> output({ -9,  -8, -7, -6 });
9048
9049     return SubtractionTestHelper<armnn::DataType::Float32>(workloadFactory,
9050                                                            memoryManager,
9051                                                            shape0, input0, 1.0f, 0,
9052                                                            shape1, input1, 1.0f, 0,
9053                                                            shape0, output, 1.0f, 0);
9054 }
9055
9056 LayerTestResult<float, 4> SubtractionBroadcastTest(
9057     armnn::IWorkloadFactory& workloadFactory,
9058     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9059 {
9060     const unsigned int shape0[] = { 1, 1, 2, 2 };
9061     const unsigned int shape1[] = { 1, 1, 1, 2 };
9062
9063     std::vector<float> input0({ 1,  2, 3, 4 });
9064     std::vector<float> input1({ 10, -5 });
9065     std::vector<float> output({ -9,  7, -7, 9 });
9066
9067     return SubtractionTestHelper<armnn::DataType::Float32>(workloadFactory,
9068                                                            memoryManager,
9069                                                            shape0, input0, 1.0f, 0,
9070                                                            shape1, input1, 1.0f, 0,
9071                                                            shape0, output, 1.0f, 0);
9072 }
9073
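// Note: QuantisedSymm16 always uses a zero offset, so here input0 dequantizes to
// { 5, 6, 7, 8 } rather than the { 4, 5, 6, 7 } of the Uint8 variant above.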
9074 LayerTestResult<int16_t, 4> SubtractionInt16Test(
9075     armnn::IWorkloadFactory& workloadFactory,
9076     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9077 {
9078     const unsigned int shape0[] = { 1, 1, 2, 2 };
9079     const unsigned int shape1[] = { 1, 1, 2, 2 };
9080
9081     std::vector<int16_t> input0({ 10, 12, 14, 16 });
9082     std::vector<int16_t> input1({ 1, 2, 1, 2 });
9083     std::vector<int16_t> output({ 4, 4, 6, 6 });
9084
9085     return SubtractionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
9086                                                                    memoryManager,
9087                                                                    shape0, input0, 0.5f, 0,
9088                                                                    shape1, input1, 1.0f, 0,
9089                                                                    shape0, output, 1.0f, 0);
9090 }
9091
9092 LayerTestResult<int16_t, 4> SubtractionBroadcast1ElementInt16Test(
9093     armnn::IWorkloadFactory& workloadFactory,
9094     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9095 {
9096     const unsigned int shape0[] = { 1, 1, 2, 2 };
9097     const unsigned int shape1[] = { 1, 1, 1, 1 };
9098
9099     std::vector<int16_t> input0({ 10, 12, 14, 16 });
9100     std::vector<int16_t> input1({ 2 });
9101     std::vector<int16_t> output({ 3, 4, 5, 6 });
9102
9103     return SubtractionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
9104                                                                    memoryManager,
9105                                                                    shape0, input0, 0.5f, 0,
9106                                                                    shape1, input1, 1.0f, 0,
9107                                                                    shape0, output, 1.0f, 0);
9108 }
9109
9110 LayerTestResult<int16_t, 4> SubtractionBroadcastInt16Test(
9111     armnn::IWorkloadFactory& workloadFactory,
9112     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9113 {
9114     const unsigned int shape0[] = { 1, 1, 2, 2 };
9115     const unsigned int shape1[] = { 1, 1, 1, 2 };
9116
9117     std::vector<int16_t> input0({ 10, 12, 14, 16 });
9118     std::vector<int16_t> input1({ 2, 1 });
9119     std::vector<int16_t> output({ 8, 11, 12, 15 });
9120
9121     return SubtractionTestHelper<armnn::DataType::QuantisedSymm16>(workloadFactory,
9122                                                                    memoryManager,
9123                                                                    shape0, input0, 1.0f, 0,
9124                                                                    shape1, input1, 1.0f, 0,
9125                                                                    shape0, output, 1.0f, 0);
9126 }
9127
9128 LayerTestResult<float, 4> BatchNormTest(
9129     armnn::IWorkloadFactory& workloadFactory,
9130     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9131 {
9132     // BatchSize: 1
9133     // Channels: 2
9134     // Height: 3
9135     // Width: 2
9136
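    // The expected outputs follow the standard batch norm transform
    //     y = gamma * (x - mean) / sqrt(variance + epsilon) + beta
    // applied per channel, using the constants defined inside BatchNormTestImpl;
    // the values below are consistent with mean { 3, -2 }, variance { 4, 9 },
    // beta { 3, 2 } and gamma { 2, 1 }, which make channel 0 an identity mapping.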
9137     const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
9138     std::vector<float> inputValues
9139     {
9140         // Batch 0, Channel 0, Height (3) x Width (2)
9141          1.f, 4.f,
9142          4.f, 2.f,
9143          1.f, 6.f,
9144
9145         // Batch 0, Channel 1, Height (3) x Width (2)
9146          1.f, 1.f,
9147          4.f, 1.f,
9148         -2.f, 4.f
9149     };
9150     std::vector<float> expectedOutputValues
9151     {
9152         // Batch 0, Channel 0, Height (3) x Width (2)
9153         1.f, 4.f,
9154         4.f, 2.f,
9155         1.f, 6.f,
9156
9157         // Batch 0, Channel 1, Height (3) x Width (2)
9158         3.f, 3.f,
9159         4.f, 3.f,
9160         2.f, 4.f
9161     };
9162
9163     return BatchNormTestImpl<armnn::DataType::Float32>(
9164         workloadFactory, memoryManager,
9165         inputOutputShape, inputValues, expectedOutputValues,
9166         0.f, 0, armnn::DataLayout::NCHW);
9167 }
9168
9169 LayerTestResult<float, 4> BatchNormNhwcTest(
9170     armnn::IWorkloadFactory& workloadFactory,
9171     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9172 {
9173     // BatchSize: 1
9174     // Height: 3
9175     // Width: 2
9176     // Channels: 2
9177
9178     const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
9179     std::vector<float> inputValues
9180     {
9181         // Batch 0, Height 0, Width (2) x Channel (2)
9182         1.f,  1.f,
9183         4.f,  1.f,
9184
9185         // Batch 0, Height 1, Width (2) x Channel (2)
9186         4.f,  4.f,
9187         2.f,  1.f,
9188
9189         // Batch 0, Height 2, Width (2) x Channel (2)
9190         1.f, -2.f,
9191         6.f,  4.f
9192     };
9193     std::vector<float> expectedOutputValues
9194     {
9195         // Batch 0, Height 0, Width (2) x Channel (2)
9196         1.f, 3.f,
9197         4.f, 3.f,
9198
9199         // Batch 0, Height 1, Width (2) x Channel (2)
9200         4.f, 4.f,
9201         2.f, 3.f,
9202
9203         // Batch 0, Height 2, Width (2) x Channel (2)
9204         1.f, 2.f,
9205         6.f, 4.f
9206     };
9207
9208     return BatchNormTestImpl<armnn::DataType::Float32>(
9209         workloadFactory, memoryManager,
9210         inputOutputShape, inputValues, expectedOutputValues,
9211         0.f, 0, armnn::DataLayout::NHWC);
9212 }
9213
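// Same data as BatchNormTest, but run through QuantisedAsymm8: with scale 1/20
// and offset 50 the float values quantize as q = 20 * x + 50, e.g. 1.0f -> 70
// and -2.0f -> 10, comfortably inside [0, 255].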
9214 LayerTestResult<uint8_t, 4> BatchNormUint8Test(
9215     armnn::IWorkloadFactory& workloadFactory,
9216     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9217 {
9218     // BatchSize: 1
9219     // Channels: 2
9220     // Height: 3
9221     // Width: 2
9222
9223     const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
9224     std::vector<float> inputValues
9225     {
9226         // Batch 0, Channel 0, Height (3) x Width (2)
9227          1.f, 4.f,
9228          4.f, 2.f,
9229          1.f, 6.f,
9230
9231         // Batch 0, Channel 1, Height (3) x Width (2)
9232          1.f, 1.f,
9233          4.f, 1.f,
9234         -2.f, 4.f
9235     };
9236     std::vector<float> expectedOutputValues
9237     {
9238         // Batch 0, Channel 0, Height (3) x Width (2)
9239         1.f, 4.f,
9240         4.f, 2.f,
9241         1.f, 6.f,
9242
9243         // Batch 0, Channel 1, Height (3) x Width (2)
9244         3.f, 3.f,
9245         4.f, 3.f,
9246         2.f, 4.f
9247     };
9248
9249     return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
9250         workloadFactory, memoryManager,
9251         inputOutputShape, inputValues, expectedOutputValues,
9252         1.f/20.f, 50, armnn::DataLayout::NCHW);
9253 }
9254
9255 LayerTestResult<uint8_t, 4> BatchNormUint8NhwcTest(
9256     armnn::IWorkloadFactory& workloadFactory,
9257     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9258 {
9259     // BatchSize: 1
9260     // Height: 3
9261     // Width: 2
9262     // Channels: 2
9263
9264     const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
9265     std::vector<float> inputValues
9266     {
9267         // Batch 0, Height 0, Width (2) x Channel (2)
9268         1.f,  1.f,
9269         4.f,  1.f,
9270
9271         // Batch 0, Height 1, Width (2) x Channel (2)
9272         4.f,  4.f,
9273         2.f,  1.f,
9274
9275         // Batch 0, Height 2, Width (2) x Channel (2)
9276         1.f, -2.f,
9277         6.f,  4.f
9278     };
9279     std::vector<float> expectedOutputValues
9280     {
9281         // Batch 0, Height 0, Width (2) x Channel (2)
9282         1.f, 3.f,
9283         4.f, 3.f,
9284
9285         // Batch 0, Height 1, Width (2) x Channel (2)
9286         4.f, 4.f,
9287         2.f, 3.f,
9288
9289         // Batch 0, Height 2, Width (2) x Channel (2)
9290         1.f, 2.f,
9291         6.f, 4.f
9292     };
9293
9294     return BatchNormTestImpl<armnn::DataType::QuantisedAsymm8>(
9295         workloadFactory, memoryManager,
9296         inputOutputShape, inputValues, expectedOutputValues,
9297         1.f/20.f, 50, armnn::DataLayout::NHWC);
9298 }
9299
9300 LayerTestResult<int16_t, 4> BatchNormInt16Test(
9301     armnn::IWorkloadFactory& workloadFactory,
9302     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9303 {
9304     // BatchSize: 1
9305     // Channels: 2
9306     // Height: 3
9307     // Width: 2
9308
9309     const armnn::TensorShape inputOutputShape{ 1, 2, 3, 2 };
9310     std::vector<float> inputValues
9311     {
9312         // Batch 0, Channel 0, Height (3) x Width (2)
9313          1.f, 4.f,
9314          4.f, 2.f,
9315          1.f, 6.f,
9316
9317         // Batch 0, Channel 1, Height (3) x Width (2)
9318          1.f, 1.f,
9319          4.f, 1.f,
9320         -2.f, 4.f
9321     };
9322     std::vector<float> expectedOutputValues
9323     {
9324         // Batch 0, Channel 0, Height (3) x Width (2)
9325         1.f, 4.f,
9326         4.f, 2.f,
9327         1.f, 6.f,
9328
9329         // Batch 0, Channel 1, Height (3) x Width (2)
9330         3.f, 3.f,
9331         4.f, 3.f,
9332         2.f, 4.f
9333     };
9334
9335     return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
9336         workloadFactory, memoryManager,
9337         inputOutputShape, inputValues, expectedOutputValues,
9338         1.f/20.f, 50, armnn::DataLayout::NCHW);
9339 }
9340
9341 LayerTestResult<int16_t, 4> BatchNormInt16NhwcTest(
9342     armnn::IWorkloadFactory& workloadFactory,
9343     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9344 {
9345     // BatchSize: 1
9346     // Height: 3
9347     // Width: 2
9348     // Channels: 2
9349
9350     const armnn::TensorShape inputOutputShape{ 1, 3, 2, 2 };
9351     std::vector<float> inputValues
9352     {
9353         // Batch 0, Height 0, Width (2) x Channel (2)
9354         1.f,  1.f,
9355         4.f,  1.f,
9356
9357         // Batch 0, Height 1, Width (2) x Channel (2)
9358         4.f,  4.f,
9359         2.f,  1.f,
9360
9361         // Batch 0, Height 2, Width (2) x Channel (2)
9362         1.f, -2.f,
9363         6.f,  4.f
9364     };
9365     std::vector<float> expectedOutputValues
9366     {
9367         // Batch 0, Height 0, Width (2) x Channel (2)
9368         1.f, 3.f,
9369         4.f, 3.f,
9370
9371         // Batch 0, Height 1, Width (2) x Channel (2)
9372         4.f, 4.f,
9373         2.f, 3.f,
9374
9375         // Batch 0, Height 2, Width (2) x Channel (2)
9376         1.f, 2.f,
9377         6.f, 4.f
9378     };
9379
9380     return BatchNormTestImpl<armnn::DataType::QuantisedSymm16>(
9381         workloadFactory, memoryManager,
9382         inputOutputShape, inputValues, expectedOutputValues,
9383         1.f/20.f, 50, armnn::DataLayout::NHWC);
9384 }
9385
9386 LayerTestResult<uint8_t, 4> ConstantUint8CustomQuantizationScaleAndOffsetTest(
9387     armnn::IWorkloadFactory& workloadFactory,
9388     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9389 {
9390     return ConstantTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 2e-6f, 1);
9391 }
9392
9393 LayerTestResult<int16_t, 4> ConstantInt16CustomQuantizationScaleAndOffsetTest(
9394     armnn::IWorkloadFactory& workloadFactory,
9395     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9396 {
9397     return ConstantTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, 2e-6f, 1);
9398 }
9399
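// The Concatenation*Uint8Test wrappers below all use an arbitrary QuantisedAsymm8
// quantization of scale 0.5 and offset -1; as noted for ConcatUint16Test above,
// Concat copies quantized values unchanged, so the exact choice is immaterial.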
9400 LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(
9401     armnn::IWorkloadFactory& workloadFactory,
9402     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9403 {
9404     return Concatenation1dTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9405 }
9406
9407 LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(
9408     armnn::IWorkloadFactory& workloadFactory,
9409     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9410 {
9411     return Concatenation2dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9412 }
9413
9414 LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(
9415     armnn::IWorkloadFactory& workloadFactory,
9416     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9417 {
9418     return Concatenation2dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9419 }
9420
9421 LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(
9422     armnn::IWorkloadFactory& workloadFactory,
9423     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9424 {
9425     return Concatenation2dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
9426         workloadFactory, memoryManager, 0.5f, -1);
9427 }
9428
9429 LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(
9430     armnn::IWorkloadFactory& workloadFactory,
9431     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9432 {
9433     return Concatenation2dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
9434         workloadFactory, memoryManager, 0.5f, -1);
9435 }
9436
9437 LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(
9438     armnn::IWorkloadFactory& workloadFactory,
9439     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9440 {
9441     return Concatenation3dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9442 }
9443
9444 LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(
9445     armnn::IWorkloadFactory& workloadFactory,
9446     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9447 {
9448     return Concatenation3dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9449 }
9450
9451 LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(
9452     armnn::IWorkloadFactory& workloadFactory,
9453     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9454     bool useSubtensor)
9455 {
9456     return Concatenation3dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
9457         workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
9458 }
9459
9460 LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(
9461     armnn::IWorkloadFactory& workloadFactory,
9462     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9463 {
9464     return Concatenation3dDim0DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
        workloadFactory, memoryManager, 0.5f, -1);
9465 }
9466
9467 LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(
9468     armnn::IWorkloadFactory& workloadFactory,
9469     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9470 {
9471     return Concatenation3dDim1DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
9472         workloadFactory, memoryManager, 0.5f, -1);
9473 }
9474
9475 LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(
9476     armnn::IWorkloadFactory& workloadFactory,
9477     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9478     bool useSubtensor)
9479 {
9480     return Concatenation3dDim2DiffInputDimsTestImpl<armnn::DataType::QuantisedAsymm8>(
9481         workloadFactory, memoryManager, useSubtensor, 0.5f, -1);
9482 }
9483
9484 LayerTestResult<uint8_t, 4> Concatenation4dDim0Uint8Test(
9485     armnn::IWorkloadFactory& workloadFactory,
9486     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9487 {
9488     return Concatenation4dDim0TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9489 }
9490
9491 LayerTestResult<uint8_t, 4> Concatenation4dDim1Uint8Test(
9492     armnn::IWorkloadFactory& workloadFactory,
9493     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9494 {
9495     return Concatenation4dDim1TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9496 }
9497
9498 LayerTestResult<uint8_t, 4> Concatenation4dDim2Uint8Test(
9499     armnn::IWorkloadFactory& workloadFactory,
9500     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9501 {
9502     return Concatenation4dDim2TestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, 0.5f, -1);
9503 }
9504
9505 LayerTestResult<uint8_t, 4> Concatenation4dDim3Uint8Test(
9506     armnn::IWorkloadFactory& workloadFactory,
9507     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, bool useSubtensor)
9508 {
9509     return Concatenation4dDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
9510         workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
9511 }
9512
9513 LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim0Uint8Test(
9514     armnn::IWorkloadFactory& workloadFactory,
9515     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9516 {
9517     return Concatenation4dDiffShapeDim0TestImpl<armnn::DataType::QuantisedAsymm8>(
9518         workloadFactory, memoryManager, 0.5f, -1);
9519 }
9520
9521 LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim1Uint8Test(
9522     armnn::IWorkloadFactory& workloadFactory,
9523     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9524 {
9525     return Concatenation4dDiffShapeDim1TestImpl<armnn::DataType::QuantisedAsymm8>(
9526         workloadFactory, memoryManager, 0.5f, -1);
9527 }
9528
9529 LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim2Uint8Test(
9530     armnn::IWorkloadFactory& workloadFactory,
9531     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9532 {
9533     return Concatenation4dDiffShapeDim2TestImpl<armnn::DataType::QuantisedAsymm8>(
9534         workloadFactory, memoryManager, 0.5f, -1);
9535 }
9536
9537 LayerTestResult<uint8_t, 4> Concatenation4dDiffShapeDim3Uint8Test(
9538     armnn::IWorkloadFactory& workloadFactory,
9539     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9540     bool useSubtensor)
9541 {
9542     return Concatenation4dDiffShapeDim3TestImpl<armnn::DataType::QuantisedAsymm8>(
9543         workloadFactory, memoryManager, 0.5f, -1, useSubtensor);
9544 }
9545
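// In the pooling wrappers below, the trailing arguments on some quantized variants
// (e.g. 3.0f, -5) are the quantization scale and offset forwarded to the
// *TestCommon impls; where they are omitted, the impls' defaults are used.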
9546 LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(
9547     armnn::IWorkloadFactory& workloadFactory,
9548     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9549     bool forceNoPadding)
9550 {
9551     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::Float32>(
9552         workloadFactory, memoryManager, forceNoPadding);
9553 }
9554
9555 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(
9556     armnn::IWorkloadFactory& workloadFactory,
9557     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9558     bool forceNoPadding)
9559 {
9560     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedAsymm8>(
9561         workloadFactory, memoryManager, forceNoPadding, 3.0f, -5);
9562 }
9563
9564 LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Int16Test(
9565     armnn::IWorkloadFactory& workloadFactory,
9566     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9567     bool forceNoPadding)
9568 {
9569     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<armnn::DataType::QuantisedSymm16>(
9570             workloadFactory, memoryManager, forceNoPadding);
9571 }
9572
9573 LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(
9574     armnn::IWorkloadFactory& workloadFactory,
9575     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9576     bool forceNoPadding)
9577 {
9578     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::Float32>(
9579         workloadFactory, memoryManager, forceNoPadding);
9580 }
9581
9582 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(
9583     armnn::IWorkloadFactory& workloadFactory,
9584     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9585     bool forceNoPadding)
9586 {
9587     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedAsymm8>(
9588         workloadFactory, memoryManager, forceNoPadding, 0.1f, 128);
9589 }
9590
9591 LayerTestResult<int16_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Int16Test(
9592     armnn::IWorkloadFactory& workloadFactory,
9593     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9594     bool forceNoPadding)
9595 {
9596     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<armnn::DataType::QuantisedSymm16>(
9597             workloadFactory, memoryManager, forceNoPadding);
9598 }
9599
9600 LayerTestResult<float, 4> SimpleMaxPooling2dTest(
9601     armnn::IWorkloadFactory& workloadFactory,
9602     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9603     const armnn::DataLayout dataLayout)
9604 {
9605     return SimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
9606 }
9607
9608 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dUint8Test(
9609     armnn::IWorkloadFactory& workloadFactory,
9610     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9611     const armnn::DataLayout dataLayout)
9612 {
9613     return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
9614 }
9615
9616 LayerTestResult<int16_t, 4> SimpleMaxPooling2dInt16Test(
9617     armnn::IWorkloadFactory& workloadFactory,
9618     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9619     const armnn::DataLayout dataLayout)
9620 {
9621     return SimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
9622 }

9623 LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(
9624     armnn::IWorkloadFactory& workloadFactory,
9625     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9626 {
9627     return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9628 }
9629
9630 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(
9631     armnn::IWorkloadFactory& workloadFactory,
9632     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9633 {
9634     return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
9635             workloadFactory, memoryManager, 1.0f, -5);
9636 }
9637
9638 LayerTestResult<int16_t, 4> IgnorePaddingSimpleMaxPooling2dInt16Test(
9639     armnn::IWorkloadFactory& workloadFactory,
9640     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9641 {
9642     return IgnorePaddingSimpleMaxPooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
9643             workloadFactory, memoryManager);
9644 }
9645
9646 LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(
9647     armnn::IWorkloadFactory& workloadFactory,
9648     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9649 {
9650     return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9651 }
9652
9653 LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(
9654     armnn::IWorkloadFactory& workloadFactory,
9655     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9656 {
9657     return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
9658             workloadFactory, memoryManager, 1.0f, -5);
9659 }
9660
9661 LayerTestResult<int16_t, 4> IgnorePaddingMaxPooling2dSize3Int16Test(
9662     armnn::IWorkloadFactory& workloadFactory,
9663     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9664 {
9665     return IgnorePaddingMaxPooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
9666             workloadFactory, memoryManager);
9667 }
9668
9669 LayerTestResult<float, 4> SimpleAveragePooling2dTest(
9670     armnn::IWorkloadFactory& workloadFactory,
9671     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9672     const armnn::DataLayout dataLayout)
9673 {
9674     return SimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
9675 }
9676
9677 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(
9678     armnn::IWorkloadFactory& workloadFactory,
9679     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9680     const armnn::DataLayout dataLayout)
9681 {
9682     return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
9683         workloadFactory, memoryManager, dataLayout, 0.5, -1);
9684 }
9685
9686 LayerTestResult<int16_t, 4> SimpleAveragePooling2dInt16Test(
9687     armnn::IWorkloadFactory& workloadFactory,
9688     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9689     const armnn::DataLayout dataLayout)
9690 {
9691     return SimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
9692             workloadFactory, memoryManager, dataLayout);
9693 }
9694
9695 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3x2Stride2x2Test(
9696     armnn::IWorkloadFactory& workloadFactory,
9697     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9698     bool forceNoPadding)
9699 {
9700     return IgnorePaddingAveragePooling2dSize3x2Stride2x2TestCommon<armnn::DataType::Float32>(
9701         workloadFactory, memoryManager, forceNoPadding);
9702 }
9703
9704 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(
9705     armnn::IWorkloadFactory& workloadFactory,
9706     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9707 {
9708     return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9709 }
9710
9711 LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(
9712     armnn::IWorkloadFactory& workloadFactory,
9713     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9714 {
9715     return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
9716         workloadFactory, memoryManager, 0.5, -1);
9717 }
9718
9719 LayerTestResult<int16_t, 4> LargeTensorsAveragePooling2dInt16Test(
9720     armnn::IWorkloadFactory& workloadFactory,
9721     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9722 {
9723     return LargeTensorsAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
9724             workloadFactory, memoryManager);
9725 }

LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
}
9732
9733 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(
9734     armnn::IWorkloadFactory& workloadFactory,
9735     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9736 {
9737     return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
9738             workloadFactory, memoryManager);
9739 }
9740
9741 LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dInt16Test(
9742     armnn::IWorkloadFactory& workloadFactory,
9743     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9744 {
9745     return IgnorePaddingSimpleAveragePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
9746             workloadFactory, memoryManager);
9747 }
9748
9749 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(
9750     armnn::IWorkloadFactory& workloadFactory,
9751     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9752 {
9753     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::Float32>(
9754             workloadFactory, memoryManager);
9755 }
9756
9757 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
9758     armnn::IWorkloadFactory& workloadFactory,
9759     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9760 {
9761     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedAsymm8>(
9762             workloadFactory, memoryManager);
9763 }
9764
9765 LayerTestResult<int16_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingInt16Test(
9766     armnn::IWorkloadFactory& workloadFactory,
9767     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9768 {
9769     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<armnn::DataType::QuantisedSymm16>(
9770             workloadFactory, memoryManager);
9771 }
9772
9773 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(
9774     armnn::IWorkloadFactory& workloadFactory,
9775     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9776 {
9777     return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9778 }
9779
9780 LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(
9781     armnn::IWorkloadFactory& workloadFactory,
9782     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9783 {
9784     return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(
9785             workloadFactory, memoryManager);
9786 }
9787
9788 LayerTestResult<int16_t, 4> IgnorePaddingAveragePooling2dSize3Int16Test(
9789     armnn::IWorkloadFactory& workloadFactory,
9790     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9791 {
9792     return IgnorePaddingAveragePooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(
9793             workloadFactory, memoryManager);
9794 }
9795
9796 LayerTestResult<float, 4> SimpleL2Pooling2dTest(
9797     armnn::IWorkloadFactory& workloadFactory,
9798     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9799     const armnn::DataLayout dataLayout)
9800 {
9801     return SimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, dataLayout);
9802 }
9803
9804 LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(
9805     armnn::IWorkloadFactory& workloadFactory,
9806     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9807     const armnn::DataLayout dataLayout)
9808 {
9809     return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager, dataLayout);
9810 }
9811
9812 LayerTestResult<int16_t, 4> SimpleL2Pooling2dInt16Test(
9813     armnn::IWorkloadFactory& workloadFactory,
9814     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9815     const armnn::DataLayout dataLayout)
9816 {
9817     return SimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager, dataLayout);
9818 }
9819
9820 LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(
9821     armnn::IWorkloadFactory& workloadFactory,
9822     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9823 {
9824     return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9825 }
9826
9827 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(
9828     armnn::IWorkloadFactory& workloadFactory,
9829     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9830 {
9831     return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9832 }
9833
9834 LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride1Int16Test(
9835     armnn::IWorkloadFactory& workloadFactory,
9836     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9837 {
9838     return L2Pooling2dSize3Stride1TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9839 }
9840
9841 LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(
9842     armnn::IWorkloadFactory& workloadFactory,
9843     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9844 {
9845     return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9846 }
9847
9848 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(
9849     armnn::IWorkloadFactory& workloadFactory,
9850     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9851 {
9852     return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9853 }
9854
9855 LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride3Int16Test(
9856     armnn::IWorkloadFactory& workloadFactory,
9857     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9858 {
9859     return L2Pooling2dSize3Stride3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9860 }

LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
}
9867
9868 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(
9869     armnn::IWorkloadFactory& workloadFactory,
9870     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9871 {
9872     return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9873 }
9874
9875 LayerTestResult<int16_t, 4> L2Pooling2dSize3Stride4Int16Test(
9876     armnn::IWorkloadFactory& workloadFactory,
9877     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9878 {
9879     return L2Pooling2dSize3Stride4TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9880 }
9881
9882 LayerTestResult<float, 4> L2Pooling2dSize7Test(
9883     armnn::IWorkloadFactory& workloadFactory,
9884     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9885 {
9886     return L2Pooling2dSize7TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9887 }
9888
9889 LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(
9890     armnn::IWorkloadFactory& workloadFactory,
9891     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9892 {
9893     return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9894 }
9895
9896 LayerTestResult<int16_t, 4> L2Pooling2dSize7Int16Test(
9897     armnn::IWorkloadFactory& workloadFactory,
9898     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9899 {
9900     return L2Pooling2dSize7TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9901 }
9902
9903 LayerTestResult<float, 4> L2Pooling2dSize9Test(
9904     armnn::IWorkloadFactory& workloadFactory,
9905     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9906 {
9907     return L2Pooling2dSize9TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9908 }
9909
9910 LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(
9911     armnn::IWorkloadFactory& workloadFactory,
9912     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9913 {
9914     return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9915 }
9916
9917 LayerTestResult<int16_t, 4> L2Pooling2dSize9Int16Test(
9918     armnn::IWorkloadFactory& workloadFactory,
9919     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9920 {
9921     return L2Pooling2dSize9TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9922 }

LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(
    armnn::IWorkloadFactory& workloadFactory,
    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
{
    return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
}
9929
9930 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(
9931     armnn::IWorkloadFactory& workloadFactory,
9932     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9933 {
9934     return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9935 }
9936
9937 LayerTestResult<int16_t, 4> IgnorePaddingSimpleL2Pooling2dInt16Test(
9938     armnn::IWorkloadFactory& workloadFactory,
9939     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9940 {
9941     return IgnorePaddingSimpleL2Pooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9942 }
9943
9944 LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(
9945     armnn::IWorkloadFactory& workloadFactory,
9946     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9947 {
9948     return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9949 }
9950
9951 LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(
9952     armnn::IWorkloadFactory& workloadFactory,
9953     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9954 {
9955     return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9956 }
9957
9958 LayerTestResult<int16_t, 4> IgnorePaddingL2Pooling2dSize3Int16Test(
9959     armnn::IWorkloadFactory& workloadFactory,
9960     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9961 {
9962     return IgnorePaddingL2Pooling2dSize3TestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9963 }
9964
9965 LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(
9966     armnn::IWorkloadFactory& workloadFactory,
9967     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9968 {
9969     return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager);
9970 }
9971
9972 LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(
9973     armnn::IWorkloadFactory& workloadFactory,
9974     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9975 {
9976     return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
9977 }
9978
9979 LayerTestResult<int16_t, 4> AsymmetricNonSquarePooling2dInt16Test(
9980     armnn::IWorkloadFactory& workloadFactory,
9981     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
9982 {
9983     return AsymmetricNonSquarePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
9984 }
9985
9986 LayerTestResult<float, 4> ComparePooling2dTest(
9987     armnn::IWorkloadFactory& workloadFactory,
9988     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9989     armnn::IWorkloadFactory& refWorkloadFactory,
    armnn::PoolingAlgorithm poolingType)
9991 {
9992     return ComparePooling2dTestCommon<armnn::DataType::Float32>(
9993         workloadFactory, memoryManager, refWorkloadFactory, poolingType);
9994 }
9995
9996 LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(
9997     armnn::IWorkloadFactory& workloadFactory,
9998     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
9999     armnn::IWorkloadFactory& refWorkloadFactory,
    armnn::PoolingAlgorithm poolingType)
10001 {
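    // Scale 0.1 and offset 128 quantise the test data for the uint8 comparison against the reference backend.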
10002     return ComparePooling2dTestCommon<armnn::DataType::QuantisedAsymm8>(
10003         workloadFactory, memoryManager, refWorkloadFactory, poolingType, 0.1f, 128);
10004 }
10005
10006 LayerTestResult<int16_t, 4> ComparePooling2dInt16Test(
10007     armnn::IWorkloadFactory& workloadFactory,
10008     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
10009     armnn::IWorkloadFactory& refWorkloadFactory,
    armnn::PoolingAlgorithm poolingType)
10011 {
10012     return ComparePooling2dTestCommon<armnn::DataType::QuantisedSymm16>(
10013             workloadFactory, memoryManager, refWorkloadFactory, poolingType);
10014 }
10015
10016 LayerTestResult<float, 2> FullyConnectedLargeTest(
10017     armnn::IWorkloadFactory& workloadFactory,
10018     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
10019     bool transposeWeights)
10020 {
10021     return FullyConnectedLargeTestCommon<armnn::DataType::Float32>(workloadFactory, memoryManager, transposeWeights);
10022 }
10023
10024 LayerTestResult<float, 4> AdditionAfterMaxPoolTest(
10025     armnn::IWorkloadFactory& workloadFactory,
10026     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10027 {
10028     // Create Initial Tensor
10029     // 1, 2, 3
10030     // 4, 5, 6
10031     // 7, 8, 9
10032
10033     armnn::TensorInfo poolingInputTensorInfo({ 1, 1, 3, 3}, armnn::DataType::Float32);
10034     armnn::TensorInfo poolingOutputTensorInfo({ 1, 1, 2, 2}, armnn::DataType::Float32);
10035
10036     boost::multi_array<float, 4> poolingInput = MakeTensor<float,4>(poolingInputTensorInfo,
10037                                                             {1, 2, 3,
10038                                                              4, 5, 6,
10039                                                              7, 8, 9
10040                                                             });
10041
10042     std::unique_ptr<armnn::ITensorHandle> poolingInputHandle =
10043             workloadFactory.CreateTensorHandle(poolingInputTensorInfo);
10044     std::unique_ptr<armnn::ITensorHandle> poolingOutputHandle =
10045             workloadFactory.CreateTensorHandle(poolingOutputTensorInfo);
10046
    // Apply MaxPool with poolSize = 1x1 and stride = 2x2.
10048     // Result =
10049     // 1, 3
10050     // 7, 9
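    // (a 1x1 pool with stride 2x2 samples elements (0,0), (0,2), (2,0) and (2,2) of the 3x3 input)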
10051     armnn::Pooling2dDescriptor descriptor;
10052     descriptor.m_PoolHeight = 1;
10053     descriptor.m_PoolWidth = 1;
10054     descriptor.m_StrideX = 2;
10055     descriptor.m_StrideY = 2;
10056     descriptor.m_PoolType = armnn::PoolingAlgorithm::Max;
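    // The remaining descriptor fields keep their defaults, i.e. zero padding.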
10057
10058     armnn::Pooling2dQueueDescriptor queueDescriptor;
10059     queueDescriptor.m_Parameters = descriptor;
10060     armnn::WorkloadInfo workloadInfo;
10061     AddInputToWorkload(queueDescriptor, workloadInfo, poolingInputTensorInfo, poolingInputHandle.get());
10062     AddOutputToWorkload(queueDescriptor, workloadInfo, poolingOutputTensorInfo, poolingOutputHandle.get());
10063
    // Create the MaxPool workload.
10065     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreatePooling2d(queueDescriptor, workloadInfo);
10066
    // Intermediate buffer used to read back the MaxPool result.
    auto shape(GetTensorShapeAsArray<4>(poolingOutputTensorInfo));
    boost::multi_array<float, 4> resultMaxPool;
    resultMaxPool.resize(shape);

    // Create an addition with another tensor of the same size. This tensor holds the result of
    // applying a Conv2d with a 2x2 kernel of ones and stride 1x1 to the initial tensor:
10076     // 12, 16
10077     // 24, 28
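    // (top-left window: 1+2+4+5 = 12, top-right: 2+3+5+6 = 16, and so on)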
10078
    armnn::TensorInfo addInputTensorInfo({ 1, 1, 2, 2}, armnn::DataType::Float32);
    armnn::TensorInfo addOutputTensorInfo({ 1, 1, 2, 2}, armnn::DataType::Float32);
10081
    boost::multi_array<float, 4> addInput = MakeTensor<float, 4>(addInputTensorInfo,
                                                                 {12, 16,
                                                                  24, 28});
10086
10087     // Expected output tensor after MaxPool and Addition.
10088     LayerTestResult<float,4> addRet(addOutputTensorInfo);
10089     addRet.outputExpected = MakeTensor<float, 4>(addOutputTensorInfo, std::vector<float>(
10090             {
10091                     13, 19,
10092                     31, 37
10093             }));
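    // i.e. {1, 3, 7, 9} + {12, 16, 24, 28}, element-wise.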
10094
10095     std::unique_ptr<armnn::ITensorHandle> addInputHandle = workloadFactory.CreateTensorHandle(addInputTensorInfo);
10096     std::unique_ptr<armnn::ITensorHandle> addOutputHandle = workloadFactory.CreateTensorHandle(addOutputTensorInfo);
10097
10098     armnn::AdditionQueueDescriptor data;
10099     armnn::WorkloadInfo info;
10100
10101     // Add the output of the MaxPool and the new tensor
10102     AddInputToWorkload(data, info, poolingOutputTensorInfo, poolingOutputHandle.get());
10103     AddInputToWorkload(data, info, addInputTensorInfo, addInputHandle.get());
10104     AddOutputToWorkload(data, info, addOutputTensorInfo, addOutputHandle.get());
10105
10106     std::unique_ptr<armnn::IWorkload> addWorkload = workloadFactory.CreateAddition(data, info);
10107
10108     poolingInputHandle->Allocate();
10109     poolingOutputHandle->Allocate();
10110     addInputHandle->Allocate();
10111     addOutputHandle->Allocate();
10112
    CopyDataToITensorHandle(poolingInputHandle.get(), &poolingInput[0][0][0][0]);
    CopyDataToITensorHandle(addInputHandle.get(), &addInput[0][0][0][0]);

    // Run the MaxPool first so that its output is valid before it is read back.
    workload->PostAllocationConfigure();
    workload->Execute();

    // Round-trip the MaxPool result through host memory before feeding it to the addition.
    CopyDataFromITensorHandle(&resultMaxPool[0][0][0][0], poolingOutputHandle.get());
    CopyDataToITensorHandle(poolingOutputHandle.get(), &resultMaxPool[0][0][0][0]);

    addWorkload->PostAllocationConfigure();
    addWorkload->Execute();
10123
10124     CopyDataFromITensorHandle(&addRet.output[0][0][0][0], addOutputHandle.get());
10125
10126     return addRet;
10127 }
10128
10129 LayerTestResult<float, 4> SpaceToBatchNdSimpleFloat32Test(
10130     armnn::IWorkloadFactory& workloadFactory,
10131     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10132 {
10133     return SpaceToBatchNdSimpleTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10134 }
10135
10136 LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsFloat32Test(
10137     armnn::IWorkloadFactory& workloadFactory,
10138     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10139 {
10140     return SpaceToBatchNdMultiChannelsTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10141 }
10142
10143 LayerTestResult<float, 4> SpaceToBatchNdMultiBlockFloat32Test(
10144     armnn::IWorkloadFactory& workloadFactory,
10145     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10146 {
10147     return SpaceToBatchNdMultiBlockTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10148 }
10149
10150 LayerTestResult<float, 4> SpaceToBatchNdPaddingFloat32Test(
10151     armnn::IWorkloadFactory& workloadFactory,
10152     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10153 {
10154     return SpaceToBatchNdPaddingTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10155 }
10156
10157 LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleUint8Test(
10158     armnn::IWorkloadFactory& workloadFactory,
10159     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10160 {
10161     return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10162 }
10163
10164 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsUint8Test(
10165     armnn::IWorkloadFactory& workloadFactory,
10166     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10167 {
10168     return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10169 }
10170
10171 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockUint8Test(
10172     armnn::IWorkloadFactory& workloadFactory,
10173     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10174 {
10175     return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10176 }
10177
10178 LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingUint8Test(
10179     armnn::IWorkloadFactory& workloadFactory,
10180     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10181 {
10182     return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10183 }
10184
10185 LayerTestResult<float, 4> SpaceToBatchNdSimpleNHWCFloat32Test(
10186     armnn::IWorkloadFactory& workloadFactory,
10187     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10188 {
10189     return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10190 }
10191
10192 LayerTestResult<float, 4> SpaceToBatchNdMultiChannelsNHWCFloat32Test(
10193     armnn::IWorkloadFactory& workloadFactory,
10194     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10195 {
10196     return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10197 }
10198
10199 LayerTestResult<float, 4> SpaceToBatchNdMultiBlockNHWCFloat32Test(
10200     armnn::IWorkloadFactory& workloadFactory,
10201     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10202 {
10203     return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10204 }
10205
10206 LayerTestResult<float, 4> SpaceToBatchNdPaddingNHWCFloat32Test(
10207     armnn::IWorkloadFactory& workloadFactory,
10208     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10209 {
10210     return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10211 }
10212
10213 LayerTestResult<uint8_t, 4> SpaceToBatchNdSimpleNHWCUint8Test(
10214     armnn::IWorkloadFactory& workloadFactory,
10215     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10216 {
10217     return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10218 }
10219
10220 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNHWCUint8Test(
10221     armnn::IWorkloadFactory& workloadFactory,
10222     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10223 {
10224     return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10225 }
10226
10227 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNHWCUint8Test(
10228     armnn::IWorkloadFactory& workloadFactory,
10229     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10230 {
10231     return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10232 }
10233
10234 LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNHWCUint8Test(
10235     armnn::IWorkloadFactory& workloadFactory,
10236     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10237 {
10238     return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10239 }
10240
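// Despite the Uint16 naming, the following tests exercise the QuantisedSymm16 (int16_t) data type.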
10241 LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleUint16Test(
10242         armnn::IWorkloadFactory& workloadFactory,
10243         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10244 {
10245     return SpaceToBatchNdSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10246 }
10247
10248 LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsUint16Test(
10249         armnn::IWorkloadFactory& workloadFactory,
10250         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10251 {
10252     return SpaceToBatchNdMultiChannelsTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10253 }
10254
10255 LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockUint16Test(
10256         armnn::IWorkloadFactory& workloadFactory,
10257         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10258 {
10259     return SpaceToBatchNdMultiBlockTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10260 }
10261
10262 LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingUint16Test(
10263         armnn::IWorkloadFactory& workloadFactory,
10264         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10265 {
10266     return SpaceToBatchNdPaddingTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10267 }
10268
10269 LayerTestResult<int16_t, 4> SpaceToBatchNdSimpleNHWCUint16Test(
10270         armnn::IWorkloadFactory& workloadFactory,
10271         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10272 {
10273     return SpaceToBatchNdSimpleNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10274 }
10275
10276 LayerTestResult<int16_t, 4> SpaceToBatchNdMultiChannelsNHWCUint16Test(
10277         armnn::IWorkloadFactory& workloadFactory,
10278         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10279 {
10280     return SpaceToBatchNdMultiChannelsNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10281 }
10282
10283 LayerTestResult<int16_t, 4> SpaceToBatchNdMultiBlockNHWCUint16Test(
10284         armnn::IWorkloadFactory& workloadFactory,
10285         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10286 {
10287     return SpaceToBatchNdMultiBlockNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10288 }
10289
10290 LayerTestResult<int16_t, 4> SpaceToBatchNdPaddingNHWCUint16Test(
10291         armnn::IWorkloadFactory& workloadFactory,
10292         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10293 {
10294     return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10295 }
10296
10297 LayerTestResult<uint8_t, 4> SpaceToDepthNHWCAsymmQ8Test(
10298     armnn::IWorkloadFactory& workloadFactory,
10299     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10300 {
10301     return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
10302         workloadFactory,
10303         memoryManager);
10304 }
10305
10306 LayerTestResult<uint8_t, 4> SpaceToDepthNCHWAsymmQ8Test(
10307     armnn::IWorkloadFactory& workloadFactory,
10308     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10309 {
10310     return SpaceToDepthSimpleTest1<armnn::DataType::QuantisedAsymm8>(
10311         workloadFactory,
10312         memoryManager,
10313         armnn::DataLayout::NCHW);
10314 }
10315
10316 LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test1(
10317     armnn::IWorkloadFactory& workloadFactory,
10318     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10319 {
10320     return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
10321         workloadFactory,
10322         memoryManager);
10323 }
10324
10325 LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test1(
10326     armnn::IWorkloadFactory& workloadFactory,
10327     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10328 {
10329     return SpaceToDepthSimpleTest1<armnn::DataType::Float32>(
10330         workloadFactory,
10331         memoryManager,
10332         armnn::DataLayout::NCHW);
10333 }
10334
10335 LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test2(
10336     armnn::IWorkloadFactory& workloadFactory,
10337     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10338 {
10339     return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
10340         workloadFactory,
10341         memoryManager);
10342 }
10343
10344 LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test2(
10345     armnn::IWorkloadFactory& workloadFactory,
10346     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10347 {
10348     return SpaceToDepthSimpleTest2<armnn::DataType::Float32>(
10349         workloadFactory,
10350         memoryManager,
10351         armnn::DataLayout::NCHW);
10352 }
10353
10354 LayerTestResult<int16_t, 4> SpaceToDepthNHWCQSymm16Test(
10355     armnn::IWorkloadFactory& workloadFactory,
10356     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10357 {
10358     return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
10359         workloadFactory,
10360         memoryManager);
10361 }
10362
10363 LayerTestResult<int16_t, 4> SpaceToDepthNCHWQSymm16Test(
10364     armnn::IWorkloadFactory& workloadFactory,
10365     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10366 {
10367     return SpaceToDepthSimpleTest2<armnn::DataType::QuantisedSymm16>(
10368         workloadFactory,
10369         memoryManager,
10370         armnn::DataLayout::NCHW);
10371 }
10372
10377 LayerTestResult<float, 4> StridedSlice4DFloat32Test(
10378     armnn::IWorkloadFactory& workloadFactory,
10379     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10380 {
10381     return StridedSlice4DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10382 }
10383
10384 LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
10385     armnn::IWorkloadFactory& workloadFactory,
10386     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10387 {
10388     return StridedSlice4DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10389 }
10390
10391 LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
10392     armnn::IWorkloadFactory& workloadFactory,
10393     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10394 {
10395     return StridedSliceSimpleStrideTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10396 }
10397
10398 LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
10399     armnn::IWorkloadFactory& workloadFactory,
10400     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10401 {
10402     return StridedSliceSimpleRangeMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10403 }
10404
10405 LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
10406     armnn::IWorkloadFactory& workloadFactory,
10407     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10408 {
10409     return StridedSliceShrinkAxisMaskTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10410 }
10411
10412 LayerTestResult<float, 3> StridedSlice3DFloat32Test(
10413     armnn::IWorkloadFactory& workloadFactory,
10414     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10415 {
10416     return StridedSlice3DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10417 }
10418
10419 LayerTestResult<float, 3> StridedSlice3DReverseFloat32Test(
10420     armnn::IWorkloadFactory& workloadFactory,
10421     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10422 {
10423     return StridedSlice3DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10424 }
10425
10426 LayerTestResult<float, 2> StridedSlice2DFloat32Test(
10427     armnn::IWorkloadFactory& workloadFactory,
10428     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10429 {
10430     return StridedSlice2DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10431 }
10432
10433 LayerTestResult<float, 2> StridedSlice2DReverseFloat32Test(
10434     armnn::IWorkloadFactory& workloadFactory,
10435     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10436 {
10437     return StridedSlice2DReverseTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10438 }
10439
10440 LayerTestResult<uint8_t, 4> StridedSlice4DUint8Test(
10441     armnn::IWorkloadFactory& workloadFactory,
10442     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10443 {
10444     return StridedSlice4DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10445 }
10446
10447 LayerTestResult<uint8_t, 4> StridedSlice4DReverseUint8Test(
10448     armnn::IWorkloadFactory& workloadFactory,
10449     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10450 {
10451     return StridedSlice4DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10452 }
10453
10454 LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
10455     armnn::IWorkloadFactory& workloadFactory,
10456     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10457 {
10458     return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10459 }
10460
10461 LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
10462     armnn::IWorkloadFactory& workloadFactory,
10463     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10464 {
10465     return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10466 }
10467
10468 LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
10469     armnn::IWorkloadFactory& workloadFactory,
10470     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10471 {
10472     return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10473 }
10474
10475 LayerTestResult<uint8_t, 3> StridedSlice3DUint8Test(
10476     armnn::IWorkloadFactory& workloadFactory,
10477     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10478 {
10479     return StridedSlice3DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10480 }
10481
10482 LayerTestResult<uint8_t, 3> StridedSlice3DReverseUint8Test(
10483     armnn::IWorkloadFactory& workloadFactory,
10484     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10485 {
10486     return StridedSlice3DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10487 }
10488
10489 LayerTestResult<uint8_t, 2> StridedSlice2DUint8Test(
10490     armnn::IWorkloadFactory& workloadFactory,
10491     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10492 {
10493     return StridedSlice2DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10494 }
10495
10496 LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
10497     armnn::IWorkloadFactory& workloadFactory,
10498     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10499 {
10500     return StridedSlice2DReverseTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10501 }
10502
10503 LayerTestResult<int16_t, 4> StridedSlice4DInt16Test(
10504     armnn::IWorkloadFactory& workloadFactory,
10505     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10506 {
10507     return StridedSlice4DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10508 }
10509
10510 LayerTestResult<int16_t, 4> StridedSlice4DReverseInt16Test(
10511     armnn::IWorkloadFactory& workloadFactory,
10512     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10513 {
10514     return StridedSlice4DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10515 }
10516
10517 LayerTestResult<int16_t, 4> StridedSliceSimpleStrideInt16Test(
10518     armnn::IWorkloadFactory& workloadFactory,
10519     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10520 {
10521     return StridedSliceSimpleStrideTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10522 }
10523
10524 LayerTestResult<int16_t, 4> StridedSliceSimpleRangeMaskInt16Test(
10525     armnn::IWorkloadFactory& workloadFactory,
10526     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10527 {
10528     return StridedSliceSimpleRangeMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10529 }
10530
10531 LayerTestResult<int16_t, 2> StridedSliceShrinkAxisMaskInt16Test(
10532     armnn::IWorkloadFactory& workloadFactory,
10533     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10534 {
10535     return StridedSliceShrinkAxisMaskTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10536 }
10537
10538 LayerTestResult<int16_t, 3> StridedSlice3DInt16Test(
10539     armnn::IWorkloadFactory& workloadFactory,
10540     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10541 {
10542     return StridedSlice3DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10543 }
10544
10545 LayerTestResult<int16_t, 3> StridedSlice3DReverseInt16Test(
10546     armnn::IWorkloadFactory& workloadFactory,
10547     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10548 {
10549     return StridedSlice3DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10550 }
10551
10552 LayerTestResult<int16_t, 2> StridedSlice2DInt16Test(
10553     armnn::IWorkloadFactory& workloadFactory,
10554     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10555 {
10556     return StridedSlice2DTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10557 }
10558
10559 LayerTestResult<int16_t, 2> StridedSlice2DReverseInt16Test(
10560     armnn::IWorkloadFactory& workloadFactory,
10561     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10562 {
10563     return StridedSlice2DReverseTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10564 }
10565
10566 LayerTestResult<float, 4> Debug4DFloat32Test(
10567     armnn::IWorkloadFactory& workloadFactory,
10568     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10569 {
10570     return Debug4DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10571 }
10572
10573 LayerTestResult<float, 3> Debug3DFloat32Test(
10574     armnn::IWorkloadFactory& workloadFactory,
10575     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10576 {
10577     return Debug3DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10578 }
10579
10580 LayerTestResult<float, 2> Debug2DFloat32Test(
10581     armnn::IWorkloadFactory& workloadFactory,
10582     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10583 {
10584     return Debug2DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10585 }
10586
10587 LayerTestResult<float, 1> Debug1DFloat32Test(
10588     armnn::IWorkloadFactory& workloadFactory,
10589     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10590 {
10591     return Debug1DTest<armnn::DataType::Float32>(workloadFactory, memoryManager);
10592 }
10593
10594 LayerTestResult<uint8_t, 4> Debug4DUint8Test(
10595     armnn::IWorkloadFactory& workloadFactory,
10596     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10597 {
10598     return Debug4DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10599 }
10600
10601 LayerTestResult<uint8_t, 3> Debug3DUint8Test(
10602     armnn::IWorkloadFactory& workloadFactory,
10603     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10604 {
10605     return Debug3DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10606 }
10607
10608 LayerTestResult<uint8_t, 2> Debug2DUint8Test(
10609     armnn::IWorkloadFactory& workloadFactory,
10610     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10611 {
10612     return Debug2DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10613 }
10614
10615 LayerTestResult<uint8_t, 1> Debug1DUint8Test(
10616     armnn::IWorkloadFactory& workloadFactory,
10617     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10618 {
10619     return Debug1DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10620 }
10621
10622 LayerTestResult<float, 1> Gather1DParamsFloatTest(
10623     armnn::IWorkloadFactory& workloadFactory,
10624     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10625 {
10626     return Gather1DParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
10627 }
10628
10629 LayerTestResult<uint8_t, 1> Gather1DParamsUint8Test(
10630     armnn::IWorkloadFactory& workloadFactory,
10631     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10632 {
10633     return Gather1DParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10634 }
10635
10636 LayerTestResult<int16_t, 1> Gather1DParamsInt16Test(
10637         armnn::IWorkloadFactory& workloadFactory,
10638         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10639 {
10640     return Gather1DParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10641 }
10642
10643 LayerTestResult<float, 2> GatherMultiDimParamsFloatTest(
10644     armnn::IWorkloadFactory& workloadFactory,
10645     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10646 {
10647     return GatherMultiDimParamsTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
10648 }
10649
10650 LayerTestResult<uint8_t, 2> GatherMultiDimParamsUint8Test(
10651     armnn::IWorkloadFactory& workloadFactory,
10652     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10653 {
10654     return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10655 }
10656
10657 LayerTestResult<int16_t, 2> GatherMultiDimParamsInt16Test(
10658         armnn::IWorkloadFactory& workloadFactory,
10659         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10660 {
10661     return GatherMultiDimParamsTestImpl<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10662 }
10663
10664 LayerTestResult<float, 4> GatherMultiDimParamsMultiDimIndicesFloatTest(
10665     armnn::IWorkloadFactory& workloadFactory,
10666     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10667 {
10668     return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::Float32>(workloadFactory, memoryManager);
10669 }
10670
10671 LayerTestResult<uint8_t, 4> GatherMultiDimParamsMultiDimIndicesUint8Test(
10672     armnn::IWorkloadFactory& workloadFactory,
10673     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10674 {
10675     return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedAsymm8>(
10676         workloadFactory, memoryManager);
10677 }
10678
10679 LayerTestResult<int16_t, 4> GatherMultiDimParamsMultiDimIndicesInt16Test(
10680         armnn::IWorkloadFactory& workloadFactory,
10681         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10682 {
10683     return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedSymm16>(
10684             workloadFactory, memoryManager);
10685 }
10686
10687 LayerTestResult<float, 4> DequantizeSimpleUint8Test(
10688     armnn::IWorkloadFactory& workloadFactory,
10689     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10690 {
10691     return DequantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10692 }
10693
10694 LayerTestResult<float, 4> DequantizeOffsetUint8Test(
10695     armnn::IWorkloadFactory& workloadFactory,
10696     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10697 {
10698     return DequantizeOffsetTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10699 }
10700
10701 LayerTestResult<float, 4> DequantizeSimpleInt16Test(
10702     armnn::IWorkloadFactory& workloadFactory,
10703     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10704 {
10705     return DequantizeSimpleTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10706 }
10707
10708 LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
10709     armnn::IWorkloadFactory& workloadFactory,
10710     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10711 {
10712     return QuantizeSimpleTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10713 }
10714
10715 LayerTestResult<uint8_t, 4> QuantizeClampUint8Test(
10716     armnn::IWorkloadFactory& workloadFactory,
10717     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10718 {
10719     return QuantizeClampTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
10720 }
10721
10722 LayerTestResult<int16_t, 4> QuantizeClampInt16Test(
10723     armnn::IWorkloadFactory& workloadFactory,
10724     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
10725 {
10726     return QuantizeClampTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
10727 }