//
// Copyright © 2017 Arm Ltd. All rights reserved.
// See LICENSE file in the project root for full license information.
//
#include "LayerTests.hpp"

#include "test/TensorHelpers.hpp"
#include "TensorCopyUtils.hpp"

#include <boost/test/unit_test.hpp>

#include "armnn/LayerSupport.hpp"

#include "backends/CpuTensorHandle.hpp"
#include "backends/WorkloadFactory.hpp"

#ifdef ARMCOMPUTECL_ENABLED
#include "backends/ClTensorHandle.hpp"
#include "backends/ArmComputeTensorUtils.hpp"
#endif

#include <algorithm>
#include <boost/cast.hpp>

#include "WorkloadTestUtils.hpp"
#include "Conv2dTestImpl.hpp"
#include "BatchNormTestImpl.hpp"
#include "ActivationTestImpl.hpp"
#include "Pooling2dTestImpl.hpp"
#include "ReshapeTestImpl.hpp"
#include "FullyConnectedTestImpl.hpp"
#include "SplitterTestImpl.hpp"
#include "SoftmaxTestImpl.hpp"
#include "NormTestImpl.hpp"
#include "PermuteTestImpl.hpp"

// 3-channel 16x8 image used as common input data for a number of Conv2d tests
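// Channel 0 is mostly 0.5f with a single row of zeros, channel 1 contains a
// vertical line of ones at column 2, and channel 2 is all -1, so each kernel
// channel sees a distinct, easy-to-verify pattern.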
static std::vector<float> ConvInput3x8x16({
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
});

// 2-channel bias used by a number of Conv2d tests
static std::vector<float> Bias2({0, 2});

// Helper function that returns either Bias2 or an empty vector depending on whether bias is enabled
template<typename T>
boost::multi_array<T, 1> GetBias2(bool biasEnabled, float qScale, int32_t qOffset)
{
    if (biasEnabled)
    {
        armnn::TensorInfo biasDesc({static_cast<unsigned int>(Bias2.size())}, armnn::GetDataType<T>());
        boost::multi_array<T, 1> bias = MakeTensor<T, 1>(biasDesc, QuantizedVector<T>(qScale, qOffset, Bias2));
        return bias;
    }
    else
    {
        return boost::multi_array<T, 1>();
    }
}
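// Note: the quantized (uint8) tests below instantiate GetBias2 through
// FullyConnectedBiasTypeForInputType. Judging by the explicit
// DepthwiseConvolution2dTestImpl<uint8_t, int32_t> instantiations further down,
// this maps uint8 inputs to int32 biases, the usual 32-bit accumulator
// convention for quantized convolutions.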

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                       float                    qScale,
                                                       int32_t                  qOffset,
                                                       bool                     biasEnabled)
{
    // Use common single-batch 3-channel 16x8 image
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch with 3-channel 3x5 kernels
    armnn::TensorInfo kernelDesc({2, 3, 5, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x4 image
    armnn::TensorInfo outputDesc({1, 2, 4, 14}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
            -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f, -23.5f,
            -23.5f, -23.5f, -23.5f,

            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset);
}

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2d3x3TestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                       float                    qScale,
                                                       int32_t                  qOffset,
                                                       bool                     biasEnabled)
{
    // Use a 3x3 kernel, which exercises ArmCompute's direct convolution path

    // Use common single-batch 3-channel 16x8 image
    armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, QuantizedVector<T>(qScale, qOffset, ConvInput3x8x16));

    // Use a 2-element batch of 3-channel 3x3 kernels
    armnn::TensorInfo kernelDesc({2, 3, 3, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            1, 1, 1,
            1, -1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            2, 2, 2,
            2, 2, 2,
            2, 2, 2,


            0, 0, 0,
            0, 0, 0,
            0, 0, 0,

            1, 1, 1,
            1, 1, 1,
            1, 1, 1,

            0, 0, 0,
            0, 0, 0,
            0, 0, 0
        })));

    // Expected output is 1 batch of a 2-channel 14x6 image
    armnn::TensorInfo outputDesc({1, 2, 6, 14}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
            -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,
            -14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,-14.5f,

            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset);
}

LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled)
{
    return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled)
{
    return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled);
}

LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled)
{
    return SimpleConvolution2d3x3TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled)
{
    return SimpleConvolution2d3x3TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled);
}

template<typename T>
LayerTestResult<T, 4> Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon(
    armnn::IWorkloadFactory& workloadFactory,
    float                    qScale,
    int32_t                  qOffset)
{
    // Use a single-batch 1-channel 3x3 image as input
    armnn::TensorInfo inputDesc({1, 1, 3, 3}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,
            12,22,32,
            13,23,33
        })));

    // Use 1 batch of a 1-channel 2x2 kernel
    armnn::TensorInfo kernelDesc({1, 1, 2, 2}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,
            -12,-22,
        })));

// Expected output is 1 batch of a 1-channel 6x8 image
// Manually calculated like this:
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*11 ; -11*0  -21*0  -12*11 -22*21 ; -11*0  -21*0  -12*21 -22*31 ; -11*0  -21*0 -12*31 -22*0 ..]
//[-11*0 -21*11 -12*0 -22*12 ; -11*11 -21*21 -12*12 -22*22 ; -11*21 -21*31 -12*22 -22*32 ; -11*31 -21*0 -12*32 -22*0 ..]
//[-11*0 -21*12 -12*0 -22*13 ; -11*12 -21*22 -12*13 -22*23 ; -11*22 -21*32 -12*23 -22*33 ; -11*32 -21*0 -12*33 -22*0 ..]
//[-11*0 -21*13 -12*0 -22*0  ; -11*13 -21*23 -12*0  -22*0  ; -11*23 -21*33 -12*0  -22*0  ; -11*33 -21*0 -12*0  -22*0 ..]
//[-11*0 -21*0  -12*0 -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0  -12*0  -22*0  ; -11*0  -21*0 -12*0  -22*0 ..]
//[..... .....  ..... .....  ; .....  .....  .....  .....  ; .....  .....  .....  .....  ; .....  ..... .....  ..... ..]
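// With stride 1 the output size follows the usual convolution arithmetic:
//   outHeight = inHeight + padTop  + padBottom - kernelHeight + 1 = 3 + 2 + 4 - 2 + 1 = 8
//   outWidth  = inWidth  + padLeft + padRight  - kernelWidth  + 1 = 3 + 1 + 3 - 2 + 1 = 6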
    armnn::TensorInfo outputDesc({1, 1, 8, 6}, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
               0,    0,      0,    0,    0,    0,
            -242,  -594,  -934, -372,    0,    0,
            -495, -1190, -1850, -725,    0,    0,
            -538, -1256, -1916, -748,    0,    0,
            -273, -626,  -946,  -363,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0,
               0,    0,     0,     0,    0,    0
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
      input,
      kernel,
      GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
      expectedOutput,
      qScale,
      qOffset,
      1,  // padding left
      2,  // padding top
      3,  // padding right
      4); // padding bottom
}

template<typename T>
LayerTestResult<T, 4> SimpleConvolution2dAsymmetricPaddingTestCommon(armnn::IWorkloadFactory& workloadFactory,
    float                    qScale,
    int32_t                  qOffset)
{
    // Use a single-batch 1-channel 5x5 image as input
    armnn::TensorInfo inputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            11,21,31,41,51,
            12,22,32,42,52,
            13,23,33,43,53,
            14,24,34,44,54,
            15,25,35,45,55,
        })));

    // Use 1 batch of a 1-channel 4x4 kernel
    armnn::TensorInfo kernelDesc({ 1, 1, 4, 4 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> kernel = MakeTensor<T, 4>(kernelDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -11,-21,-31,-41,
            -12,-22,-32,-42,
            -13,-23,-33,-43,
            -14,-24,-34,-44,
        })));

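    // With stride 1: outHeight = 5 + (2 + 1) - 4 + 1 = 5 and outWidth = 5 + (1 + 2) - 4 + 1 = 5,
    // so the asymmetric padding maps the 5x5 input back onto a 5x5 output.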
    // Expected output is 1 batch of a 1-channel 5x5 image
    armnn::TensorInfo outputDesc({ 1, 1, 5, 5 }, armnn::GetDataType<T>());
    boost::multi_array<T, 4> expectedOutput = MakeTensor<T, 4>(outputDesc, std::vector<T>(
        QuantizedVector<T>(qScale, qOffset, {
            -4723,  -7044,  -9324,  -6253, -3542,
            -7140, -10580, -13940,  -9300, -5230,
            -9590, -14120, -18520, -12290, -6860,
            -9980, -14560, -18960, -12560, -7000,
            -7518, -10904, -14144,  -9318, -5152,
        })));

    return SimpleConvolution2dTestImpl<T>(workloadFactory,
        input,
        kernel,
        GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(false, qScale, qOffset),
        expectedOutput,
        qScale,
        qOffset,
        1,  // padding left
        2,  // padding top
        2,  // padding right
        1); // padding bottom
}

LayerTestResult<float, 4>
Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTest(armnn::IWorkloadFactory& workloadFactory)
{
    return Convolution2dAsymmetricPaddingLargerThanHalfKernelSizeTestCommon<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<float, 4> Convolution2dAsymmetricPaddingTest(armnn::IWorkloadFactory& workloadFactory)
{
    return SimpleConvolution2dAsymmetricPaddingTestCommon<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<float, 4> DepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
                                                     bool                     biasEnabled)
{
    return DepthwiseConvolution2dTestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<float, 4> DepthwiseConvolution2dDepthMul1Test(armnn::IWorkloadFactory& workloadFactory,
                                                              bool biasEnabled)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<float, float>(workloadFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                            bool                     biasEnabled)
{
    return DepthwiseConvolution2dTestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled);
}

LayerTestResult<uint8_t, 4> DepthwiseConvolution2dDepthMul1Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                                     bool biasEnabled)
{
    return DepthwiseConvolution2dDepthMul1TestImpl<uint8_t, int32_t>(workloadFactory, 0.5f, 50, biasEnabled);
}

LayerTestResult<float, 4> Convolution1dTest(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
{
    return Convolution1dTestImpl<float>(workloadFactory, 0.0f, 0, biasEnabled);
}

LayerTestResult<uint8_t, 4> Convolution1dUint8Test(armnn::IWorkloadFactory& workloadFactory, bool biasEnabled)
{
    return Convolution1dTestImpl<uint8_t>(workloadFactory, 0.1f, 128, biasEnabled);
}

LayerTestResult<float,4> CompareConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
                                                  armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareConvolution2dTestImpl<float>(workloadFactory, refWorkloadFactory);
}

template<typename T>
LayerTestResult<T,4> CompareDepthwiseConvolution2dTest(armnn::IWorkloadFactory& workloadFactory,
    armnn::IWorkloadFactory& refWorkloadFactory)
{
    return CompareDepthwiseConvolution2dTestImpl<T>(workloadFactory, refWorkloadFactory);
}

template LayerTestResult<float, 4> CompareDepthwiseConvolution2dTest<float>(
    armnn::IWorkloadFactory&, armnn::IWorkloadFactory&);
template LayerTestResult<uint8_t, 4> CompareDepthwiseConvolution2dTest<uint8_t>(
    armnn::IWorkloadFactory&, armnn::IWorkloadFactory&);

LayerTestResult<float,4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory)
{
    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
    return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
}

LayerTestResult<float,4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory)
{
    auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
    auto normChannel = armnn::NormalizationAlgorithmChannel::Within;
    return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
}

LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta)
{
    return SimpleSoftmaxTestImpl<float>(workloadFactory, beta);
}

LayerTestResult<uint8_t,2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta)
{
    return SimpleSoftmaxTestImpl<uint8_t>(workloadFactory, beta);
}

LayerTestResult<float,4> CompareNormalizationTest(armnn::IWorkloadFactory& workloadFactory,
                                                  armnn::IWorkloadFactory& refWorkloadFactory,
                                                  armnn::NormalizationAlgorithmChannel normChannel,
                                                  armnn::NormalizationAlgorithmMethod normMethod)
{
    return CompareNormalizationTestImpl(workloadFactory, refWorkloadFactory, normChannel, normMethod);
}

LayerTestResult<float,2> CompareSoftmaxTest(armnn::IWorkloadFactory& workloadFactory,
    armnn::IWorkloadFactory& refWorkloadFactory,
    float beta)
{
    return CompareSoftmaxTestImpl<float>(workloadFactory, refWorkloadFactory, beta);
}

LayerTestResult<uint8_t,2> CompareSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory,
    armnn::IWorkloadFactory& refWorkloadFactory,
    float beta)
{
    return CompareSoftmaxTestImpl<uint8_t>(workloadFactory, refWorkloadFactory, beta);
}

std::vector<LayerTestResult<float,3>> SplitterTest(armnn::IWorkloadFactory& workloadFactory)
{
    return SplitterTestCommon<float>(workloadFactory);
}

std::vector<LayerTestResult<uint8_t,3>> SplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return SplitterTestCommon<uint8_t>(workloadFactory, 1.0f, 0);
}

LayerTestResult<float, 3> CopyViaSplitterTest(armnn::IWorkloadFactory& workloadFactory)
{
    return CopyViaSplitterTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 3> CopyViaSplitterUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return CopyViaSplitterTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
}

LayerTestResult<float,3> MergerTest(armnn::IWorkloadFactory& workloadFactory)
{
    unsigned int outputWidth = 5;
    unsigned int outputHeight = 6;
    unsigned int outputChannels = 3;

    unsigned int inputWidth1 = 2;
    unsigned int inputHeight1 = 2;
    unsigned int inputChannels1 = 3;

    unsigned int inputWidth2 = 2;
    unsigned int inputHeight2 = 4;
    unsigned int inputChannels2 = 3;

    unsigned int inputWidth3 = 3;
    unsigned int inputHeight3 = 6;
    unsigned int inputChannels3 = 2;

    unsigned int inputWidth4 = 3;
    unsigned int inputHeight4 = 6;
    unsigned int inputChannels4 = 1;
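
    // The four inputs tile the 3x6x5 (channels x height x width) output:
    //   input1 (3x2x2): channels 0-2, rows 0-1, columns 0-1, origin {0, 0, 0}
    //   input2 (3x4x2): channels 0-2, rows 2-5, columns 0-1, origin {0, 2, 0}
    //   input3 (2x6x3): channels 0-1, rows 0-5, columns 2-4, origin {0, 0, 2}
    //   input4 (1x6x3): channel 2,    rows 0-5, columns 2-4, origin {2, 0, 2}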

    // Define the tensor descriptors
    armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::Float32);
    armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::Float32);

    LayerTestResult<float,3> ret(outputTensorInfo);

    ret.outputExpected = MakeTensor<float, 3>(outputTensorInfo, std::vector<float>(
        {
            1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
            6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
            11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
            16.0f, 17.0f, 18.0f, 19.0f, 20.0f,
            21.0f, 22.0f, 23.0f, 24.0f, 25.0f,
            26.0f, 27.0f, 28.0f, 29.0f, 30.0f,

            31.0f, 32.0f, 33.0f, 34.0f, 35.0f,
            36.0f, 37.0f, 38.0f, 39.0f, 40.0f,
            41.0f, 42.0f, 43.0f, 44.0f, 45.0f,
            46.0f, 47.0f, 48.0f, 49.0f, 50.0f,
            51.0f, 52.0f, 53.0f, 54.0f, 55.0f,
            56.0f, 57.0f, 58.0f, 59.0f, 60.0f,

            61.0f, 62.0f, 63.0f, 64.0f, 65.0f,
            66.0f, 67.0f, 68.0f, 69.0f, 70.0f,
            71.0f, 72.0f, 73.0f, 74.0f, 75.0f,
            76.0f, 77.0f, 78.0f, 79.0f, 80.0f,
            81.0f, 82.0f, 83.0f, 84.0f, 85.0f,
            86.0f, 87.0f, 88.0f, 89.0f, 90.0f,
        })
    );

    auto input1 = MakeTensor<float, 3>(inputTensorInfo1, std::vector<float>(
        {
            1.0f, 2.0f,
            6.0f, 7.0f,

            31.0f, 32.0f,
            36.0f, 37.0f,

            61.0f, 62.0f,
            66.0f, 67.0f,
        })
    );

    auto input2 = MakeTensor<float, 3>(inputTensorInfo2, std::vector<float>(
        {
            11.0f, 12.0f,
            16.0f, 17.0f,
            21.0f, 22.0f,
            26.0f, 27.0f,

            41.0f, 42.0f,
            46.0f, 47.0f,
            51.0f, 52.0f,
            56.0f, 57.0f,

            71.0f, 72.0f,
            76.0f, 77.0f,
            81.0f, 82.0f,
            86.0f, 87.0f,
        })
    );

    auto input3 = MakeTensor<float, 3>(inputTensorInfo3, std::vector<float>(
        {
            3.0f, 4.0f, 5.0f,
            8.0f, 9.0f, 10.0f,
            13.0f, 14.0f, 15.0f,
            18.0f, 19.0f, 20.0f,
            23.0f, 24.0f, 25.0f,
            28.0f, 29.0f, 30.0f,

            33.0f, 34.0f, 35.0f,
            38.0f, 39.0f, 40.0f,
            43.0f, 44.0f, 45.0f,
            48.0f, 49.0f, 50.0f,
            53.0f, 54.0f, 55.0f,
            58.0f, 59.0f, 60.0f,
        })
    );

    auto input4 = MakeTensor<float, 3>(inputTensorInfo4, std::vector<float>(
        {
            63.0f, 64.0f, 65.0f,
            68.0f, 69.0f, 70.0f,
            73.0f, 74.0f, 75.0f,
            78.0f, 79.0f, 80.0f,
            83.0f, 84.0f, 85.0f,
            88.0f, 89.0f, 90.0f,
        })
    );

    std::vector<unsigned int> wOrigin1 = {0, 0, 0}; // extent of the window is defined by size of input[0]
    armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);

    std::vector<unsigned int> wOrigin2 = {0, 2, 0}; // extent of the window is defined by size of input[1]
    armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);

    std::vector<unsigned int> wOrigin3 = {0, 0, 2}; // extent of the window is defined by size of input[2]
    armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);

    std::vector<unsigned int> wOrigin4 = {2, 0, 2}; // extent of the window is defined by size of input[3]
    armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);

    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    bool subTensorsSupported = workloadFactory.SupportsSubTensors();

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo1);

    std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo2);

    std::unique_ptr<armnn::ITensorHandle> inputHandle3 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo3);

    std::unique_ptr<armnn::ITensorHandle> inputHandle4 =
        subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) :
            workloadFactory.CreateTensorHandle(inputTensorInfo4);

    armnn::MergerQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get());
    AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_ViewOrigins.push_back(window1);
    data.m_ViewOrigins.push_back(window2);
    data.m_ViewOrigins.push_back(window3);
    data.m_ViewOrigins.push_back(window4);

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    inputHandle3->Allocate();
    inputHandle4->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
    CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]);
    CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());

    return ret;
}

LayerTestResult<float,4> AdditionTest(armnn::IWorkloadFactory& workloadFactory)
{
    unsigned int batchSize = 2;
    unsigned int channels  = 2;
    unsigned int height    = 2;
    unsigned int width     = 3;

    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
    armnn::TensorInfo outputTensorInfo;

    unsigned int shape[] = {batchSize, channels, height, width};

    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>(
        {
            0.0f, 2.0f, 1.0f,
            0.2f, 1.0f, 2.0f,

            1.0f, 2.0f, 1.0f,
            0.2f, 1.0f, 2.0f,

            0.0f, 2.0f, 1.0f,
            4.2f, 1.0f, 2.0f,

            0.0f, 0.0f, 1.0f,
            0.2f, 1.0f, 2.0f,
        }));

    auto input2 = MakeTensor<float, 4>(inputTensorInfo2, std::vector<float>(
        {
            1.0f, 2.0f, 1.0f,
            0.0f, 1.0f, 2.0f,

            1.0f, 2.0f, -2.0f,
            0.2f, 1.0f, 2.0f,

            0.0f, 2.0f, 1.0f,
            4.2f, 0.0f, -3.0f,

            0.0f, 0.0f, 1.0f,
            0.7f, 1.0f, 5.0f,
        }));

    LayerTestResult<float,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>(
        {
            1.0f, 4.0f, 2.0f,
            0.2f, 2.0f, 4.0f,

            2.0f, 4.0f, -1.0f,
            0.4f, 2.0f, 4.0f,

            0.0f, 4.0f, 2.0f,
            8.4f, 1.0f, -1.0f,

            0.0f, 0.0f, 2.0f,
            0.9f, 2.0f, 7.0f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template <typename T>
LayerTestResult<T, 4> AdditionBroadcastTestImpl(armnn::IWorkloadFactory& workloadFactory,
    float qScale,
    int32_t qOffset)
{
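    // Addition broadcasts each dimension of size 1 to match the other operand:
    // {1, 3, 2, 1} + {1, 1, 2, 3} -> {1, 3, 2, 3}.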
    armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 2, 3}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());

    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo1.SetQuantizationScale(qScale);
        inputTensorInfo1.SetQuantizationOffset(qOffset);
        inputTensorInfo2.SetQuantizationScale(qScale);
        inputTensorInfo2.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
    }

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
        {
            0.0f,
            1.0f,

            2.0f,
            3.0f,

            4.0f,
            5.0f,
        }));

    auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f, 1.5f, 2.5f,
            3.5f, 4.5f, 5.5f,
        }));

    LayerTestResult<T,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f, 1.5f, 2.5f,
            4.5f, 5.5f, 6.5f,

            2.5f, 3.5f, 4.5f,
            6.5f, 7.5f, 8.5f,

            4.5f, 5.5f, 6.5f,
            8.5f, 9.5f, 10.5f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

template <typename T>
LayerTestResult<T, 4> AdditionBroadcast1ElementTestImpl(armnn::IWorkloadFactory& workloadFactory,
    float qScale,
    int32_t qOffset)
{
    armnn::TensorInfo inputTensorInfo1 = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());
    armnn::TensorInfo inputTensorInfo2 = armnn::TensorInfo({1, 1, 1, 1}, armnn::GetDataType<T>());
    armnn::TensorInfo outputTensorInfo = armnn::TensorInfo({1, 3, 2, 3}, armnn::GetDataType<T>());

    if (armnn::IsQuantizedType<T>())
    {
        inputTensorInfo1.SetQuantizationScale(qScale);
        inputTensorInfo1.SetQuantizationOffset(qOffset);
        inputTensorInfo2.SetQuantizationScale(qScale);
        inputTensorInfo2.SetQuantizationOffset(qOffset);
        outputTensorInfo.SetQuantizationScale(qScale);
        outputTensorInfo.SetQuantizationOffset(qOffset);
    }

    auto input1 = MakeTensor<T, 4>(inputTensorInfo1, QuantizedVector<T>(qScale, qOffset,
        {
             0.0f,  1.0f,  2.0f,
             3.0f,  4.0f,  5.0f,
             6.0f,  7.0f,  8.0f,
             9.0f, 10.0f, 11.0f,
            12.0f, 13.0f, 14.0f,
            15.0f, 16.0f, 17.0f,
        }));

    auto input2 = MakeTensor<T, 4>(inputTensorInfo2, QuantizedVector<T>(qScale, qOffset,
        {
            0.5f,
        }));

    LayerTestResult<T,4> ret(outputTensorInfo);
    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset,
        {
             0.5f,  1.5f,  2.5f,
             3.5f,  4.5f,  5.5f,
             6.5f,  7.5f,  8.5f,
             9.5f, 10.5f, 11.5f,
            12.5f, 13.5f, 14.5f,
            15.5f, 16.5f, 17.5f,
        }));

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    return ret;
}

LayerTestResult<float, 4> AdditionBroadcastTest(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcastTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 4> AdditionBroadcastUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcastTestImpl<uint8_t>(workloadFactory, 2.f, 0);
}

LayerTestResult<float, 4> AdditionBroadcast1ElementTest(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcast1ElementTestImpl<float>(workloadFactory, 0.0f, 0);
}

LayerTestResult<uint8_t, 4> AdditionBroadcast1ElementUint8Test(armnn::IWorkloadFactory& workloadFactory)
{
    return AdditionBroadcast1ElementTestImpl<uint8_t>(workloadFactory, 0.1333333f, 128);
}

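// The Compare* tests below run the same workload on both the factory under test and a
// reference factory; the first result is stored in 'output' and the reference result in
// 'outputExpected', so the caller can check the two backends against each other.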
LayerTestResult<float,4> CompareAdditionTest(armnn::IWorkloadFactory& workloadFactory,
                                             armnn::IWorkloadFactory& refWorkloadFactory)
{
    unsigned int batchSize = 4;
    unsigned int channels  = 1;
    unsigned int height    = 2;
    unsigned int width     = 3;

    armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
    armnn::TensorInfo outputTensorInfo;

    unsigned int shape[] = {batchSize, channels, height, width};

    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 1232);
    auto input2 = MakeRandomTensor<float, 4>(inputTensorInfo2, 456);

    LayerTestResult<float,4> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> inputHandle2Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo2);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::AdditionQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    armnn::AdditionQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo1, inputHandle1Ref.get());
    SetWorkloadInput(refData, refInfo, 1, inputTensorInfo2, inputHandle2Ref.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateAddition(refData, refInfo);

    inputHandle1->Allocate();
    inputHandle2->Allocate();
    outputHandle->Allocate();
    inputHandle1Ref->Allocate();
    inputHandle2Ref->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle2Ref.get(), &input2[0][0][0][0]);

    workload->Execute();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

LayerTestResult<float,4> MultiplicationTest(armnn::IWorkloadFactory& workloadFactory)
{
    const unsigned int width = 2;
    const unsigned int height = 2;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 2;

    armnn::TensorInfo inputTensorInfo0;
    armnn::TensorInfo inputTensorInfo1;
    armnn::TensorInfo outputTensorInfo;

    constexpr unsigned int shape[] = { batchSize, channelCount, height, width };
    constexpr std::size_t dimensionCount = std::extent<decltype(shape)>::value;

    inputTensorInfo0 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
    inputTensorInfo1 = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(dimensionCount, shape, armnn::DataType::Float32);

    auto input0 = MakeTensor<float, 4>(inputTensorInfo0, std::vector<float>({
        1,  1,  1,  1,    2,  2,  2,  2,
        3,  3,  3,  3,    4,  4,  4,  4 }));

    auto input1 = MakeTensor<float, 4>(inputTensorInfo1, std::vector<float>({
        2,  2,  2,  2,    3,  3,  3,  3,
        4,  4,  4,  4,    5,  5,  5,  5 }));

    LayerTestResult<float,4> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::MultiplicationQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);

    inputHandle0->Allocate();
    inputHandle1->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());

    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
        2,  2,  2,  2,    6,  6,  6,  6,
        12, 12, 12, 12,  20, 20, 20, 20 }));

    return ret;
}

LayerTestResult<float,4> CompareMultiplicationTest(armnn::IWorkloadFactory& workloadFactory,
                                                   armnn::IWorkloadFactory& refWorkloadFactory)
{
    const unsigned int width = 16;
    const unsigned int height = 32;
    const unsigned int channelCount = 2;
    const unsigned int batchSize = 5;

    armnn::TensorInfo inputTensorInfo0;
    armnn::TensorInfo inputTensorInfo1;
    armnn::TensorInfo outputTensorInfo;

    constexpr unsigned int shape[] = { batchSize, channelCount, height, width };

    inputTensorInfo0 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);

    LayerTestResult<float,4> comparisonResult(outputTensorInfo);

    auto input0 = MakeRandomTensor<float, 4>(inputTensorInfo0, 803506992);
    auto input1 = MakeRandomTensor<float, 4>(inputTensorInfo1, 54902257);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = workloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle0Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo0);
    std::unique_ptr<armnn::ITensorHandle> inputHandle1Ref = refWorkloadFactory.CreateTensorHandle(inputTensorInfo1);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::MultiplicationQueueDescriptor data;
    armnn::WorkloadInfo info;
    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    armnn::MultiplicationQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo0, inputHandle0Ref.get());
    SetWorkloadInput(refData, refInfo, 1, inputTensorInfo1, inputHandle1Ref.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateMultiplication(refData, refInfo);

    inputHandle0->Allocate();
    inputHandle1->Allocate();
    outputHandle->Allocate();
    inputHandle0Ref->Allocate();
    inputHandle1Ref->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle0.get(), &input0[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle0Ref.get(), &input0[0][0][0][0]);
    CopyDataToITensorHandle(inputHandle1Ref.get(), &input1[0][0][0][0]);

    workload->Execute();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&comparisonResult.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&comparisonResult.outputExpected[0][0][0][0], outputHandleRef.get());

    return comparisonResult;
}

LayerTestResult<float,4> CompareBatchNormTest(armnn::IWorkloadFactory& workloadFactory,
                                              armnn::IWorkloadFactory& refWorkloadFactory)
{
    const unsigned int width     = 2;
    const unsigned int height    = 3;
    const unsigned int channels  = 5;
    const unsigned int batchSize = 3;

    armnn::TensorInfo inputTensorInfo;
    armnn::TensorInfo outputTensorInfo;
    armnn::TensorInfo tensorInfo;

    constexpr unsigned int shape[]       = {batchSize, channels, height, width};
    constexpr unsigned int tensorShape[] = {channels};

    inputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::Float32);
    tensorInfo = armnn::TensorInfo(1, tensorShape, armnn::DataType::Float32);

    auto input = MakeRandomTensor<float, 4>(inputTensorInfo, 21312);

    auto mean     = MakeRandomTensor<float, 1>(tensorInfo, 123);
    auto variance = MakeRandomTensor<float, 1>(tensorInfo, 234, 0.0f);
    auto beta     = MakeRandomTensor<float, 1>(tensorInfo, 123);
    auto gamma    = MakeRandomTensor<float, 1>(tensorInfo, 345);

    LayerTestResult<float,4> ret(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> inputHandleRef  = refWorkloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandleRef = refWorkloadFactory.CreateTensorHandle(outputTensorInfo);

    armnn::BatchNormalizationQueueDescriptor data;
    armnn::WorkloadInfo info;
    armnn::ScopedCpuTensorHandle meanTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle varianceTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle betaTensor(tensorInfo);
    armnn::ScopedCpuTensorHandle gammaTensor(tensorInfo);

    AllocateAndCopyDataToITensorHandle(&meanTensor, &mean[0]);
    AllocateAndCopyDataToITensorHandle(&varianceTensor, &variance[0]);
    AllocateAndCopyDataToITensorHandle(&betaTensor, &beta[0]);
    AllocateAndCopyDataToITensorHandle(&gammaTensor, &gamma[0]);

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
    data.m_Mean             = &meanTensor;
    data.m_Variance         = &varianceTensor;
    data.m_Beta             = &betaTensor;
    data.m_Gamma            = &gammaTensor;
    data.m_Parameters.m_Eps = 0.01f;

    armnn::BatchNormalizationQueueDescriptor refData = data;
    armnn::WorkloadInfo refInfo = info;
    SetWorkloadInput(refData, refInfo, 0, inputTensorInfo, inputHandleRef.get());
    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, outputHandleRef.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
    std::unique_ptr<armnn::IWorkload> workloadRef = refWorkloadFactory.CreateBatchNormalization(refData, refInfo);

    inputHandle->Allocate();
    outputHandle->Allocate();
    inputHandleRef->Allocate();
    outputHandleRef->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
    CopyDataToITensorHandle(inputHandleRef.get(), &input[0][0][0][0]);

    workload->Execute();
    workloadRef->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
    CopyDataFromITensorHandle(&ret.outputExpected[0][0][0][0], outputHandleRef.get());

    return ret;
}

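// Helper that concatenates an arbitrary number of inputs along 'concatDim': it builds a
// MergerQueueDescriptor with one view origin per input, creates the input handles (as
// sub-tensors of the output where the backend supports them), runs the merger workload,
// and copies the result into 'output'.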
void Concatenate(armnn::IWorkloadFactory& workloadFactory,
    std::initializer_list<const armnn::TensorInfo> inputTensorInfos,
    std::initializer_list<void*> inputs,
    const armnn::TensorInfo& outputTensorInfo,
    void* output,
    unsigned int concatDim)
{
    armnn::MergerQueueDescriptor queueDescriptor;

    std::vector<armnn::TensorShape> shapes;
    shapes.reserve(inputTensorInfos.size());
    for (const armnn::TensorInfo& it: inputTensorInfos)
    {
        shapes.push_back(it.GetShape());
    }
    armnn::OriginsDescriptor viewsDescriptor = armnn::CreateMergerDescriptorForConcatenation(shapes.begin(),
        shapes.end(), concatDim);

    queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
    for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
    {
        queueDescriptor.m_ViewOrigins.emplace_back(std::vector<unsigned int>(viewsDescriptor.GetViewOrigin(i),
            viewsDescriptor.GetViewOrigin(i) + viewsDescriptor.GetNumDimensions()));
    }

    const size_t inputCount = inputTensorInfos.size();

    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);

    std::vector<std::unique_ptr<armnn::ITensorHandle>> inputHandles;
    inputHandles.reserve(inputCount);

    const bool subTensorsSupported = workloadFactory.SupportsSubTensors();
    for (unsigned int i = 0; i < inputCount; ++i)
    {
        const armnn::TensorInfo& inputTensorInfo = inputTensorInfos.begin()[i];

        std::unique_ptr<armnn::ITensorHandle> inputHandle = subTensorsSupported ?
            workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo.GetShape(),
                queueDescriptor.m_ViewOrigins[i].m_Origin.data())
            : workloadFactory.CreateTensorHandle(inputTensorInfo);

        inputHandles.emplace_back(std::move(inputHandle));
    }

    armnn::WorkloadInfo workloadInfo;

    for (unsigned int i = 0; i < inputCount; ++i)
    {
        AddInputToWorkload(queueDescriptor, workloadInfo, inputTensorInfos.begin()[i], inputHandles[i].get());
    }

    AddOutputToWorkload(queueDescriptor, workloadInfo, outputTensorInfo, outputHandle.get());

    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(queueDescriptor, workloadInfo);

    for (auto& inputHandle : inputHandles)
    {
        inputHandle->Allocate();
    }

    outputHandle->Allocate();

    unsigned int nextInputId = 0;
    for (auto& inputHandle : inputHandles)
    {
        CopyDataToITensorHandle(inputHandle.get(), *(inputs.begin() + nextInputId++));
    }

    workload->Execute();

    CopyDataFromITensorHandle(output, outputHandle.get());
}

template <typename T>
LayerTestResult<T, 1> Concatenation1dTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale, int32_t qOffset)
{
    armnn::TensorInfo inputTensorInfo({ 3 }, armnn::GetDataType<T>());

    auto input0 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 1.0f, 2.0f, 3.0f }));
    auto input1 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 4.0f, 5.0f, 6.0f }));
    auto input2 = MakeTensor<T, 1>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, { 7.0f, 8.0f, 9.0f }));

    armnn::TensorInfo outputTensorInfo({ 9 }, armnn::GetDataType<T>());

    LayerTestResult<T, 1> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate(workloadFactory,
        { inputTensorInfo, inputTensorInfo, inputTensorInfo },
        { input0.data(), input1.data(), input2.data() },
        outputTensorInfo,
        output.data(),
        0);

    result.output = MakeTensor<T, 1>(outputTensorInfo, output);
    result.outputExpected = MakeTensor<T, 1>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f
    }));

    return result;
}

LayerTestResult<float, 1> Concatenation1dTest(armnn::IWorkloadFactory& workloadFactory)
{
    return Concatenation1dTestImpl<float>(workloadFactory, 0.0f, 0);
}

template <typename T>
LayerTestResult<T, 2> Concatenation2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
    const armnn::TensorInfo& outputTensorInfo,
    unsigned int dimension,
    const float qScale,
    const int32_t qOffset)
{
    armnn::TensorInfo inputTensorInfo({ 2, 3 }, armnn::GetDataType<T>());

    auto input0 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
        // Batch 0
        1.0f, 2.0f, 3.0f,

        // Batch 1
        10.0f, 11.0f, 12.0f,
    }));

    auto input1 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
        // Batch 0
        4.0f, 5.0f, 6.0f,

        // Batch 1
        13.0f, 14.0f, 15.0f,
    }));

    auto input2 = MakeTensor<T, 2>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
        // Batch 0
        7.0f, 8.0f, 9.0f,

        // Batch 1
        16.0f, 17.0f, 18.0f,
    }));

    LayerTestResult<T, 2> result(outputTensorInfo);

    std::vector<T> output;
    output.resize(outputTensorInfo.GetNumElements());
    Concatenate(workloadFactory,
        { inputTensorInfo, inputTensorInfo, inputTensorInfo },
        { input0.data(), input1.data(), input2.data() },
        outputTensorInfo,
        output.data(),
        dimension);

    result.output = MakeTensor<T, 2>(outputTensorInfo, output);
    return result;
}

template <typename T>
LayerTestResult<T, 2> Concatenation2dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory,
    float qScale, int32_t qOffset)
{
    armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());

    LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 0, qScale, qOffset);
    result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
        // Batch 0
        1.0f, 2.0f, 3.0f,

        // Batch 1
        10.0f, 11.0f, 12.0f,

        // Batch 2
        4.0f, 5.0f, 6.0f,

        // Batch 3
        13.0f, 14.0f, 15.0f,

        // Batch 4
        7.0f, 8.0f, 9.0f,

        // Batch 5
        16.0f, 17.0f, 18.0f,
    }));

    return result;
}

LayerTestResult<float, 2> Concatenation2dDim0Test(armnn::IWorkloadFactory& workloadFactory)
{
    return Concatenation2dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
1402 }
1403
1404 template <typename T>
1405 LayerTestResult<T, 2> Concatenation2dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
1406     float qScale, int32_t qOffset)
1407 {
1408     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
1409
1410     LayerTestResult<T, 2> result = Concatenation2dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
1411     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1412         // Batch 0
1413         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
1414
1415         // Batch 1
1416         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
1417     }));
1418
1419     return result;
1420 }
1421
1422 LayerTestResult<float, 2> Concatenation2dDim1Test(armnn::IWorkloadFactory& workloadFactory)
1423 {
1424     return Concatenation2dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
1425 }
1426
1427 template <typename T>
1428 LayerTestResult<T, 2> Concatenation2dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1429     int32_t qOffset)
1430 {
1431     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
1432     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1433         // Batch 0
1434         1.0f, 2.0f, 3.0f,
1435
1436         // Batch 1
1437         10.0f, 11.0f, 12.0f,
1438     }));
1439
1440     armnn::TensorInfo input1TensorInfo({ 3, 3 }, armnn::GetDataType<T>());
1441     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1442         // Batch 0
1443         4.0f, 5.0f, 6.0f,
1444
1445         // Batch 1
1446         13.0f, 14.0f, 15.0f,
1447
1448         // Batch 2
1449         7.0f, 8.0f, 9.0f,
1450     }));
1451
1452     armnn::TensorInfo input2TensorInfo({ 1, 3 }, armnn::GetDataType<T>());
1453     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1454         // Batch 0
1455         16.0f, 17.0f, 18.0f,
1456     }));
1457
1458     armnn::TensorInfo outputTensorInfo({ 6, 3 }, armnn::GetDataType<T>());
1459     LayerTestResult<T, 2> result(outputTensorInfo);
1460
1461     std::vector<T> output;
1462     output.resize(outputTensorInfo.GetNumElements());
1463     Concatenate(workloadFactory,
1464         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1465         { input0.data(), input1.data(), input2.data() },
1466         outputTensorInfo,
1467         output.data(),
1468         0);
1469
1470     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
1471     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1472         // Batch 0
1473         1.0f, 2.0f, 3.0f,
1474
1475         // Batch 1
1476         10.0f, 11.0f, 12.0f,
1477
1478         // Batch 2
1479         4.0f, 5.0f, 6.0f,
1480
1481         // Batch 3
1482         13.0f, 14.0f, 15.0f,
1483
1484         // Batch 4
1485         7.0f, 8.0f, 9.0f,
1486
1487         // Batch 5
1488         16.0f, 17.0f, 18.0f,
1489     }));
1490
1491     return result;
1492 }
1493
1494 LayerTestResult<float, 2> Concatenation2dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
1495 {
1496     return Concatenation2dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
1497 }
1498
1499 template <typename T>
1500 LayerTestResult<T, 2> Concatenation2dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1501     int32_t qOffset)
1502 {
1503     armnn::TensorInfo input0TensorInfo({ 2, 3 }, armnn::GetDataType<T>());
1504     auto input0 = MakeTensor<T, 2>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1505         // Batch 0
1506         1.0f, 2.0f, 3.0f,
1507
1508         // Batch 1
1509         10.0f, 11.0f, 12.0f,
1510     }));
1511
1512     armnn::TensorInfo input1TensorInfo({ 2, 5 }, armnn::GetDataType<T>());
1513     auto input1 = MakeTensor<T, 2>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1514         // Batch 0
1515         4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
1516
1517         // Batch 1
1518         13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
1519     }));
1520
1521     armnn::TensorInfo input2TensorInfo({ 2, 1 }, armnn::GetDataType<T>());
1522     auto input2 = MakeTensor<T, 2>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1523         // Batch 0
1524         9.0f,
1525
1526         // Batch 1
1527         18.0f
1528     }));
1529
1530     armnn::TensorInfo outputTensorInfo({ 2, 9 }, armnn::GetDataType<T>());
1531     LayerTestResult<T, 2> result(outputTensorInfo);
1532
1533     std::vector<T> output;
1534     output.resize(outputTensorInfo.GetNumElements());
1535     Concatenate(workloadFactory,
1536         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1537         { input0.data(), input1.data(), input2.data() },
1538         outputTensorInfo,
1539         output.data(),
1540         1);
1541
1542     result.output = MakeTensor<T, 2>(outputTensorInfo, output);
1543     result.outputExpected = MakeTensor<T, 2>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1544         // Batch 0
1545         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
1546
1547         // Batch 1
1548         10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
1549     }));
1550
1551     return result;
1552 }
1553
1554 LayerTestResult<float, 2> Concatenation2dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
1555 {
1556     return Concatenation2dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
1557 }
1558
1559 template <typename T>
1560 LayerTestResult<T, 3> Concatenation3dTestImpl(armnn::IWorkloadFactory& workloadFactory,
1561     const armnn::TensorInfo& outputTensorInfo,
1562     unsigned int dimension,
1563     float qScale,
1564     int32_t qOffset)
1565 {
1566     armnn::TensorInfo inputTensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
1567
1568     auto input0 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1569         // Batch 0, Channel 0
1570         1.0f, 2.0f,
1571
1572         // Batch 0, Channel 1
1573         3.0f, 4.0f,
1574
1575         // Batch 0, Channel 2
1576         5.0f, 6.0f,
1577
1578         // Batch 1, Channel 0
1579         19.0f, 20.0f,
1580
1581         // Batch 1, Channel 1
1582         21.0f, 22.0f,
1583
1584         // Batch 1, Channel 2
1585         23.0f, 24.0f
1586     }));
1587
1588     auto input1 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1589         // Batch 0, Channel 0
1590         7.0f, 8.0f,
1591
1592         // Batch 0, Channel 1
1593         9.0f, 10.0f,
1594
1595         // Batch 0, Channel 2
1596         11.0f, 12.0f,
1597
1598         // Batch 1, Channel 0
1599         25.0f, 26.0f,
1600
1601         // Batch 1, Channel 1
1602         27.0f, 28.0f,
1603
1604         // Batch 1, Channel 2
1605         29.0f, 30.0f
1606     }));
1607
1608     auto input2 = MakeTensor<T, 3>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1609         // Batch 0, Channel 0
1610         13.0f, 14.0f,
1611
1612         // Batch 0, Channel 1
1613         15.0f, 16.0f,
1614
1615         // Batch 0, Channel 2
1616         17.0f, 18.0f,
1617
1618         // Batch 1, Channel 0
1619         31.0f, 32.0f,
1620
1621         // Batch 1, Channel 1
1622         33.0f, 34.0f,
1623
1624         // Batch 1, Channel 2
1625         35.0f, 36.0f
1626     }));
1627
1628     LayerTestResult<T, 3> result(outputTensorInfo);
1629
1630     std::vector<T> output;
1631     output.resize(outputTensorInfo.GetNumElements());
1632     Concatenate(workloadFactory,
1633         { inputTensorInfo, inputTensorInfo, inputTensorInfo },
1634         { input0.data(), input1.data(), input2.data() },
1635         outputTensorInfo,
1636         output.data(),
1637         dimension);
1638
1639     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
1640     return result;
1641 }
1642
1643 template <typename T>
1644 LayerTestResult<T, 3> Concatenation3dDim0TestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1645     int32_t qOffset)
1646 {
1647     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
1648
1649     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 0,
1650         qScale, qOffset);
1651     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1652         // Batch 0, Channel 0
1653         1.0f, 2.0f,
1654
1655         // Batch 0, Channel 1
1656         3.0f, 4.0f,
1657
1658         // Batch 0, Channel 2
1659         5.0f, 6.0f,
1660
1661         // Batch 1, Channel 0
1662         19.0f, 20.0f,
1663
1664         // Batch 1, Channel 1
1665         21.0f, 22.0f,
1666
1667         // Batch 1, Channel 2
1668         23.0f, 24.0f,
1669
1670         // Batch 2, Channel 0
1671         7.0f, 8.0f,
1672
1673         // Batch 2, Channel 1
1674         9.0f, 10.0f,
1675
1676         // Batch 2, Channel 2
1677         11.0f, 12.0f,
1678
1679         // Batch 3, Channel 0
1680         25.0f, 26.0f,
1681
1682         // Batch 3, Channel 1
1683         27.0f, 28.0f,
1684
1685         // Batch 3, Channel 2
1686         29.0f, 30.0f,
1687
1688         // Batch 4, Channel 0
1689         13.0f, 14.0f,
1690
1691         // Batch 4, Channel 1
1692         15.0f, 16.0f,
1693
1694         // Batch 4, Channel 2
1695         17.0f, 18.0f,
1696
1697         // Batch 5, Channel 0
1698         31.0f, 32.0f,
1699
1700         // Batch 5, Channel 1
1701         33.0f, 34.0f,
1702
1703         // Batch 5, Channel 2
1704         35.0f, 36.0f
1705     }));
1706     return result;
1707 }
1708
1709 LayerTestResult<float, 3> Concatenation3dDim0Test(armnn::IWorkloadFactory& workloadFactory)
1710 {
1711     return Concatenation3dDim0TestImpl<float>(workloadFactory, 0.0f, 0);
1712 }
1713
1714 template <typename T>
1715 LayerTestResult<T, 3> Concatenation3dDim1TestImpl(armnn::IWorkloadFactory& workloadFactory,
1716     float qScale, int32_t qOffset)
1717 {
1718     armnn::TensorInfo outputTensorInfo({ 2, 9, 2 }, armnn::GetDataType<T>());
1719
1720     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 1, qScale, qOffset);
1721     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1722         // Batch 0, Channel 0
1723         1.0f, 2.0f,
1724
1725         // Batch 0, Channel 1
1726         3.0f, 4.0f,
1727
1728         // Batch 0, Channel 2
1729         5.0f, 6.0f,
1730
1731         // Batch 0, Channel 3
1732         7.0f, 8.0f,
1733
1734         // Batch 0, Channel 4
1735         9.0f, 10.0f,
1736
1737         // Batch 0, Channel 5
1738         11.0f, 12.0f,
1739
1740         // Batch 0, Channel 6
1741         13.0f, 14.0f,
1742
1743         // Batch 0, Channel 7
1744         15.0f, 16.0f,
1745
1746         // Batch 0, Channel 8
1747         17.0f, 18.0f,
1748
1749         // Batch 1, Channel 0
1750         19.0f, 20.0f,
1751
1752         // Batch 1, Channel 1
1753         21.0f, 22.0f,
1754
1755         // Batch 1, Channel 2
1756         23.0f, 24.0f,
1757
1758         // Batch 1, Channel 3
1759         25.0f, 26.0f,
1760
1761         // Batch 1, Channel 4
1762         27.0f, 28.0f,
1763
1764         // Batch 1, Channel 5
1765         29.0f, 30.0f,
1766
1767         // Batch 1, Channel 6
1768         31.0f, 32.0f,
1769
1770         // Batch 1, Channel 7
1771         33.0f, 34.0f,
1772
1773         // Batch 1, Channel 8
1774         35.0f, 36.0f
1775     }));
1776
1777     return result;
1778 }
1779
1780 LayerTestResult<float, 3> Concatenation3dDim1Test(armnn::IWorkloadFactory& workloadFactory)
1781 {
1782     return Concatenation3dDim1TestImpl<float>(workloadFactory, 0.0f, 0);
1783 }
1784
1785 template <typename T>
1786 LayerTestResult<T, 3> Concatenation3dDim2TestImpl(armnn::IWorkloadFactory& workloadFactory,
1787     float qScale, int32_t qOffset)
1788 {
1789     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
1790
1791     LayerTestResult<T, 3> result = Concatenation3dTestImpl<T>(workloadFactory, outputTensorInfo, 2, qScale, qOffset);
1792     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1793         // Batch 0, Channel 0
1794         1.0f, 2.0f, 7.0f, 8.0f, 13.0f, 14.0f,
1795
1796         // Batch 0, Channel 1
1797         3.0f, 4.0f, 9.0f, 10.0f, 15.0f, 16.0f,
1798
1799         // Batch 0, Channel 2
1800         5.0f, 6.0f, 11.0f, 12.0f, 17.0f, 18.0f,
1801
1802         // Batch 1, Channel 0
1803         19.0f, 20.0f, 25.0f, 26.0f, 31.0f, 32.0f,
1804
1805         // Batch 1, Channel 1
1806         21.0f, 22.0f, 27.0f, 28.0f, 33.0f, 34.0f,
1807
1808         // Batch 1, Channel 2
1809         23.0f, 24.0f, 29.0f, 30.0f, 35.0f, 36.0f,
1810     }));
1811
1812     return result;
1813 }
1814
1815 LayerTestResult<float, 3> Concatenation3dDim2Test(armnn::IWorkloadFactory& workloadFactory)
1816 {
1817     return Concatenation3dDim2TestImpl<float>(workloadFactory, 0.0f, 0);
1818 }
1819
1820 template <typename T>
1821 LayerTestResult<T, 3> Concatenation3dDim0DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1822     int32_t qOffset)
1823 {
1824     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
1825     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1826             // Batch 0, Channel 0
1827             1.0f, 2.0f,
1828
1829             // Batch 0, Channel 1
1830             3.0f, 4.0f,
1831
1832             // Batch 0, Channel 2
1833             5.0f, 6.0f,
1834
1835             // Batch 1, Channel 0
1836             19.0f, 20.0f,
1837
1838             // Batch 1, Channel 1
1839             21.0f, 22.0f,
1840
1841             // Batch 1, Channel 2
1842             23.0f, 24.0f
1843     }));
1844
1845     armnn::TensorInfo input1TensorInfo({ 1, 3, 2 }, armnn::GetDataType<T>());
1846     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1847             // Batch 0, Channel 0
1848             7.0f, 8.0f,
1849
1850             // Batch 0, Channel 1
1851             9.0f, 10.0f,
1852
1853             // Batch 0, Channel 2
1854             11.0f, 12.0f,
1855     }));
1856
1857     armnn::TensorInfo input2TensorInfo({ 3, 3, 2 }, armnn::GetDataType<T>());
1858     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1859             // Batch 0, Channel 0
1860             25.0f, 26.0f,
1861
1862             // Batch 0, Channel 1
1863             27.0f, 28.0f,
1864
1865             // Batch 0, Channel 2
1866             29.0f, 30.0f,
1867
1868             // Batch 1, Channel 0
1869             13.0f, 14.0f,
1870
1871             // Batch 1, Channel 1
1872             15.0f, 16.0f,
1873
1874             // Batch 1, Channel 2
1875             17.0f, 18.0f,
1876
1877             // Batch 2, Channel 0
1878             31.0f, 32.0f,
1879
1880             // Batch 2, Channel 1
1881             33.0f, 34.0f,
1882
1883             // Batch 2, Channel 2
1884             35.0f, 36.0f
1885     }));
1886
1887     armnn::TensorInfo outputTensorInfo({ 6, 3, 2 }, armnn::GetDataType<T>());
1888     LayerTestResult<T, 3> result(outputTensorInfo);
1889
1890     std::vector<T> output;
1891     output.resize(outputTensorInfo.GetNumElements());
1892     Concatenate(workloadFactory,
1893         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
1894         { input0.data(), input1.data(), input2.data() },
1895         outputTensorInfo,
1896         output.data(),
1897         0);
1898
1899     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
1900     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
1901         // Batch 0, Channel 0
1902         1.0f, 2.0f,
1903
1904         // Batch 0, Channel 1
1905         3.0f, 4.0f,
1906
1907         // Batch 0, Channel 2
1908         5.0f, 6.0f,
1909
1910         // Batch 1, Channel 0
1911         19.0f, 20.0f,
1912
1913         // Batch 1, Channel 1
1914         21.0f, 22.0f,
1915
1916         // Batch 1, Channel 2
1917         23.0f, 24.0f,
1918
1919         // Batch 2, Channel 0
1920         7.0f, 8.0f,
1921
1922         // Batch 2, Channel 1
1923         9.0f, 10.0f,
1924
1925         // Batch 2, Channel 2
1926         11.0f, 12.0f,
1927
1928         // Batch 3, Channel 0
1929         25.0f, 26.0f,
1930
1931         // Batch 3, Channel 1
1932         27.0f, 28.0f,
1933
1934         // Batch 3, Channel 2
1935         29.0f, 30.0f,
1936
1937         // Batch 4, Channel 0
1938         13.0f, 14.0f,
1939
1940         // Batch 4, Channel 1
1941         15.0f, 16.0f,
1942
1943         // Batch 4, Channel 2
1944         17.0f, 18.0f,
1945
1946         // Batch 5, Channel 0
1947         31.0f, 32.0f,
1948
1949         // Batch 5, Channel 1
1950         33.0f, 34.0f,
1951
1952         // Batch 5, Channel 2
1953         35.0f, 36.0f
1954     }));
1955
1956     return result;
1957 }
1958
1959 LayerTestResult<float, 3> Concatenation3dDim0DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
1960 {
1961     return Concatenation3dDim0DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
1962 }
1963
1964 template <typename T>
1965 LayerTestResult<T, 3> Concatenation3dDim1DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
1966     int32_t qOffset)
1967 {
1968     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
1969     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1970         // Batch 0, Channel 0
1971         1.0f, 2.0f,
1972
1973         // Batch 0, Channel 1
1974         3.0f, 4.0f,
1975
1976         // Batch 0, Channel 2
1977         5.0f, 6.0f,
1978
1979         // Batch 1, Channel 0
1980         19.0f, 20.0f,
1981
1982         // Batch 1, Channel 1
1983         21.0f, 22.0f,
1984
1985         // Batch 1, Channel 2
1986         23.0f, 24.0f
1987     }));
1988
1989     armnn::TensorInfo input1TensorInfo({ 2, 4, 2 }, armnn::GetDataType<T>());
1990     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
1991         // Batch 0, Channel 0
1992         7.0f, 8.0f,
1993
1994         // Batch 0, Channel 1
1995         9.0f, 10.0f,
1996
1997         // Batch 0, Channel 2
1998         11.0f, 12.0f,
1999
2000         // Batch 0, Channel 3
2001         25.0f, 26.0f,
2002
2003         // Batch 1, Channel 0
2004         27.0f, 28.0f,
2005
2006         // Batch 1, Channel 1
2007         29.0f, 30.0f,
2008
2009         // Batch 1, Channel 2
2010         13.0f, 14.0f,
2011
2012         // Batch 1, Channel 3
2013         15.0f, 16.0f,
2014     }));
2015
2016     armnn::TensorInfo input2TensorInfo({ 2, 1, 2 }, armnn::GetDataType<T>());
2017     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2018         // Batch 0, Channel 0
2019         17.0f, 18.0f,
2020
2021         // Batch 1, Channel 0
2022         31.0f, 32.0f,
2023     }));
2024
2025     armnn::TensorInfo outputTensorInfo({ 2, 8, 2 }, armnn::GetDataType<T>());
2026     LayerTestResult<T, 3> result(outputTensorInfo);
2027
2028     std::vector<T> output;
2029     output.resize(outputTensorInfo.GetNumElements());
2030     Concatenate(workloadFactory,
2031         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2032         { input0.data(), input1.data(), input2.data() },
2033         outputTensorInfo,
2034         output.data(),
2035         1);
2036
2037     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2038     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2039         // Batch 0, Channel 0
2040         1.0f, 2.0f,
2041
2042         // Batch 0, Channel 1
2043         3.0f, 4.0f,
2044
2045         // Batch 0, Channel 2
2046         5.0f, 6.0f,
2047
2048         // Batch 0, Channel 3
2049         7.0f, 8.0f,
2050
2051         // Batch 0, Channel 4
2052         9.0f, 10.0f,
2053
2054         // Batch 0, Channel 5
2055         11.0f, 12.0f,
2056
2057         // Batch 0, Channel 6
2058         25.0f, 26.0f,
2059
2060         // Batch 0, Channel 7
2061         17.0f, 18.0f,
2062
2063         // Batch 1, Channel 0
2064         19.0f, 20.0f,
2065
2066         // Batch 1, Channel 1
2067         21.0f, 22.0f,
2068
2069         // Batch 1, Channel 2
2070         23.0f, 24.0f,
2071
2072         // Batch 1, Channel 3
2073         27.0f, 28.0f,
2074
2075         // Batch 1, Channel 4
2076         29.0f, 30.0f,
2077
2078         // Batch 1, Channel 5
2079         13.0f, 14.0f,
2080
2081         // Batch 1, Channel 6
2082         15.0f, 16.0f,
2083
2084         // Batch 1, Channel 7
2085         31.0f, 32.0f,
2086     }));
2087
2088     return result;
2089 }
2090
2091 LayerTestResult<float, 3> Concatenation3dDim1DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2092 {
2093     return Concatenation3dDim1DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2094 }
2095
2096 template <typename T>
2097 LayerTestResult<T, 3> Concatenation3dDim2DiffInputDimsTestImpl(armnn::IWorkloadFactory& workloadFactory, float qScale,
2098     int32_t qOffset)
2099 {
2100     armnn::TensorInfo input0TensorInfo({ 2, 3, 2 }, armnn::GetDataType<T>());
2101     auto input0 = MakeTensor<T, 3>(input0TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2102         // Batch 0, Channel 0
2103         1.0f, 2.0f,
2104
2105         // Batch 0, Channel 1
2106         3.0f, 4.0f,
2107
2108         // Batch 0, Channel 2
2109         5.0f, 6.0f,
2110
2111         // Batch 1, Channel 0
2112         19.0f, 20.0f,
2113
2114         // Batch 1, Channel 1
2115         21.0f, 22.0f,
2116
2117         // Batch 1, Channel 2
2118         23.0f, 24.0f
2119     }));
2120
2121     armnn::TensorInfo input1TensorInfo({ 2, 3, 1 }, armnn::GetDataType<T>());
2122     auto input1 = MakeTensor<T, 3>(input1TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2123         // Batch 0, Channel 0
2124         7.0f,
2125
2126         // Batch 0, Channel 1
2127         9.0f,
2128
2129         // Batch 0, Channel 2
2130         11.0f,
2131
2132         // Batch 1, Channel 0
2133         25.0f,
2134
2135         // Batch 1, Channel 1
2136         27.0f,
2137
2138         // Batch 1, Channel 2
2139         29.0f
2140     }));
2141
2142     armnn::TensorInfo input2TensorInfo({ 2, 3, 3 }, armnn::GetDataType<T>());
2143     auto input2 = MakeTensor<T, 3>(input2TensorInfo, QuantizedVector<T>(qScale, qOffset, {
2144         // Batch 0, Channel 0
2145         13.0f, 14.0f, 50.0f,
2146
2147         // Batch 0, Channel 1
2148         15.0f, 16.0f, 51.0f,
2149
2150         // Batch 0, Channel 2
2151         17.0f, 18.0f, 52.0f,
2152
2153         // Batch 1, Channel 0
2154         31.0f, 32.0f, 53.0f,
2155
2156         // Batch 1, Channel 1
2157         33.0f, 34.0f, 54.0f,
2158
2159         // Batch 1, Channel 2
2160         35.0f, 36.0f, 55.0f,
2161     }));
2162
2163     armnn::TensorInfo outputTensorInfo({ 2, 3, 6 }, armnn::GetDataType<T>());
2164     LayerTestResult<T, 3> result(outputTensorInfo);
2165
2166     std::vector<T> output;
2167     output.resize(outputTensorInfo.GetNumElements());
2168     Concatenate(workloadFactory,
2169         { input0TensorInfo, input1TensorInfo, input2TensorInfo },
2170         { input0.data(), input1.data(), input2.data() },
2171         outputTensorInfo,
2172         output.data(),
2173         2);
2174
2175     result.output = MakeTensor<T, 3>(outputTensorInfo, output);
2176     result.outputExpected = MakeTensor<T, 3>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, {
2177         // Batch 0, Channel 0
2178         1.0f, 2.0f, 7.0f, 13.0f, 14.0f, 50.0f,
2179
2180         // Batch 0, Channel 1
2181         3.0f, 4.0f, 9.0f, 15.0f, 16.0f, 51.0f,
2182
2183         // Batch 0, Channel 2
2184         5.0f, 6.0f, 11.0f, 17.0f, 18.0f, 52.0f,
2185
2186         // Batch 1, Channel 0
2187         19.0f, 20.0f, 25.0f, 31.0f, 32.0f, 53.0f,
2188
2189         // Batch 1, Channel 1
2190         21.0f, 22.0f, 27.0f, 33.0f, 34.0f, 54.0f,
2191
2192         // Batch 1, Channel 2
2193         23.0f, 24.0f, 29.0f, 35.0f, 36.0f, 55.0f,
2194     }));
2195
2196     return result;
2197 }
2198
2199 LayerTestResult<float, 3> Concatenation3dDim2DiffInputDimsTest(armnn::IWorkloadFactory& workloadFactory)
2200 {
2201     return Concatenation3dDim2DiffInputDimsTestImpl<float>(workloadFactory, 0.0f, 0);
2202 }
2203
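// The output dimensions match the input exactly, so this resize is a no-op and the output is expected to be
// a bit-exact copy of the input.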
2204 LayerTestResult<float, 4> ResizeBilinearNopTest(armnn::IWorkloadFactory& workloadFactory)
2205 {
2206     constexpr unsigned int inputWidth = 4;
2207     constexpr unsigned int inputHeight = 4;
2208     constexpr unsigned int inputChannels = 1;
2209     constexpr unsigned int inputBatchSize = 1;
2210
2211     constexpr unsigned int outputWidth = inputWidth;
2212     constexpr unsigned int outputHeight = inputHeight;
2213     constexpr unsigned int outputChannels = inputChannels;
2214     constexpr unsigned int outputBatchSize = inputBatchSize;
2215
2216     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2217         armnn::DataType::Float32);
2218     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2219         armnn::DataType::Float32);
2220
2221     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2222         1.0f, 2.0f, 3.0f, 4.0f,
2223         2.0f, 3.0f, 4.0f, 5.0f,
2224         3.0f, 4.0f, 5.0f, 6.0f,
2225         4.0f, 5.0f, 6.0f, 7.0f
2226     }));
2227
2228     LayerTestResult<float, 4> result(outputTensorInfo);
2229     result.outputExpected = input;
2230
2231     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2232     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2233
2234     armnn::ResizeBilinearQueueDescriptor descriptor;
2235     armnn::WorkloadInfo info;
2236     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2237     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2238
2239     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2240
2241     inputHandle->Allocate();
2242     outputHandle->Allocate();
2243     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2244
2245     workload->Execute();
2246
2247     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2248     return result;
2249 }
2250
2251 LayerTestResult<float, 4> SimpleResizeBilinearTest(armnn::IWorkloadFactory& workloadFactory)
2252 {
2253     constexpr unsigned int inputWidth = 2;
2254     constexpr unsigned int inputHeight = 2;
2255     constexpr unsigned int inputChannels = 1;
2256     constexpr unsigned int inputBatchSize = 1;
2257
2258     constexpr unsigned int outputWidth = inputWidth / 2;
2259     constexpr unsigned int outputHeight = inputHeight / 2;
2260     constexpr unsigned int outputChannels = inputChannels;
2261     constexpr unsigned int outputBatchSize = inputBatchSize;
2262
2263     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2264         armnn::DataType::Float32);
2265     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2266         armnn::DataType::Float32);
2267
2268     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2269         1.0f, 255.0f,
2270         200.0f, 250.0f,
2271     }));
2272
2273     // The 'resize bilinear' operation projects the top-left corner of each output texel into the input image,
2274     // then derives the interpolation weights from that projected position. Note this differs from projecting the
2275     // centre of the output texel - so we expect the single element of the 1x1 output matrix to be the value
2276     // that was at position (0,0) of the input matrix (rather than an average, which is what projecting the
2277     // centre would give).
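    // Worked through for this test: with scaleX = scaleY = inputDim / outputDim = 2, output texel (0,0)
    // projects to input coordinate (0,0), so the interpolation degenerates to sampling input[0][0] = 1.0f.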
2278     LayerTestResult<float, 4> result(outputTensorInfo);
2279     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2280         1.0f
2281     }));
2282
2283     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2284     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2285
2286     armnn::ResizeBilinearQueueDescriptor descriptor;
2287     armnn::WorkloadInfo info;
2288     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2289     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2290
2291     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2292
2293     inputHandle->Allocate();
2294     outputHandle->Allocate();
2295     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2296
2297     workload->Execute();
2298
2299     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2300     return result;
2301 }
2302
2303 LayerTestResult<float, 4> ResizeBilinearSqMinTest(armnn::IWorkloadFactory& workloadFactory)
2304 {
2305     constexpr unsigned int inputWidth = 4;
2306     constexpr unsigned int inputHeight = 4;
2307     constexpr unsigned int inputChannels = 1;
2308     constexpr unsigned int inputBatchSize = 1;
2309
2310     constexpr unsigned int outputWidth = inputWidth / 2;
2311     constexpr unsigned int outputHeight = inputHeight / 2;
2312     constexpr unsigned int outputChannels = inputChannels;
2313     constexpr unsigned int outputBatchSize = inputBatchSize;
2314
2315     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2316         armnn::DataType::Float32);
2317     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2318         armnn::DataType::Float32);
2319
2320     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2321         1.0f, 2.0f, 3.0f, 4.0f,
2322         2.0f, 3.0f, 4.0f, 5.0f,
2323         3.0f, 4.0f, 5.0f, 6.0f,
2324         4.0f, 5.0f, 6.0f, 7.0f
2325     }));
2326
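    // With a scale factor of 2 and top-left projection, output element (row r, col c) samples the input at
    // (2r, 2c) exactly: (0,0)->1, (0,1)->3, (1,0)->3, (1,1)->5.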
2327     LayerTestResult<float, 4> result(outputTensorInfo);
2328     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2329         1.f, 3.f,
2330         3.f, 5.f
2331     }));
2332
2333     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2334     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2335
2336     armnn::ResizeBilinearQueueDescriptor descriptor;
2337     armnn::WorkloadInfo info;
2338     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2339     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2340
2341     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2342
2343     inputHandle->Allocate();
2344     outputHandle->Allocate();
2345     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2346
2347     workload->Execute();
2348
2349     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2350     return result;
2351 }
2352
2353 LayerTestResult<float, 4> ResizeBilinearMinTest(armnn::IWorkloadFactory& workloadFactory)
2354 {
2355     constexpr unsigned int inputWidth = 5;
2356     constexpr unsigned int inputHeight = 3;
2357     constexpr unsigned int inputChannels = 1;
2358     constexpr unsigned int inputBatchSize = 1;
2359
2360     constexpr unsigned int outputWidth = 3;
2361     constexpr unsigned int outputHeight = 2;
2362     constexpr unsigned int outputChannels = inputChannels;
2363     constexpr unsigned int outputBatchSize = inputBatchSize;
2364
2365     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2366         armnn::DataType::Float32);
2367     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2368         armnn::DataType::Float32);
2369
2370     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2371           1.0f,   2.0f,   3.0f,   5.0f,   8.0f,
2372          13.0f,  21.0f,  34.0f,  55.0f,  89.0f,
2373         144.0f, 233.0f, 377.0f, 610.0f, 987.0f
2374     }));
2375
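    // Worked example for one output element, using the top-left projection: scaleY = 3/2, so output
    // (row 1, col 0) projects to input y = 1.5 and interpolates half-way between rows 1 and 2 of column 0:
    // 13.0f + 0.5f * (144.0f - 13.0f) = 78.5f.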
2376     LayerTestResult<float, 4> result(outputTensorInfo);
2377     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2378         1.0f, 2.6666f, 6.0f,
2379         78.5f, 179.3333f, 401.0f
2380     }));
2381
2382     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2383     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2384
2385     armnn::ResizeBilinearQueueDescriptor descriptor;
2386     armnn::WorkloadInfo info;
2387     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2388     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2389
2390     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2391
2392     inputHandle->Allocate();
2393     outputHandle->Allocate();
2394     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2395
2396     workload->Execute();
2397
2398     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2399     return result;
2400 }
2401
2402 LayerTestResult<float, 4> ResizeBilinearMagTest(armnn::IWorkloadFactory& workloadFactory)
2403 {
2404     constexpr unsigned int inputWidth = 2;
2405     constexpr unsigned int inputHeight = 3;
2406     constexpr unsigned int inputChannels = 1;
2407     constexpr unsigned int inputBatchSize = 1;
2408
2409     constexpr unsigned int outputWidth = 5;
2410     constexpr unsigned int outputHeight = 3;
2411     constexpr unsigned int outputChannels = inputChannels;
2412     constexpr unsigned int outputBatchSize = inputBatchSize;
2413
2414     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2415         armnn::DataType::Float32);
2416     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2417         armnn::DataType::Float32);
2418
2419     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2420           1.0f,   2.0f,
2421          13.0f,  21.0f,
2422         144.0f, 233.0f
2423     }));
2424
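    // Magnification case: scaleX = 2/5 = 0.4, so output col 1 projects to input x = 0.4 and interpolates
    // 1.0f + 0.4f * (2.0f - 1.0f) = 1.4f. Projections past the last input column clamp to the edge value,
    // hence the repeated 2.0f, 21.0f and 233.0f on the right-hand side of the expected output.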
2425     LayerTestResult<float, 4> result(outputTensorInfo);
2426     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2427           1.0f,   1.4f,   1.8f,   2.0f,   2.0f,
2428          13.0f,  16.2f,  19.4f,  21.0f,  21.0f,
2429         144.0f, 179.6f, 215.2f, 233.0f, 233.0f
2430     }));
2431
2432     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2433     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2434
2435     armnn::ResizeBilinearQueueDescriptor descriptor;
2436     armnn::WorkloadInfo info;
2437     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2438     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2439
2440     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
2441
2442     inputHandle->Allocate();
2443     outputHandle->Allocate();
2444     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2445
2446     workload->Execute();
2447
2448     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2449     return result;
2450 }
2451
2452 LayerTestResult<float, 2> FakeQuantizationTest(armnn::IWorkloadFactory& workloadFactory)
2453 {
2454     constexpr unsigned int width = 2;
2455     constexpr unsigned int height = 3;
2456
2457     const armnn::TensorInfo tensorInfo({ height, width },
2458         armnn::DataType::Float32);
2459     auto input = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
2460        -10.0f,  -5.0f,
2461          0.0f,   5.0f,
2462         10.0f,  10.0f
2463     }));
2464
2465     LayerTestResult<float, 2> ret(tensorInfo);
2466
2467     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
2468
2469     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(tensorInfo);
2470
2471     armnn::FakeQuantizationQueueDescriptor data;
2472     armnn::WorkloadInfo info;
2473
2474     AddInputToWorkload(data, info, tensorInfo, inputHandle.get());
2475     AddOutputToWorkload(data, info, tensorInfo, outputHandle.get());
2476     float min = -10.f;
2477     float max = 10.f;
2478
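    // The [min, max] range [-10, 10] is mapped linearly onto the quantized range [0, 255]:
    // -10 -> 0, 0 -> 128 and 10 -> 255, as reflected in the expected output below.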
2479     data.m_Parameters.m_Min = min;
2480     data.m_Parameters.m_Max = max;
2481
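    // Wires the expected-output buffer up as an alternative output binding via a pass-through handle.
    // Note that no reference workload is actually created from refData/refInfo in this test.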
2482     armnn::PassthroughCpuTensorHandle refHandle(tensorInfo, &ret.outputExpected[0][0]);
2483     armnn::FakeQuantizationQueueDescriptor refData = data;
2484     armnn::WorkloadInfo refInfo = info;
2485     SetWorkloadOutput(refData, refInfo, 0, tensorInfo, &refHandle);
2486
2487     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateFakeQuantization(data, info);
2488
2489     inputHandle->Allocate();
2490     outputHandle->Allocate();
2491
2492     CopyDataToITensorHandle(inputHandle.get(), &input[0][0]);
2493
2494     workload->Execute();
2495
2496     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
2497
2498     ret.outputExpected = MakeTensor<float, 2>(tensorInfo, std::vector<float>({
2499         0.0f,     63.0f,
2500         128.0f,   191.0f,
2501         255.0f,   255.0f
2502     }));
2503     return ret;
2504 }
2505
2506 LayerTestResult<float, 4> L2Normalization1dTest(armnn::IWorkloadFactory& workloadFactory)
2507 {
2508     constexpr unsigned int inputWidth = 1;
2509     constexpr unsigned int inputHeight = 1;
2510     constexpr unsigned int inputChannels = 10;
2511     constexpr unsigned int inputBatchSize = 1;
2512
2513     constexpr unsigned int outputWidth = inputWidth;
2514     constexpr unsigned int outputHeight = inputHeight;
2515     constexpr unsigned int outputChannels = inputChannels;
2516     constexpr unsigned int outputBatchSize = inputBatchSize;
2517
2518     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2519         armnn::DataType::Float32);
2520     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2521         armnn::DataType::Float32);
2522
2523     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2524         1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f
2525     }));
2526
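    // 1 / sqrt(1^2 + 2^2 + ... + 10^2) = 1 / sqrt(385)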
2527     const float approxInvL2Norm = 0.050964719f;
2528     LayerTestResult<float, 4> result(outputTensorInfo);
2529     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2530          1.0f * approxInvL2Norm,
2531          2.0f * approxInvL2Norm,
2532          3.0f * approxInvL2Norm,
2533          4.0f * approxInvL2Norm,
2534          5.0f * approxInvL2Norm,
2535          6.0f * approxInvL2Norm,
2536          7.0f * approxInvL2Norm,
2537          8.0f * approxInvL2Norm,
2538          9.0f * approxInvL2Norm,
2539         10.0f * approxInvL2Norm
2540     }));
2541
2542     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2543     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2544
2545     armnn::L2NormalizationQueueDescriptor descriptor;
2546     armnn::WorkloadInfo info;
2547     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2548     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2549
2550     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
2551
2552     inputHandle->Allocate();
2553     outputHandle->Allocate();
2554     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2555
2556     workload->Execute();
2557
2558     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2559     return result;
2560 }
2561
2562 namespace
2563 {
2564
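// Calculates the reciprocal of the L2 norm (1 / sqrt(sum of squares)) of the given elements;
// used below to build the expected outputs of the L2 normalization tests.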
2565 float CalcInvL2Norm(std::initializer_list<float> elements)
2566 {
2567     const float reduction = std::accumulate(elements.begin(), elements.end(), 0.0f,
2568         [](float acc, float element) { return acc + element * element; });
2569     return 1.0f / sqrtf(reduction);
2570 }
2571
2572 }
2573
2574 LayerTestResult<float, 4> L2Normalization2dTest(armnn::IWorkloadFactory& workloadFactory)
2575 {
2576     constexpr unsigned int inputWidth = 5;
2577     constexpr unsigned int inputHeight = 1;
2578     constexpr unsigned int inputChannels = 2;
2579     constexpr unsigned int inputBatchSize = 1;
2580
2581     constexpr unsigned int outputWidth = inputWidth;
2582     constexpr unsigned int outputHeight = inputHeight;
2583     constexpr unsigned int outputChannels = inputChannels;
2584     constexpr unsigned int outputBatchSize = inputBatchSize;
2585
2586     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2587         armnn::DataType::Float32);
2588     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2589         armnn::DataType::Float32);
2590
2591     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2592         1.0f, 3.0f, 5.0f, 7.0f,  9.0f,
2593         2.0f, 4.0f, 6.0f, 8.0f, 10.0f
2594     }));
2595
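    // L2 normalization operates across the channel dimension: each (height, width) position is scaled by the
    // inverse L2 norm of the values at that position across all channels.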
2596     LayerTestResult<float, 4> result(outputTensorInfo);
2597     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2598          1.0f * CalcInvL2Norm({ 1.0f, 2.0f }),
2599          3.0f * CalcInvL2Norm({ 3.0f, 4.0f }),
2600          5.0f * CalcInvL2Norm({ 5.0f, 6.0f }),
2601          7.0f * CalcInvL2Norm({ 7.0f, 8.0f }),
2602          9.0f * CalcInvL2Norm({ 9.0f, 10.0f }),
2603
2604          2.0f * CalcInvL2Norm({ 1.0f, 2.0f }),
2605          4.0f * CalcInvL2Norm({ 3.0f, 4.0f }),
2606          6.0f * CalcInvL2Norm({ 5.0f, 6.0f }),
2607          8.0f * CalcInvL2Norm({ 7.0f, 8.0f }),
2608         10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
2609     }));
2610
2611     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2612     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2613
2614     armnn::L2NormalizationQueueDescriptor descriptor;
2615     armnn::WorkloadInfo info;
2616     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2617     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2618
2619     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
2620
2621     inputHandle->Allocate();
2622     outputHandle->Allocate();
2623     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2624
2625     workload->Execute();
2626
2627     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2628     return result;
2629 }
2630
2631 LayerTestResult<float, 4> L2Normalization3dTest(armnn::IWorkloadFactory& workloadFactory)
2632 {
2633     constexpr unsigned int inputWidth = 3;
2634     constexpr unsigned int inputHeight = 4;
2635     constexpr unsigned int inputChannels = 2;
2636     constexpr unsigned int inputBatchSize = 1;
2637
2638     constexpr unsigned int outputWidth = inputWidth;
2639     constexpr unsigned int outputHeight = inputHeight;
2640     constexpr unsigned int outputChannels = inputChannels;
2641     constexpr unsigned int outputBatchSize = inputBatchSize;
2642
2643     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2644         armnn::DataType::Float32);
2645     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2646         armnn::DataType::Float32);
2647
2648     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2649         // Channel 0
2650         119.0f,  21.0f, 150.0f,
2651         149.0f,  32.0f, 179.0f,
2652          15.0f, 227.0f, 141.0f,
2653         147.0f, 199.0f, 220.0f,
2654
2655         // Channel 1
2656         110.0f, 140.0f,  73.0f,
2657         211.0f, 212.0f,  89.0f,
2658          24.0f, 138.0f, 188.0f,
2659         162.0f,  12.0f, 161.0f,
2660     }));
2661
2662     LayerTestResult<float, 4> result(outputTensorInfo);
2663     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2664         119.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
2665          21.0f * CalcInvL2Norm({  21.0f, 140.0f }),
2666         150.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
2667         149.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
2668          32.0f * CalcInvL2Norm({  32.0f, 212.0f }),
2669         179.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
2670          15.0f * CalcInvL2Norm({  15.0f,  24.0f }),
2671         227.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
2672         141.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
2673         147.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
2674         199.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
2675         220.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
2676
2677         110.0f * CalcInvL2Norm({ 119.0f, 110.0f }),
2678         140.0f * CalcInvL2Norm({  21.0f, 140.0f }),
2679          73.0f * CalcInvL2Norm({ 150.0f,  73.0f }),
2680         211.0f * CalcInvL2Norm({ 149.0f, 211.0f }),
2681         212.0f * CalcInvL2Norm({  32.0f, 212.0f }),
2682          89.0f * CalcInvL2Norm({ 179.0f,  89.0f }),
2683          24.0f * CalcInvL2Norm({  15.0f,  24.0f }),
2684         138.0f * CalcInvL2Norm({ 227.0f, 138.0f }),
2685         188.0f * CalcInvL2Norm({ 141.0f, 188.0f }),
2686         162.0f * CalcInvL2Norm({ 147.0f, 162.0f }),
2687          12.0f * CalcInvL2Norm({ 199.0f,  12.0f }),
2688         161.0f * CalcInvL2Norm({ 220.0f, 161.0f }),
2689     }));
2690
2691     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2692     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2693
2694     armnn::L2NormalizationQueueDescriptor descriptor;
2695     armnn::WorkloadInfo info;
2696     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2697     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2698
2699     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
2700
2701     inputHandle->Allocate();
2702     outputHandle->Allocate();
2703     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2704
2705     workload->Execute();
2706
2707     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2708     return result;
2709 }
2710
2711 LayerTestResult<float, 4> L2Normalization4dTest(armnn::IWorkloadFactory& workloadFactory)
2712 {
2713     constexpr unsigned int inputWidth = 3;
2714     constexpr unsigned int inputHeight = 4;
2715     constexpr unsigned int inputChannels = 3;
2716     constexpr unsigned int inputBatchSize = 2;
2717
2718     constexpr unsigned int outputWidth = inputWidth;
2719     constexpr unsigned int outputHeight = inputHeight;
2720     constexpr unsigned int outputChannels = inputChannels;
2721     constexpr unsigned int outputBatchSize = inputBatchSize;
2722
2723     const armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2724         armnn::DataType::Float32);
2725     const armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2726         armnn::DataType::Float32);
2727
2728     auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
2729         // Batch 0, Channel 0
2730         235.0f,  46.0f, 178.0f,
2731         100.0f, 123.0f,  19.0f,
2732         172.0f,  74.0f, 250.0f,
2733           6.0f, 195.0f,  80.0f,
2734
2735         // Batch 0, Channel 1
2736         113.0f,  95.0f, 202.0f,
2737          77.0f, 114.0f,  71.0f,
2738         122.0f, 246.0f, 166.0f,
2739          82.0f,  28.0f,  37.0f,
2740
2741         // Batch 0, Channel 2
2742          56.0f, 170.0f, 162.0f,
2743         194.0f,  89.0f, 254.0f,
2744          12.0f, 209.0f, 200.0f,
2745           1.0f,  64.0f,  54.0f,
2746
2747         // Batch 1, Channel 0
2748          67.0f,  90.0f,  49.0f,
2749           7.0f, 163.0f,  18.0f,
2750          25.0f, 117.0f, 103.0f,
2751         247.0f,  59.0f, 189.0f,
2752
2753         // Batch 1, Channel 1
2754         239.0f, 104.0f, 199.0f,
2755          17.0f, 124.0f, 153.0f,
2756         222.0f, 217.0f,  75.0f,
2757          32.0f, 126.0f,  21.0f,
2758
2759         // Batch 1, Channel 2
2760          97.0f, 145.0f, 215.0f,
2761         115.0f, 116.0f, 238.0f,
2762         226.0f,  16.0f, 132.0f,
2763          92.0f, 125.0f,  88.0f,
2764     }));
2765
2766     LayerTestResult<float, 4> result(outputTensorInfo);
2767     result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, std::vector<float>({
2768
2769         // Batch 0, Channel 0
2770         235.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
2771          46.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
2772         178.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
2773         100.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
2774         123.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
2775          19.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
2776         172.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
2777          74.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
2778         250.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
2779           6.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
2780         195.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
2781          80.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
2782
2783         // Batch 0, Channel 1
2784         113.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
2785          95.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
2786         202.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
2787          77.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
2788         114.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
2789          71.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
2790         122.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
2791         246.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
2792         166.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
2793          82.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
2794          28.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
2795          37.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
2796
2797         // Batch 0, Channel 2
2798          56.0f * CalcInvL2Norm({ 235.0f, 113.0f,  56.0f }),
2799         170.0f * CalcInvL2Norm({  46.0f,  95.0f, 170.0f }),
2800         162.0f * CalcInvL2Norm({ 178.0f, 202.0f, 162.0f }),
2801         194.0f * CalcInvL2Norm({ 100.0f,  77.0f, 194.0f }),
2802          89.0f * CalcInvL2Norm({ 123.0f, 114.0f,  89.0f }),
2803         254.0f * CalcInvL2Norm({  19.0f,  71.0f, 254.0f }),
2804          12.0f * CalcInvL2Norm({ 172.0f, 122.0f,  12.0f }),
2805         209.0f * CalcInvL2Norm({  74.0f, 246.0f, 209.0f }),
2806         200.0f * CalcInvL2Norm({ 250.0f, 166.0f, 200.0f }),
2807           1.0f * CalcInvL2Norm({   6.0f,  82.0f,   1.0f }),
2808          64.0f * CalcInvL2Norm({ 195.0f,  28.0f,  64.0f }),
2809          54.0f * CalcInvL2Norm({  80.0f,  37.0f,  54.0f }),
2810
2811         // Batch 1, Channel 0
2812          67.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
2813          90.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
2814          49.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
2815           7.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
2816         163.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
2817          18.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
2818          25.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
2819         117.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
2820         103.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
2821         247.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
2822          59.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
2823         189.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
2824
2825         // Batch 1, Channel 1
2826         239.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
2827         104.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
2828         199.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
2829          17.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
2830         124.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
2831         153.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
2832         222.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
2833         217.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
2834          75.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
2835          32.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
2836         126.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
2837          21.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
2838
2839         // Batch 1, Channel 2
2840          97.0f * CalcInvL2Norm({  67.0f, 239.0f,  97.0f }),
2841         145.0f * CalcInvL2Norm({  90.0f, 104.0f, 145.0f }),
2842         215.0f * CalcInvL2Norm({  49.0f, 199.0f, 215.0f }),
2843         115.0f * CalcInvL2Norm({   7.0f,  17.0f, 115.0f }),
2844         116.0f * CalcInvL2Norm({ 163.0f, 124.0f, 116.0f }),
2845         238.0f * CalcInvL2Norm({  18.0f, 153.0f, 238.0f }),
2846         226.0f * CalcInvL2Norm({  25.0f, 222.0f, 226.0f }),
2847          16.0f * CalcInvL2Norm({ 117.0f, 217.0f,  16.0f }),
2848         132.0f * CalcInvL2Norm({ 103.0f,  75.0f, 132.0f }),
2849          92.0f * CalcInvL2Norm({ 247.0f,  32.0f,  92.0f }),
2850         125.0f * CalcInvL2Norm({  59.0f, 126.0f, 125.0f }),
2851          88.0f * CalcInvL2Norm({ 189.0f,  21.0f,  88.0f }),
2852     }));
2853
2854     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
2855     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2856
2857     armnn::L2NormalizationQueueDescriptor descriptor;
2858     armnn::WorkloadInfo info;
2859     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
2860     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2861
2862     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
2863
2864     inputHandle->Allocate();
2865     outputHandle->Allocate();
2866     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
2867
2868     workload->Execute();
2869
2870     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2871     return result;
2872 }
2873
2874 template <typename T>
2875 LayerTestResult<T, 4> ConstantTestImpl(armnn::IWorkloadFactory& workloadFactory,
2876     float qScale,
2877     int32_t qOffset)
2878 {
2879     constexpr unsigned int inputWidth = 3;
2880     constexpr unsigned int inputHeight = 4;
2881     constexpr unsigned int inputChannels = 3;
2882     constexpr unsigned int inputBatchSize = 2;
2883
2884     constexpr unsigned int outputWidth = inputWidth;
2885     constexpr unsigned int outputHeight = inputHeight;
2886     constexpr unsigned int outputChannels = inputChannels;
2887     constexpr unsigned int outputBatchSize = inputBatchSize;
2888
2889     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
2890         armnn::GetDataType<T>());
2891
2892     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
2893         armnn::GetDataType<T>());
2894
2895     // Set quantization parameters if the requested type is a quantized type.
2896     if(armnn::IsQuantizedType<T>())
2897     {
2898         inputTensorInfo.SetQuantizationScale(qScale);
2899         inputTensorInfo.SetQuantizationOffset(qOffset);
2900         outputTensorInfo.SetQuantizationScale(qScale);
2901         outputTensorInfo.SetQuantizationOffset(qOffset);
2902     }
2903
2904     auto input = MakeTensor<T, 4>(inputTensorInfo, std::vector<T>(
2905         QuantizedVector<T>(qScale, qOffset, {
2906         // Batch 0, Channel 0
2907         235.0f,  46.0f, 178.0f,
2908         100.0f, 123.0f,  19.0f,
2909         172.0f,  74.0f, 250.0f,
2910           6.0f, 195.0f,  80.0f,
2911
2912         // Batch 0, Channel 1
2913         113.0f,  95.0f, 202.0f,
2914          77.0f, 114.0f,  71.0f,
2915         122.0f, 246.0f, 166.0f,
2916          82.0f,  28.0f,  37.0f,
2917
2918         // Batch 0, Channel 2
2919          56.0f, 170.0f, 162.0f,
2920         194.0f,  89.0f, 254.0f,
2921          12.0f, 209.0f, 200.0f,
2922           1.0f,  64.0f,  54.0f,
2923
2924         // Batch 1, Channel 0
2925          67.0f,  90.0f,  49.0f,
2926           7.0f, 163.0f,  18.0f,
2927          25.0f, 117.0f, 103.0f,
2928         247.0f,  59.0f, 189.0f,
2929
2930         // Batch 1, Channel 1
2931         239.0f, 104.0f, 199.0f,
2932          17.0f, 124.0f, 153.0f,
2933         222.0f, 217.0f,  75.0f,
2934          32.0f, 126.0f,  21.0f,
2935
2936         // Batch 1, Channel 2
2937          97.0f, 145.0f, 215.0f,
2938         115.0f, 116.0f, 238.0f,
2939         226.0f,  16.0f, 132.0f,
2940          92.0f, 125.0f,  88.0f,
2941     })));
2942
2943     LayerTestResult<T, 4> result(outputTensorInfo);
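    // A constant workload has no inputs: it simply writes its stored tensor to its output,
    // so the expected output is the input data itself.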
2944     result.outputExpected = input;
2945
2946     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
2947
2948     armnn::ScopedCpuTensorHandle constantTensor(inputTensorInfo);
2949     AllocateAndCopyDataToITensorHandle(&constantTensor, &input[0][0][0][0]);
2950
2951     armnn::ConstantQueueDescriptor descriptor;
2952     descriptor.m_LayerOutput = &constantTensor;
2953
2954     armnn::WorkloadInfo info;
2955     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
2956
2957     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConstant(descriptor, info);
2958
2959     outputHandle->Allocate();
2960
2961     workload->Execute();
2962
2963     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
2964     return result;
2965 }
2966
2967 LayerTestResult<float, 4> ConstantTest(armnn::IWorkloadFactory& workloadFactory)
2968 {
2969     return ConstantTestImpl<float>(workloadFactory, 0.0f, 0);
2970 }
2971
2972 LayerTestResult<uint8_t, 4> ConstantTestUint8(armnn::IWorkloadFactory& workloadFactory)
2973 {
2974     return ConstantTestImpl<uint8_t>(workloadFactory, 1.0f, 0);
2975 }
2976
2977 LayerTestResult<uint8_t, 3> MergerUint8Test(armnn::IWorkloadFactory& workloadFactory)
2978 {
2979     unsigned int outputWidth = 5;
2980     unsigned int outputHeight = 6;
2981     unsigned int outputChannels = 3;
2982
2983     unsigned int inputWidth1 = 2;
2984     unsigned int inputHeight1 = 2;
2985     unsigned int inputChannels1 = 3;
2986
2987     unsigned int inputWidth2 = 2;
2988     unsigned int inputHeight2 = 4;
2989     unsigned int inputChannels2 = 3;
2990
2991     unsigned int inputWidth3 = 3;
2992     unsigned int inputHeight3 = 6;
2993     unsigned int inputChannels3 = 2;
2994
2995     unsigned int inputWidth4 = 3;
2996     unsigned int inputHeight4 = 6;
2997     unsigned int inputChannels4 = 1;
2998
2999     // Define the tensor descriptors
3000     armnn::TensorInfo outputTensorInfo({ outputChannels, outputHeight, outputWidth }, armnn::DataType::QuantisedAsymm8);
3001     armnn::TensorInfo inputTensorInfo1({ inputChannels1, inputHeight1, inputWidth1 }, armnn::DataType::QuantisedAsymm8);
3002     armnn::TensorInfo inputTensorInfo2({ inputChannels2, inputHeight2, inputWidth2 }, armnn::DataType::QuantisedAsymm8);
3003     armnn::TensorInfo inputTensorInfo3({ inputChannels3, inputHeight3, inputWidth3 }, armnn::DataType::QuantisedAsymm8);
3004     armnn::TensorInfo inputTensorInfo4({ inputChannels4, inputHeight4, inputWidth4 }, armnn::DataType::QuantisedAsymm8);
3005
3006     // Arbitrary scale and offsets: they don't really matter, as the merger operator doesn't dequantize or requantize the data.
3007     const float scale = 0.13497836f;
3008     const int32_t offset = -7;
3009
3010     outputTensorInfo.SetQuantizationScale(scale);
3011     outputTensorInfo.SetQuantizationOffset(offset);
3012     inputTensorInfo1.SetQuantizationScale(scale);
3013     inputTensorInfo1.SetQuantizationOffset(offset);
3014     inputTensorInfo2.SetQuantizationScale(scale);
3015     inputTensorInfo2.SetQuantizationOffset(offset);
3016     inputTensorInfo3.SetQuantizationScale(scale);
3017     inputTensorInfo3.SetQuantizationOffset(offset);
3018     inputTensorInfo4.SetQuantizationScale(scale);
3019     inputTensorInfo4.SetQuantizationOffset(offset);
3020
3021     LayerTestResult<uint8_t, 3> ret(outputTensorInfo);
3022
3023     ret.outputExpected = MakeTensor<uint8_t, 3>(outputTensorInfo, std::vector<uint8_t>(
3024     {
3025         1, 2, 3, 4, 5,
3026         6, 7, 8, 9, 10,
3027         11, 12, 13, 14, 15,
3028         16, 17, 18, 19, 20,
3029         21, 22, 23, 24, 25,
3030         26, 27, 28, 29, 30,
3031
3032         31, 32, 33, 34, 35,
3033         36, 37, 38, 39, 40,
3034         41, 42, 43, 44, 45,
3035         46, 47, 48, 49, 50,
3036         51, 52, 53, 54, 55,
3037         56, 57, 58, 59, 60,
3038
3039         61, 62, 63, 64, 65,
3040         66, 67, 68, 69, 70,
3041         71, 72, 73, 74, 75,
3042         76, 77, 78, 79, 80,
3043         81, 82, 83, 84, 85,
3044         86, 87, 88, 89, 90,
3045     })
3046     );
3047
3048
3049     auto input1 = MakeTensor<uint8_t, 3>(inputTensorInfo1, std::vector<uint8_t>(
3050     {
3051         1, 2,
3052         6, 7,
3053
3054         31, 32,
3055         36, 37,
3056
3057         61, 62,
3058         66, 67,
3059     })
3060     );
3061
3062     auto input2 = MakeTensor<uint8_t, 3>(inputTensorInfo2, std::vector<uint8_t>(
3063     {
3064         11, 12,
3065         16, 17,
3066         21, 22,
3067         26, 27,
3068
3069         41, 42,
3070         46, 47,
3071         51, 52,
3072         56, 57,
3073
3074         71, 72,
3075         76, 77,
3076         81, 82,
3077         86, 87,
3078     })
3079     );
3080
3081     auto input3 = MakeTensor<uint8_t, 3>(inputTensorInfo3, std::vector<uint8_t>(
3082     {
3083         3, 4, 5,
3084         8, 9, 10,
3085         13, 14, 15,
3086         18, 19, 20,
3087         23, 24, 25,
3088         28, 29, 30,
3089
3090         33, 34, 35,
3091         38, 39, 40,
3092         43, 44, 45,
3093         48, 49, 50,
3094         53, 54, 55,
3095         58, 59, 60,
3096     })
3097     );
3098
3099
3100     auto input4 = MakeTensor<uint8_t, 3>(inputTensorInfo4, std::vector<uint8_t>(
3101     {
3102         63, 64, 65,
3103         68, 69, 70,
3104         73, 74, 75,
3105         78, 79, 80,
3106         83, 84, 85,
3107         88, 89, 90,
3108     })
3109     );
3110
3111     std::vector<unsigned int> wOrigin1 = { 0, 0, 0 }; // Extent of the window is defined by the size of input[0].
3112     armnn::MergerQueueDescriptor::ViewOrigin window1(wOrigin1);
3113
3114     std::vector<unsigned int> wOrigin2 = { 0, 2, 0 }; // Extent of the window is defined by the size of input[1].
3115     armnn::MergerQueueDescriptor::ViewOrigin window2(wOrigin2);
3116
3117     std::vector<unsigned int> wOrigin3 = { 0, 0, 2 }; // Extent of the window is defined by the size of input[2].
3118     armnn::MergerQueueDescriptor::ViewOrigin window3(wOrigin3);
3119
3120     std::vector<unsigned int> wOrigin4 = { 2, 0, 2 }; // Extent of the window is defined by the size of input[3].
3121     armnn::MergerQueueDescriptor::ViewOrigin window4(wOrigin4);
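    // Taken together, the four views tile the { 3, 6, 5 } output exactly once:
    //   window1: channels 0-2, rows 0-1, columns 0-1 (input1, 3x2x2)
    //   window2: channels 0-2, rows 2-5, columns 0-1 (input2, 3x4x2)
    //   window3: channels 0-1, rows 0-5, columns 2-4 (input3, 2x6x3)
    //   window4: channel 2,    rows 0-5, columns 2-4 (input4, 1x6x3)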
3122
3123
3124     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3125
3126     bool subTensorsSupported = workloadFactory.SupportsSubTensors();
3127
3128     std::unique_ptr<armnn::ITensorHandle> inputHandle1 =
3129         subTensorsSupported ?
3130             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo1.GetShape(), wOrigin1.data()) :
3131             workloadFactory.CreateTensorHandle(inputTensorInfo1);
3132
3133     std::unique_ptr<armnn::ITensorHandle> inputHandle2 =
3134         subTensorsSupported ?
3135             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo2.GetShape(), wOrigin2.data()) :
3136             workloadFactory.CreateTensorHandle(inputTensorInfo2);
3137
3138     std::unique_ptr<armnn::ITensorHandle> inputHandle3 =
3139         subTensorsSupported ?
3140             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo3.GetShape(), wOrigin3.data()) :
3141             workloadFactory.CreateTensorHandle(inputTensorInfo3);
3142
3143     std::unique_ptr<armnn::ITensorHandle> inputHandle4 =
3144         subTensorsSupported ?
3145             workloadFactory.CreateSubTensorHandle(*outputHandle, inputTensorInfo4.GetShape(), wOrigin4.data()) :
3146             workloadFactory.CreateTensorHandle(inputTensorInfo4);
3147
3148
3149     armnn::MergerQueueDescriptor data;
3150     armnn::WorkloadInfo info;
3151     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3152     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3153     AddInputToWorkload(data, info, inputTensorInfo3, inputHandle3.get());
3154     AddInputToWorkload(data, info, inputTensorInfo4, inputHandle4.get());
3155     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3156
3157     data.m_ViewOrigins.push_back(window1);
3158     data.m_ViewOrigins.push_back(window2);
3159     data.m_ViewOrigins.push_back(window3);
3160     data.m_ViewOrigins.push_back(window4);
3161
3162     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMerger(data, info);
3163
3164     inputHandle1->Allocate();
3165     inputHandle2->Allocate();
3166     inputHandle3->Allocate();
3167     inputHandle4->Allocate();
3168     outputHandle->Allocate();
3169
3170     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0]);
3171     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0]);
3172     CopyDataToITensorHandle(inputHandle3.get(), &input3[0][0][0]);
3173     CopyDataToITensorHandle(inputHandle4.get(), &input4[0][0][0]);
3174
3175     workload->Execute();
3176
3177     CopyDataFromITensorHandle(&ret.output[0][0][0], outputHandle.get());
3178
3179     return ret;
3180 }
3181
3182 LayerTestResult<uint8_t, 4> AdditionUint8Test(armnn::IWorkloadFactory& workloadFactory)
3183 {
3184     unsigned int batchSize = 1;
3185     unsigned int channels = 2;
3186     unsigned int height = 2;
3187     unsigned int width = 3;
3188
3189     const float scale = 7.0f;
3190     const int32_t offset = 3;
3191
3192     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
3193     armnn::TensorInfo outputTensorInfo;
3194
3195     const unsigned int shape[] = { batchSize, channels, height, width };
3196     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3197     inputTensorInfo1.SetQuantizationScale(scale);
3198     inputTensorInfo1.SetQuantizationOffset(offset);
3199
3200     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3201     inputTensorInfo2.SetQuantizationScale(scale);
3202     inputTensorInfo2.SetQuantizationOffset(offset);
3203
3204     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3205     outputTensorInfo.SetQuantizationScale(scale);
3206     outputTensorInfo.SetQuantizationOffset(offset);
3207
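    // Quantization follows real = scale * (quantized - offset), so e.g. input1[0] = 63
    // dequantizes to 7 * (63 - 3) = 420. The expected outputs below are the dequantized sums
    // requantized with clamp(round(real / scale) + offset, 0, 255), e.g. 420 + 126 = 546,
    // and 546 / 7 + 3 = 81.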
3208     // See dequantized values to the right
3209     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
3210     {
3211          63,  35,  77,  70,  56, 112, //  420, 224,  518,  469,  371, 763
3212         203,  28, 252, 168, 245,  91  // 1400, 175, 1743, 1155, 1694, 616
3213     }));
3214
3215     // See dequantized values to the right
3216     auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo2, std::vector<uint8_t>(
3217     {
3218          21,   7, 175, 231, 175, 210, // 126,   28, 1204, 1596, 1204, 1449
3219         126, 161,  63,  21, 105, 126  // 861, 1106,  420,  126,  714,  861
3220     }));
3221
3222     // See dequantized values to the right
3223     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3224     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
3225     {
3226          81,  39, 249, 255, 228, 255, //  546,  252, 1722, 2065(clamped), 1575, 2212(clamped)
3227         255, 186, 255, 186, 255, 214, // 2261(clamped), 1281, 2163(clamped), 1281, 2408(clamped), 1477
3228     }));
3229
3230     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3231     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3232     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3233
3234     armnn::AdditionQueueDescriptor data;
3235     armnn::WorkloadInfo info;
3236     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3237     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3238     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3239
3240     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateAddition(data, info);
3241
3242     inputHandle1->Allocate();
3243     inputHandle2->Allocate();
3244     outputHandle->Allocate();
3245
3246     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3247     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3248
3249     workload->Execute();
3250
3251     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3252
3253     return result;
3254 }
3255
3256 LayerTestResult<uint8_t, 4> MultiplicationUint8Test(armnn::IWorkloadFactory& workloadFactory)
3257 {
3258     unsigned int batchSize = 1;
3259     unsigned int channels = 2;
3260     unsigned int height = 2;
3261     unsigned int width = 3;
3262
3263     armnn::TensorInfo inputTensorInfo1, inputTensorInfo2;
3264     armnn::TensorInfo outputTensorInfo;
3265
3266     const unsigned int shape[] = { batchSize, channels, height, width };
3267     inputTensorInfo1 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3268     inputTensorInfo1.SetQuantizationScale(4.0f);
3269     inputTensorInfo1.SetQuantizationOffset(1);
3270
3271     inputTensorInfo2 = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3272     inputTensorInfo2.SetQuantizationScale(3.0f);
3273     inputTensorInfo2.SetQuantizationOffset(-2);
3274
3275     outputTensorInfo = armnn::TensorInfo(4, shape, armnn::DataType::QuantisedAsymm8);
3276     outputTensorInfo.SetQuantizationScale(1366.255f); // Scale/offset chosen so that some output values fall outside the uint8 range and get clamped
3277     outputTensorInfo.SetQuantizationOffset(-5);
3278
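    // E.g. input1[0] = 62 dequantizes to 4 * (62 - 1) = 244 and input2[0] = 126 to
    // 3 * (126 + 2) = 384; their product is 244 * 384 = 93696, which requantizes to
    // round(93696 / 1366.255) - 5 = 64.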
3279     // See dequantized values to the right
3280     auto input1 = MakeTensor<uint8_t, 4>(inputTensorInfo1, std::vector<uint8_t>(
3281     {
3282          62,  37,   3, 172,  13, 111, // 244, 144,   8, 684,  48, 440,
3283         188,  20,  73,  31,  23,  31  // 748,  76, 288, 120,  88, 120
3284     }));
3285
3286     // See dequantized values to the right
3287     auto input2 = MakeTensor<uint8_t, 4>(inputTensorInfo2, std::vector<uint8_t>(
3288     {
3289         126, 240, 252, 183, 121, 247, // 384, 726, 762, 555, 369, 747,
3290          48, 115, 151,  79,  78,  97  // 150, 351, 459, 243, 240, 297
3291     }));
3292
3293     // See dequantized values to the right
3294     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3295     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>(
3296     {
3297          64,  72,   0, 255,   8, 236, //  93696, 104544, 6096(clamped), 379620(clamped), 17712, 328680,
3298          77,  15,  92,  16,  10,  21, // 112200,  26676,        132192,           29160, 21120,  35640
3299     }));
3300
3301     std::unique_ptr<armnn::ITensorHandle> inputHandle1 = workloadFactory.CreateTensorHandle(inputTensorInfo1);
3302     std::unique_ptr<armnn::ITensorHandle> inputHandle2 = workloadFactory.CreateTensorHandle(inputTensorInfo2);
3303     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3304
3305     armnn::MultiplicationQueueDescriptor data;
3306     armnn::WorkloadInfo info;
3307     AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
3308     AddInputToWorkload(data, info, inputTensorInfo2, inputHandle2.get());
3309     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
3310
3311     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateMultiplication(data, info);
3312
3313     inputHandle1->Allocate();
3314     inputHandle2->Allocate();
3315     outputHandle->Allocate();
3316
3317     CopyDataToITensorHandle(inputHandle1.get(), &input1[0][0][0][0]);
3318     CopyDataToITensorHandle(inputHandle2.get(), &input2[0][0][0][0]);
3319
3320     workload->Execute();
3321
3322     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3323
3324     return result;
3325 }
3326
3327 LayerTestResult<uint8_t, 4> ResizeBilinearNopUint8Test(armnn::IWorkloadFactory& workloadFactory)
3328 {
3329     constexpr unsigned int inputWidth = 4;
3330     constexpr unsigned int inputHeight = 4;
3331     constexpr unsigned int inputChannels = 1;
3332     constexpr unsigned int inputBatchSize = 1;
3333
3334     constexpr unsigned int outputWidth = inputWidth;
3335     constexpr unsigned int outputHeight = inputHeight;
3336     constexpr unsigned int outputChannels = inputChannels;
3337     constexpr unsigned int outputBatchSize = inputBatchSize;
3338
3339     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3340         armnn::DataType::QuantisedAsymm8);
3341     inputTensorInfo.SetQuantizationScale(1.5f);
3342     inputTensorInfo.SetQuantizationOffset(-3);
3343
3344     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3345         armnn::DataType::QuantisedAsymm8);
3346     outputTensorInfo.SetQuantizationScale(1.5f);
3347     outputTensorInfo.SetQuantizationOffset(-3);
3348
3349     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3350         1, 2, 3, 4,
3351         2, 3, 4, 5,
3352         3, 4, 5, 6,
3353         4, 5, 6, 7
3354     }));
3355
3356     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
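    // The output dimensions match the input dimensions, so the resize is a no-op and the
    // output should be identical to the input.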
3357     result.outputExpected = input;
3358
3359     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3360     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3361
3362     armnn::ResizeBilinearQueueDescriptor descriptor;
3363     armnn::WorkloadInfo info;
3364     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3365     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3366
3367     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3368
3369     inputHandle->Allocate();
3370     outputHandle->Allocate();
3371     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3372
3373     workload->Execute();
3374
3375     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3376     return result;
3377 }
3378
3379 LayerTestResult<uint8_t, 4> SimpleResizeBilinearUint8Test(armnn::IWorkloadFactory& workloadFactory)
3380 {
3381     constexpr unsigned int inputWidth = 2;
3382     constexpr unsigned int inputHeight = 2;
3383     constexpr unsigned int inputChannels = 1;
3384     constexpr unsigned int inputBatchSize = 1;
3385
3386     constexpr unsigned int outputWidth = inputWidth / 2;
3387     constexpr unsigned int outputHeight = inputHeight / 2;
3388     constexpr unsigned int outputChannels = inputChannels;
3389     constexpr unsigned int outputBatchSize = inputBatchSize;
3390
3391     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3392         armnn::DataType::QuantisedAsymm8);
3393     inputTensorInfo.SetQuantizationScale(0.1567f);
3394     inputTensorInfo.SetQuantizationOffset(1);
3395
3396     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3397         armnn::DataType::QuantisedAsymm8);
3398     outputTensorInfo.SetQuantizationScale(0.1567f);
3399     outputTensorInfo.SetQuantizationOffset(1);
3400
3401     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3402         1, 255,
3403         200, 250
3404     }));
3405
3406     // The 'resize bilinear' operation projects the top-left corner of output texels into the input image,
3407     // then figures out the interpolants and weights. Note this is different to projecting the centre of the
3408     // output texel - and thus we'll expect the output 1x1 matrix to contain as its single element the value
3409     // that was at position (0,0) of the input matrix (rather than an average, which we would expect if projecting
3410     // the centre).
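    // Concretely: the scale factor here is inputWidth / outputWidth = 2, so the single output
    // texel's top-left corner projects onto input coordinate (0, 0), whose value is 1.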
3411     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
3412     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3413         1
3414     }));
3415
3416     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3417     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3418
3419     armnn::ResizeBilinearQueueDescriptor descriptor;
3420     armnn::WorkloadInfo info;
3421     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3422     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3423
3424     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3425
3426     inputHandle->Allocate();
3427     outputHandle->Allocate();
3428     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3429
3430     workload->Execute();
3431
3432     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3433     return result;
3434 }
3435
3436 LayerTestResult<uint8_t, 4> ResizeBilinearSqMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
3437 {
3438     constexpr unsigned int inputWidth = 4;
3439     constexpr unsigned int inputHeight = 4;
3440     constexpr unsigned int inputChannels = 1;
3441     constexpr unsigned int inputBatchSize = 1;
3442
3443     constexpr unsigned int outputWidth = inputWidth / 2;
3444     constexpr unsigned int outputHeight = inputHeight / 2;
3445     constexpr unsigned int outputChannels = inputChannels;
3446     constexpr unsigned int outputBatchSize = inputBatchSize;
3447
3448     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3449         armnn::DataType::QuantisedAsymm8);
3450     inputTensorInfo.SetQuantizationScale(3.141592f);
3451     inputTensorInfo.SetQuantizationOffset(3);
3452
3453     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3454         armnn::DataType::QuantisedAsymm8);
3455     outputTensorInfo.SetQuantizationScale(3.141592f);
3456     outputTensorInfo.SetQuantizationOffset(3);
3457
3458     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3459         1, 2, 3, 4,
3460         2, 3, 4, 5,
3461         3, 4, 5, 6,
3462         4, 5, 6, 7
3463     }));
3464
3465     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
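    // With a scale factor of exactly 2 in both dimensions, every output texel projects onto an
    // input texel with no fractional part, so output (y, x) simply picks up input (2y, 2x).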
3466     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3467         1, 3,
3468         3, 5
3469     }));
3470
3471     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3472     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3473
3474     armnn::ResizeBilinearQueueDescriptor descriptor;
3475     armnn::WorkloadInfo info;
3476     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3477     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3478
3479     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3480
3481     inputHandle->Allocate();
3482     outputHandle->Allocate();
3483     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3484
3485     workload->Execute();
3486
3487     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3488     return result;
3489 }
3490
3491 LayerTestResult<uint8_t, 4> ResizeBilinearMinUint8Test(armnn::IWorkloadFactory& workloadFactory)
3492 {
3493     constexpr unsigned int inputWidth = 3;
3494     constexpr unsigned int inputHeight = 2;
3495     constexpr unsigned int inputChannels = 1;
3496     constexpr unsigned int inputBatchSize = 1;
3497
3498     constexpr unsigned int outputWidth = 2;
3499     constexpr unsigned int outputHeight = 1;
3500     constexpr unsigned int outputChannels = inputChannels;
3501     constexpr unsigned int outputBatchSize = inputBatchSize;
3502
3503     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3504         armnn::DataType::QuantisedAsymm8);
3505     inputTensorInfo.SetQuantizationScale(1.5f);
3506     inputTensorInfo.SetQuantizationOffset(-1);
3507
3508     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3509         armnn::DataType::QuantisedAsymm8);
3510     outputTensorInfo.SetQuantizationScale(1.5f);
3511     outputTensorInfo.SetQuantizationOffset(-1);
3512
3513     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3514         1,  2,  3, // 3.0, 4.5, 6.0
3515         5,  8, 13  // 9.0, 13.5, 21.0
3516     }));
3517
3518     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
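    // The x scale factor is 3 / 2 = 1.5, so output texel (0, 1) projects to input coordinate
    // (0, 1.5); interpolating halfway between 4.5 and 6.0 gives the 5.25 noted below.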
3519     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3520         1, 3 // 3.0, 5.25
3521     }));
3522
3523     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3524     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3525
3526     armnn::ResizeBilinearQueueDescriptor descriptor;
3527     armnn::WorkloadInfo info;
3528     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3529     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3530
3531     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3532
3533     inputHandle->Allocate();
3534     outputHandle->Allocate();
3535
3536     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3537
3538     workload->Execute();
3539
3540     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3541     return result;
3542 }
3543
3544 LayerTestResult<uint8_t, 4> ResizeBilinearMagUint8Test(armnn::IWorkloadFactory& workloadFactory)
3545 {
3546     constexpr unsigned int inputWidth = 2;
3547     constexpr unsigned int inputHeight = 3;
3548     constexpr unsigned int inputChannels = 1;
3549     constexpr unsigned int inputBatchSize = 1;
3550
3551     constexpr unsigned int outputWidth = 5;
3552     constexpr unsigned int outputHeight = 3;
3553     constexpr unsigned int outputChannels = inputChannels;
3554     constexpr unsigned int outputBatchSize = inputBatchSize;
3555
3556     armnn::TensorInfo inputTensorInfo({ inputBatchSize, inputChannels, inputHeight, inputWidth },
3557         armnn::DataType::QuantisedAsymm8);
3558     inputTensorInfo.SetQuantizationScale(0.010765f);
3559     inputTensorInfo.SetQuantizationOffset(7);
3560
3561     armnn::TensorInfo outputTensorInfo({ outputBatchSize, outputChannels, outputHeight, outputWidth },
3562         armnn::DataType::QuantisedAsymm8);
3563     outputTensorInfo.SetQuantizationScale(0.010132f);
3564     outputTensorInfo.SetQuantizationOffset(-18);
3565
3566     auto input = MakeTensor<uint8_t, 4>(inputTensorInfo, std::vector<uint8_t>({
3567          24, 228, // 0.183005, 2.379065,
3568         105, 128, // 1.05497, 1.302565
3569         230,  71  // 2.400595, 0.68896
3570     }));
3571
3572     LayerTestResult<uint8_t, 4> result(outputTensorInfo);
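    // The x scale factor is 2 / 5 = 0.4, so e.g. output texel (0, 1) projects to input x = 0.4
    // and interpolates to 0.183005 + 0.4 * (2.379065 - 0.183005) = 1.061429, which requantizes
    // to round(1.061429 / 0.010132) - 18 = 87.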
3573     result.outputExpected = MakeTensor<uint8_t, 4>(outputTensorInfo, std::vector<uint8_t>({
3574           0,  87, 173, 217, 217, // 0.18300501, 1.06142902, 1.93985295, 2.37906504, 2.37906504
3575          86,  96, 106, 111, 111, // 1.05497003, 1.15400803, 1.25304604, 1.30256498, 1.30256498
3576         219, 151,  84,  50,  50  // 2.40059495, 1.71594095, 1.03128707, 0.68896002, 0.68896002
3577     }));
3578
3579     std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
3580     std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
3581
3582     armnn::ResizeBilinearQueueDescriptor descriptor;
3583     armnn::WorkloadInfo info;
3584     AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
3585     AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
3586
3587     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateResizeBilinear(descriptor, info);
3588
3589     inputHandle->Allocate();
3590     outputHandle->Allocate();
3591     CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
3592
3593     workload->Execute();
3594
3595     CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
3596     return result;
3597 }
3598
3599 LayerTestResult<float, 4> BatchNormTest(armnn::IWorkloadFactory& workloadFactory)
3600 {
3601     return BatchNormTestImpl<float>(workloadFactory, 0.f, 0);
3603 }
3604
3605 LayerTestResult<uint8_t, 4> BatchNormUint8Test(armnn::IWorkloadFactory& workloadFactory)
3606 {
3607     return BatchNormTestImpl<uint8_t>(workloadFactory, 1.f/20.f, 50);
3609 }
3610
3611 LayerTestResult<uint8_t, 4> ConstantUint8Test(armnn::IWorkloadFactory& workloadFactory)
3612 {
3613     return ConstantTestImpl<uint8_t>(workloadFactory, 2e-6f, 1);
3614 }
3615
3616 LayerTestResult<uint8_t, 1> Concatenation1dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3617 {
3618     return Concatenation1dTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3619 }
3620
3621 LayerTestResult<uint8_t, 2> Concatenation2dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3622 {
3623     return Concatenation2dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3624 }
3625
3626 LayerTestResult<uint8_t, 2> Concatenation2dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3627 {
3628     return Concatenation2dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3629 }
3630
3631 LayerTestResult<uint8_t, 2> Concatenation2dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
3632 {
3633     return Concatenation2dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3634 }
3635
3636 LayerTestResult<uint8_t, 2> Concatenation2dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
3637 {
3638     return Concatenation2dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3639 }
3640
3641 LayerTestResult<uint8_t, 3> Concatenation3dDim0Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3642 {
3643     return Concatenation3dDim0TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3644 }
3645
3646 LayerTestResult<uint8_t, 3> Concatenation3dDim1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3647 {
3648     return Concatenation3dDim1TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3649 }
3650
3651 LayerTestResult<uint8_t, 3> Concatenation3dDim2Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3652 {
3653     return Concatenation3dDim2TestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3654 }
3655
3656 LayerTestResult<uint8_t, 3> Concatenation3dDim0DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
3657 {
3658     return Concatenation3dDim0DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3659 }
3660
3661 LayerTestResult<uint8_t, 3> Concatenation3dDim1DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
3662 {
3663     return Concatenation3dDim1DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3664 }
3665
3666 LayerTestResult<uint8_t, 3> Concatenation3dDim2DiffInputDimsUint8Test(armnn::IWorkloadFactory& workloadFactory)
3667 {
3668     return Concatenation3dDim2DiffInputDimsTestImpl<uint8_t>(workloadFactory, 0.5f, -1);
3669 }
3670
3671 LayerTestResult<float, 4> SimpleMaxPooling2dSize2x2Stride2x2Test(armnn::IWorkloadFactory& workloadFactory,
3672                                                                  bool forceNoPadding)
3673 {
3674     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<float>(workloadFactory, forceNoPadding);
3675 }
3676
3677 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize2x2Stride2x2Uint8Test(armnn::IWorkloadFactory& workloadFactory,
3678                                                                         bool forceNoPadding)
3679 {
3680     return SimpleMaxPooling2dSize2x2Stride2x2TestCommon<uint8_t>(workloadFactory, forceNoPadding, 3.0f, -5);
3681 }
3682
3683 LayerTestResult<float, 4> SimpleMaxPooling2dSize3x3Stride2x4Test(armnn::IWorkloadFactory& workloadFactory,
3684                                                                  bool forceNoPadding)
3685 {
3686     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<float>(workloadFactory, forceNoPadding);
3687 }
3688
3689 LayerTestResult<uint8_t, 4> SimpleMaxPooling2dSize3x3Stride2x4Uint8Test(armnn::IWorkloadFactory& workloadFactory,
3690                                                                         bool forceNoPadding)
3691 {
3692     return SimpleMaxPooling2dSize3x3Stride2x4TestCommon<uint8_t>(workloadFactory, forceNoPadding, 0.1f, 128);
3693 }
3694
3695 LayerTestResult<float, 4> SimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3696 {
3697     return SimpleAveragePooling2dTestCommon<float>(workloadFactory);
3698 }
3699
3700 LayerTestResult<uint8_t, 4> SimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3701 {
3702     return SimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
3703 }
3704
3705 LayerTestResult<float, 4> LargeTensorsAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3706 {
3707     return LargeTensorsAveragePooling2dTestCommon<float>(workloadFactory);
3708 }
3709
3710 LayerTestResult<uint8_t, 4> LargeTensorsAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3711 {
3712     return LargeTensorsAveragePooling2dTestCommon<uint8_t>(workloadFactory, 0.5, -1);
3713 }
3714
3715 LayerTestResult<float, 4> SimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3716 {
3717     return SimpleL2Pooling2dTestCommon<float>(workloadFactory);
3718 }
3719
3720 LayerTestResult<uint8_t, 4> SimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3721 {
3722     return SimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory);
3723 }
3724
3725 LayerTestResult<float, 4> L2Pooling2dSize3Stride1Test(armnn::IWorkloadFactory& workloadFactory)
3726 {
3727     return L2Pooling2dSize3Stride1TestCommon<float>(workloadFactory);
3728 }
3729
3730 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride1Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3731 {
3732     return L2Pooling2dSize3Stride1TestCommon<uint8_t>(workloadFactory);
3733 }
3734
3735 LayerTestResult<float, 4> L2Pooling2dSize3Stride3Test(armnn::IWorkloadFactory& workloadFactory)
3736 {
3737     return L2Pooling2dSize3Stride3TestCommon<float>(workloadFactory);
3738 }
3739
3740 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3741 {
3742     return L2Pooling2dSize3Stride3TestCommon<uint8_t>(workloadFactory);
3743 }
3744
3745 LayerTestResult<float, 4> L2Pooling2dSize3Stride4Test(armnn::IWorkloadFactory& workloadFactory)
3746 {
3747     return L2Pooling2dSize3Stride4TestCommon<float>(workloadFactory);
3748 }
3749
3750 LayerTestResult<uint8_t, 4> L2Pooling2dSize3Stride4Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3751 {
3752     return L2Pooling2dSize3Stride4TestCommon<uint8_t>(workloadFactory);
3753 }
3754
3755 LayerTestResult<float, 4> L2Pooling2dSize7Test(armnn::IWorkloadFactory& workloadFactory)
3756 {
3757     return L2Pooling2dSize7TestCommon<float>(workloadFactory);
3758 }
3759
3760 LayerTestResult<uint8_t, 4> L2Pooling2dSize7Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3761 {
3762     return L2Pooling2dSize7TestCommon<uint8_t>(workloadFactory);
3763 }
3764
3765 LayerTestResult<float, 4> L2Pooling2dSize9Test(armnn::IWorkloadFactory& workloadFactory)
3766 {
3767     return L2Pooling2dSize9TestCommon<float>(workloadFactory);
3768 }
3769
3770 LayerTestResult<uint8_t, 4> L2Pooling2dSize9Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3771 {
3772     return L2Pooling2dSize9TestCommon<uint8_t>(workloadFactory);
3773 }
3774
3775 LayerTestResult<float, 4> AsymmetricNonSquarePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3776 {
3777     return AsymmetricNonSquarePooling2dTestCommon<float>(workloadFactory);
3778 }
3779
3780 LayerTestResult<uint8_t, 4> AsymmetricNonSquarePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3781 {
3782     return AsymmetricNonSquarePooling2dTestCommon<uint8_t>(workloadFactory);
3783 }
3784
3785 LayerTestResult<float, 4> ComparePooling2dTest(armnn::IWorkloadFactory& workloadFactory,
3786                                                armnn::IWorkloadFactory& refWorkloadFactory,
3787                                                armnn::PoolingAlgorithm  poolingType)
3788 {
3789     return ComparePooling2dTestCommon<float>(workloadFactory, refWorkloadFactory, poolingType);
3790 }
3791
3792 LayerTestResult<uint8_t, 4> ComparePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory,
3793                                                       armnn::IWorkloadFactory& refWorkloadFactory,
3794                                                       armnn::PoolingAlgorithm  poolingType)
3795 {
3796     return ComparePooling2dTestCommon<uint8_t>(workloadFactory, refWorkloadFactory, poolingType, 0.1f, 128);
3797 }
3798
3799 LayerTestResult<float, 2> FullyConnectedLargeTest(armnn::IWorkloadFactory& workloadFactory,
3800                                                   bool transposeWeights)
3801 {
3802     return FullyConnectedLargeTestCommon<float>(workloadFactory, transposeWeights);
3803 }
3804
3805 LayerTestResult<float, 4> IgnorePaddingSimpleMaxPooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3806 {
3807     return IgnorePaddingSimpleMaxPooling2dTestCommon<float>(workloadFactory);
3808 }
3809
3810 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleMaxPooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3811 {
3812     return IgnorePaddingSimpleMaxPooling2dTestCommon<uint8_t>(workloadFactory, 1.0f, -5);
3813 }
3814
3815 LayerTestResult<float, 4> IgnorePaddingMaxPooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
3816 {
3817     return IgnorePaddingMaxPooling2dSize3TestCommon<float>(workloadFactory);
3818 }
3819
3820 LayerTestResult<uint8_t, 4> IgnorePaddingMaxPooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3821 {
3822     return IgnorePaddingMaxPooling2dSize3TestCommon<uint8_t>(workloadFactory, 1.0f, -5);
3823 }
3824
3825 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3826 {
3827     return IgnorePaddingSimpleAveragePooling2dTestCommon<float>(workloadFactory);
3828 }
3829
3830 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3831 {
3832     return IgnorePaddingSimpleAveragePooling2dTestCommon<uint8_t>(workloadFactory);
3833 }
3834
3835 LayerTestResult<float, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingTest(armnn::IWorkloadFactory& workloadFactory)
3836 {
3837     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<float>(workloadFactory);
3838 }
3839
3840 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleAveragePooling2dNoPaddingUint8Test(
3841     armnn::IWorkloadFactory& workloadFactory)
3842 {
3843     return IgnorePaddingSimpleAveragePooling2dNoPaddingTestCommon<uint8_t>(workloadFactory);
3844 }
3845
3846 LayerTestResult<float, 4> IgnorePaddingAveragePooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
3847 {
3848     return IgnorePaddingAveragePooling2dSize3TestCommon<float>(workloadFactory);
3849 }
3850
3851 LayerTestResult<uint8_t, 4> IgnorePaddingAveragePooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3852 {
3853     return IgnorePaddingAveragePooling2dSize3TestCommon<uint8_t>(workloadFactory);
3854 }
3855
3856 LayerTestResult<float, 4> IgnorePaddingSimpleL2Pooling2dTest(armnn::IWorkloadFactory& workloadFactory)
3857 {
3858     return IgnorePaddingSimpleL2Pooling2dTestCommon<float>(workloadFactory);
3859 }
3860
3861 LayerTestResult<uint8_t, 4> IgnorePaddingSimpleL2Pooling2dUint8Test(armnn::IWorkloadFactory& workloadFactory)
3862 {
3863     return IgnorePaddingSimpleL2Pooling2dTestCommon<uint8_t>(workloadFactory);
3864 }
3865
3866 LayerTestResult<float, 4> IgnorePaddingL2Pooling2dSize3Test(armnn::IWorkloadFactory& workloadFactory)
3867 {
3868     return IgnorePaddingL2Pooling2dSize3TestCommon<float>(workloadFactory);
3869 }
3870
3871 LayerTestResult<uint8_t, 4> IgnorePaddingL2Pooling2dSize3Uint8Test(armnn::IWorkloadFactory& workloadFactory)
3872 {
3873     return IgnorePaddingL2Pooling2dSize3TestCommon<uint8_t>(workloadFactory);
3874 }
3875
3876 LayerTestResult<float, 4> SimplePermuteFloat32Test(armnn::IWorkloadFactory& workloadFactory)
3877 {
3878     return SimplePermuteFloat32TestCommon(workloadFactory);
3879 }
3880
3881 LayerTestResult<uint8_t, 4> SimplePermuteUint8Test(armnn::IWorkloadFactory& workloadFactory)
3882 {
3883     return SimplePermuteUint8TestCommon(workloadFactory);
3884 }